dots/config/inkscape/extensions/org.inkscape.extension.30306/scientific_inkscape/remove_kerning.py
2026-06-05 13:11:08 +02:00

659 lines
24 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
# coding=utf-8
#
# Copyright (c) 2023 David Burghoff <burghoff@utexas.edu>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Debug switches (both off for normal use).
# DEBUG_PARSER: for debugging parser
DEBUG_PARSER = False
# DEBUG_MERGE: for checking why elements aren't merging
DEBUG_MERGE = False

# Number of spaces beyond which text will be merged/split.
NUM_SPACES = 1.0

# x tolerance (number of spaces), let be big since there are
# kerning inaccuracies (as big as -0.56 in Whitney).
XTOLEXT = 0.6

# y tolerance (fraction of cap height), should be pretty small.
YTOLEXT = 0.1

# Left tolerance for manual kerning removal, used to be huge but is now
# tighter since differential kerning was made default for PDF.
XTOLMKN = 1.5

# Right tolerance for manual kerning removal, should be fairly open-minded.
XTOLMKP = 0.99

# y tolerance for manual kerning removal (scaled by a chunk's ``mch``).
YTOLMK = 0.01

# Tolerance for manual kerning splitting, should be fairly tight.
XTOLSPLIT = 0.5

# Ensuring sub/superscripts are smaller helps reduce false merges.
SUBSUPER_THR = 0.99

# Superscripts must be at least 1/3 of the way above the baseline to merge
# (1/3 below cap for sub).
SUBSUPER_YTHR = 1 / 3
import inkex
import inkex.text.parser as tp
import os, sys, re
sys.path.append(
os.path.dirname(os.path.realpath(sys.argv[0]))
) # make sure my directory is on the path
import dhelpers as dh
def remove_kerning(
    els,
    removemanual,
    mergesupersub,
    splitdistant,
    mergenearby,
    justification=None,
    debugparser=False,
):
    """Run the kerning-removal pipeline on the text elements found in ``els``.

    Args:
        els: elements to process; only ``inkex.TextElement`` instances are
            modified (``inkex.FlowRoot`` instances are additionally included
            in the character-table build and in debug highlighting).
        removemanual: convert differential kerning to absolute kerning and
            run ``Remove_Manual_Kerning``.
        mergesupersub: allow sub/superscript merges in ``External_Merges``.
        splitdistant: run the three splitting passes (chunks, intra-chunk,
            lines).
        mergenearby: allow same-line merges in ``External_Merges``.
        justification: forwarded to ``Change_Justification``; ``None`` leaves
            the justification alone.
        debugparser: when true (or when ``DEBUG_PARSER`` is set) only draw
            per-character highlights and skip all processing.

    Returns:
        ``dh.unique(els + tels)``: the input elements plus any text elements
        created along the way.
    """
    tels = [el for el in els if isinstance(el, (inkex.TextElement, inkex.FlowRoot))]
    if tels:
        tels[0].croot.make_char_table(tels)

    if DEBUG_PARSER or debugparser:
        # Debug mode: highlight the parsed characters and do nothing else.
        for el in tels:
            el.parsed_text.make_highlights("char")
        return dh.unique(els + tels)

    # Do merges first (deciding based on original position)
    tels = [el for el in els if isinstance(el, inkex.TextElement)]
    ptl = tp.ParsedTextList(tels)
    ptl.precalcs()
    ptl.make_next_chain()
    if removemanual:
        for pt in ptl:
            pt.differential_to_absolute_kerning()
            pt.make_next_chain()
        tels = Remove_Manual_Kerning(tels, mergesupersub)
    if mergenearby or mergesupersub:
        tels = External_Merges(tels, mergenearby, mergesupersub)

    # Then do splits (deciding based on current position, not original
    # position, since merges intentionally change position)
    if splitdistant:
        tels = Split_Distant_Chunks(tels)
        tels = Split_Distant_Intrachunk(tels)
        tels = Split_Lines(tels)

    # Final tweaks
    tels = Change_Justification(tels, justification)
    tels, removedspc = Remove_Trailing_Leading_Spaces(tels)
    if removemanual or mergenearby or mergesupersub or removedspc:
        tels = Fix_Merge_Positions(tels)
    tels = Make_All_Editable(tels)
    tels = Final_Cleanup(tels)
    return dh.unique(els + tels)
def Final_Cleanup(els):
    """Call ``delete_empty()`` on every element's parsed text; return ``els``."""
    for ptxt in (el.parsed_text for el in els):
        ptxt.delete_empty()
    return els
def Fix_Merge_Positions(els):
    """Call ``fix_merged_position()`` on every chunk of every line; return ``els``."""
    for ptxt in (el.parsed_text for el in els):
        for line in ptxt.lns:
            for chunk in line.chks:
                chunk.fix_merged_position()
    return els
def Remove_Trailing_Leading_Spaces(els):
    """Delete the space characters at the end and start of every line.

    Text flagged ``ismlinkscape`` or ``isflow`` (Inkscape-generated text) is
    left untouched.

    Returns:
        ``(els, removed)`` where ``removed`` is True if at least one space
        character was deleted.
    """
    removed = False
    for el in els:
        ptxt = el.parsed_text
        if ptxt.ismlinkscape or ptxt.isflow:
            continue  # skip Inkscape-generated text
        for line in ptxt.lns:
            # Trailing spaces: walk backwards over a snapshot of the text and
            # delete the character at the same index.
            text = line.txt()
            for pos in range(len(text) - 1, -1, -1):
                if text[pos] != " ":
                    break
                line.chrs[pos].delc()
                removed = True

            # Leading spaces: re-read the text, then delete the first
            # character once per leading space.
            text = line.txt()
            for pos in range(len(text)):
                if text[pos] != " ":
                    break
                line.chrs[0].delc()
                removed = True
    return els, removed
def Make_All_Editable(els):
    """Call ``make_editable()`` on every element's parsed text; return ``els``."""
    for ptxt in (el.parsed_text for el in els):
        ptxt.make_editable()
    return els
def Change_Justification(els, justification):
    """Re-align every line to ``justification`` and update the text style.

    Args:
        els: text elements to update.
        justification: ``"start"``, ``"middle"`` or ``"end"``; ``None`` makes
            the function a no-op.

    Returns:
        ``els`` (the same list).
    """
    if justification is None:
        return els

    # text-anchor value -> matching text-align value
    alignd = {"start": "start", "middle": "center", "end": "end"}
    for ptxt in [el.parsed_text for el in els]:
        if ptxt.ismlinkscape or ptxt.isflow:
            continue  # skip Inkscape-generated text
        for line in ptxt.lns:
            line.change_alignment(justification)
        ptxt.textel.cstyle.__setitem__(
            "text-anchor", justification, "text-align", alignd[justification]
        )
    return els
def Split_Lines(els, ignoreinkscape=True):
    """Split multi-line text so that each line after the first gets its own element.

    Args:
        els: text elements; newly created elements are appended to this list.
        ignoreinkscape: when true, text flagged ``ismlinkscape`` is skipped.

    Returns:
        ``els`` (the same list, possibly extended).
    """
    for ptxt in [el.parsed_text for el in els]:
        if ptxt.lns is None or len(ptxt.lns) <= 1:
            continue
        if ptxt.ismlinkscape and ignoreinkscape:
            continue
        if ptxt.isflow:
            continue
        # Peel lines off from the last one down to the second one.
        for il in range(len(ptxt.lns) - 1, 0, -1):
            els.append(ptxt.split_off_characters(ptxt.lns[il].chrs))
    return els
def Split_Distant_Chunks(els):
    """Split off chunks that sit too far to the right of their neighbour.

    For every line the chunks are ordered by ascending ``x``. A split is
    recorded wherever the gap between neighbouring chunks exceeds the
    expected space width (``NUM_SPACES`` minus the spaces already present at
    the seam) plus the ``XTOLSPLIT`` tolerance. Each run of chunks starting
    at a split is moved to a new element, which is appended to ``els``.

    Side effects: stores ``line.splits`` and ``line.sws`` on every line.

    Returns:
        ``els`` (the same list, possibly extended).
    """
    for ptxt in [el.parsed_text for el in els]:
        if ptxt.lns is None:
            continue
        for il in range(len(ptxt.lns) - 1, -1, -1):
            line = ptxt.lns[il]
            # chunks sorted in ascending x
            sws = sorted(line.chks, key=lambda chk: chk.x)

            splits = []
            for ii in range(1, len(line.chks)):
                left, right = sws[ii - 1], sws[ii]
                trl_spcs, ldg_spcs = trailing_leading(left.txt, right.txt)
                dx = left.spw * (NUM_SPACES - trl_spcs - ldg_spcs)
                xtol = XTOLSPLIT * left.spw
                tr1, br1, tl2, bl2 = left.get_ut_pts(right, current_pts=True)
                if bl2[0] > br1[0] + dx + xtol:
                    splits.append(ii)
            line.splits = splits
            line.sws = sws

            if not splits:
                continue
            # End index of each run: the next split, or the chunk count
            # (taken before anything has been split off) for the last run.
            stops = splits[1:] + [len(line.chks)]
            for sstart, sstop in zip(reversed(splits), reversed(stops)):
                els.append(ptxt.split_off_chunks(sws[sstart:sstop]))
    return els
def Split_Distant_Intrachunk(els):
    """Split a chunk wherever two of its characters are too far apart.

    Within each chunk the characters are ordered by the x coordinate of
    ``pts_ut[0]``. Walking left to right, a split is recorded before a
    character when either

    * its ``pts_ut[0]`` x lies more than ``NUM_SPACES + XTOLSPLIT`` space
      widths to the right of the ``pts_ut[3]`` x of the last non-blank
      character, or
    * the character is a number separator (space, hyphen-minus or Unicode
      minus) sitting between two numeric runs in the same element; these are
      always split in case they are axis ticks.

    Text flagged ``ismlinkscape`` or ``isflow`` is skipped. New elements are
    appended to ``els``.

    Returns:
        ``els`` (the same list, possibly extended).
    """
    # Characters that may separate numbers. The Unicode minus sign is written
    # as an escape so it cannot be silently dropped or mistaken for a
    # look-alike; an empty string here would make the pattern below match
    # between every pair of characters.
    numbersplits = [" ", "-", "\u2212"]
    numbersplit_re = re.compile("|".join(re.escape(s) for s in numbersplits))
    blanks = [" ", "\u00a0"]

    for ptxt in [el.parsed_text for el in els]:
        if ptxt.lns is None or ptxt.ismlinkscape or ptxt.isflow:
            continue
        for line in ptxt.lns:
            for w in line.chks:
                if len(w.chrs) == 0:
                    continue
                chrs = sorted(w.chrs, key=lambda ch: ch.pts_ut[0][0])
                lastnspc = None if chrs[0].c in blanks else chrs[0]
                splitiis = []
                prevsplit = 0
                for ii in range(1, len(chrs)):
                    c2 = chrs[ii]
                    if lastnspc is not None:
                        c = lastnspc
                        bl2 = c2.pts_ut[0]
                        br1 = c.pts_ut[3]
                        dx = w.spw * NUM_SPACES
                        xtol = XTOLSPLIT * w.spw

                        # If this character is splitting two numbers,
                        # should always split in case they are ticks.
                        # NOTE(review): w.txt is indexed with positions from
                        # the x-sorted list; this assumes text order matches
                        # x order - confirm.
                        splrest = [
                            v for v in numbersplit_re.split(w.txt[ii:]) if v != ""
                        ]
                        remainingnumeric = len(splrest) > 0 and isnumeric(splrest[0])
                        numbersplit = (
                            isnumeric(w.txt[prevsplit:ii])
                            and (c2.c in numbersplits and remainingnumeric)
                            and c.loc.elem == c2.loc.elem
                        )
                        if bl2[0] > br1[0] + dx + xtol or numbersplit:
                            splitiis.append(ii)
                            prevsplit = ii
                    if c2.c not in blanks:
                        lastnspc = c2

                # Split off the runs from right to left; each run ends at the
                # next split index (or at the end of the chunk).
                bounds = splitiis + [len(chrs)]
                for k in reversed(range(len(splitiis))):
                    segment = chrs[bounds[k] : bounds[k + 1]]
                    # keep the characters in the chunk's own order
                    split_chrs = [ch for ch in w.chrs if ch in segment]
                    newtxt = ptxt.split_off_characters(split_chrs)
                    els.append(newtxt)
    return els
def Remove_Manual_Kerning(els, mergesupersub):
    """Merge chunks that were only separated by manual kerning.

    Each chunk is compared with its ``nextw``. The pair is queued as a
    ``"same"`` merge when the start of the next chunk lies within
    ``XTOLMKN`` space widths to the left and the expected gap plus
    ``XTOLMKP`` space widths to the right of this chunk's end, and within
    ``YTOLMK * mch`` vertically. The queued merges are executed by
    ``Perform_Merges(chks, mk=True)``; afterwards any line that still has
    more than one chunk has its extra chunks split off into new elements.

    Args:
        els: text elements; new elements are appended to this list.
        mergesupersub: accepted for interface compatibility; not read here.

    Returns:
        ``els`` (the same list, possibly extended).
    """
    # Generate list of merges
    ptxts = [el.parsed_text for el in els]
    chks = []
    for ptxt in ptxts:
        if ptxt.lns is not None:
            chks += [chk for line in ptxt.lns for chk in line.chks]

    for w in chks:
        candidates = []
        nxt = w.nextw
        if nxt is not None and nxt in chks and not twospaces(w.txt, nxt.txt):
            trl_spcs, ldg_spcs = trailing_leading(w.txt, nxt.txt)
            gap = w.spw * (NUM_SPACES - trl_spcs - ldg_spcs)
            tol_left = XTOLMKN * w.spw
            tol_right = XTOLMKP * w.spw
            tol_y = YTOLMK * w.mch
            try:
                _, br1, _, bl2 = w.get_ut_pts(nxt)
            except ZeroDivisionError:
                w.mw = candidates
                continue
            if isnumeric(w.txt) and isnumeric(nxt.txt, True):
                # two numbers: no extra gap allowed
                gap = w.spw * 0
            after_space = w.txt == " " and w.prevw is not None
            valid = br1[0] - tol_left <= bl2[0] <= br1[0] + gap + tol_right
            valid = valid and br1[1] - tol_y <= bl2[1] <= br1[1] + tol_y
            if after_space and not valid:
                # reconsider in case previous space was weirdly-kerned
                _, br1p, _, bl2p = w.prevw.get_ut_pts(nxt)
                gap = w.spw * (NUM_SPACES - trl_spcs - ldg_spcs + 1)
                valid = br1p[0] - tol_left <= bl2p[0] <= br1p[0] + gap + tol_right
            if valid:
                candidates.append([nxt, "same", br1, bl2])
        w.mw = candidates

    Perform_Merges(chks, mk=True)

    # Following manual kerning removal, lines with multiple chunks
    # need to be split out into new text els
    newptxts = []
    for ptxt in ptxts:
        for line in ptxt.lns:
            while len(line.chks) > 1:
                newtxt = ptxt.split_off_chunks([line.chks[-1]])
                els.append(newtxt)
                newptxts.append(newtxt.parsed_text)
    return els
import numpy as np
def External_Merges(els, mergenearby, mergesupersub):
    """Queue and perform merges between nearby chunks of any elements.

    Candidate pairs are chunks whose angles differ by less than 0.001 and
    for which the first chunk's enlarged bounding box (``bb_big``, grown by
    ``NUM_SPACES + XTOLEXT`` scaled space widths on every side) intersects
    the second chunk's plain bounding box, as reported by
    ``dh.bb_intersects``. Each candidate ``(w, w2)`` is then classified:

    * ``"same"``: same baseline (within ``YTOLEXT * mch``) and same font
      size; requires ``mergenearby``. Numeric pairs are only merged when
      closer than 0.25 space widths (could be x ticks).
    * ``"super"`` / ``"subreturn"`` / ``"superorsubreturn"``: ``w2`` starts
      between ``w``'s baseline and top; requires ``mergesupersub``.
    * ``"sub"`` / ``"superreturn"`` / ``"suborsuperreturn"``: the top of
      ``w2`` lies between ``w``'s baseline and top; requires
      ``mergesupersub``.

    Sub/superscript merges are skipped when the font weights differ or when
    ``w`` is a single letter in parentheses (helps prevent accidental merges
    of subfigure labels to ticks). Accepted merges are appended to ``w.mw``
    and executed by ``Perform_Merges``.

    Args:
        els: text elements whose chunks are considered.
        mergenearby: enable same-line merges.
        mergesupersub: enable sub/superscript merges.

    Returns:
        ``els`` (the same list).
    """
    # Generate list of merges
    chks = []
    for ptxt in [el.parsed_text for el in els]:
        if ptxt.lns is not None:
            chks += [w for line in ptxt.lns for w in line.chks]

    # Plain bounding box of every chunk, from its characters' points
    pbbs = [None] * len(chks)
    for ii, w in enumerate(chks):
        cx = [v[0] for c in w.chrs for v in c.parsed_pts_t]
        cy = [v[1] for c in w.chrs for v in c.parsed_pts_t]
        pbbs[ii] = tp.bbox([min(cx), min(cy), max(cx) - min(cx), max(cy) - min(cy)])

    for ii, w in enumerate(chks):
        dx = (
            w.spw * w.scf * (NUM_SPACES + XTOLEXT)
        )  # a big bounding box that includes the extra space
        w.bb_big = tp.bbox(
            [
                pbbs[ii].x1 - dx,
                pbbs[ii].y1 - dx,
                pbbs[ii].w + 2 * dx,
                pbbs[ii].h + 2 * dx,
            ]
        )
        w.mw = []

    # Vectorized angle / bbox calculations
    angles = np.array([[w.angle for w in chks]])
    sameangle = abs(angles - angles.T) < 0.001
    bb1s = [w.bb_big for w in chks]
    bb2s = pbbs
    intersects = dh.bb_intersects(bb1s, bb2s)
    # reshape(-1,1) is a transpose
    potentials = np.logical_and(sameangle, intersects)
    potentials = np.logical_and(
        potentials, np.identity(len(chks)) == 0
    )  # off-diagonal only
    goodl = np.argwhere(potentials)

    for ii in range(goodl.shape[0]):
        w = chks[goodl[ii, 0]]
        w2 = chks[goodl[ii, 1]]
        trl_spcs, ldg_spcs = trailing_leading(w.txt, w2.txt)
        dx = w.spw * (NUM_SPACES - trl_spcs - ldg_spcs)
        xtol = XTOLEXT * w.spw
        ytol = YTOLEXT * w.mch

        # calculate 2's coords in 1's system
        tr1, br1, tl2, bl2 = w.get_ut_pts(w2)
        xpenmatch = br1[0] - xtol <= bl2[0] <= br1[0] + dx + xtol
        neitherempty = len(wstrip(w.txt)) > 0 and len(wstrip(w2.txt)) > 0

        # Reset for every pair so the debug report below never reads an
        # unbound or stale value when the classification is skipped.
        mtype = None
        if xpenmatch and neitherempty and not twospaces(w.txt, w2.txt):
            weight_match = (
                w.chrs[-1].tsty['font-weight'] == w2.chrs[0].tsty['font-weight']
            )
            # Don't sub/super merge when differences in font-weight
            # Helps prevent accidental merges of subfigure label to tick
            letterinpar = bool(re.fullmatch(r"^\([a-zA-Z]\)$", w.txt))
            # Don't sub/super merge when is letter enclosed in parentheses
            # Helps prevent accidental merges of subfigure label to tick
            if (
                abs(bl2[1] - br1[1]) < ytol
                and abs(w.tfs - w2.tfs) < 0.001
                and mergenearby
            ):
                if isnumeric(w.line.txt()) and isnumeric(w2.line.txt(), True):
                    numsp = (bl2[0] - br1[0]) / (w.spw)
                    if abs(numsp) < 0.25:
                        # only merge numbers if very close (could be x ticks)
                        mtype = "same"
                else:
                    mtype = "same"
            elif (
                br1[1] + ytol >= bl2[1] >= tr1[1] - ytol
                and mergesupersub
                and weight_match
                and not letterinpar
            ):  # above baseline
                aboveline = (
                    br1[1] * (1 - SUBSUPER_YTHR) + tr1[1] * SUBSUPER_YTHR + ytol
                    >= bl2[1]
                )
                if w2.tfs < w.tfs * SUBSUPER_THR:  # new smaller, expect super
                    if aboveline:
                        mtype = "super"
                elif w.tfs < w2.tfs * SUBSUPER_THR:  # old smaller, expect return
                    mtype = "subreturn"
                elif SUBSUPER_THR == 1:
                    if aboveline:
                        if len(w2.line.txt()) > 2:  # long text, probably not super
                            mtype = "subreturn"
                        else:
                            mtype = "superorsubreturn"
                            # could be either, decide later
                    else:
                        mtype = "subreturn"
            elif (
                br1[1] + ytol >= tl2[1] >= tr1[1] - ytol
                and mergesupersub
                and weight_match
                and not letterinpar
            ):
                belowline = (
                    tl2[1]
                    >= br1[1] * SUBSUPER_YTHR + tr1[1] * (1 - SUBSUPER_YTHR) - ytol
                )
                if w2.tfs < w.tfs * SUBSUPER_THR:  # new smaller, expect sub
                    if belowline:
                        mtype = "sub"
                elif w.tfs < w2.tfs * SUBSUPER_THR:  # old smaller, expect superreturn
                    mtype = "superreturn"
                elif SUBSUPER_THR == 1:
                    if belowline:
                        if len(w2.line.txt()) > 2:  # long text, probably not sub
                            mtype = "superreturn"
                        else:
                            mtype = "suborsuperreturn"
                            # could be either, decide later
                    else:
                        mtype = "superreturn"
            if mtype is not None:
                w.mw.append([w2, mtype, br1, bl2])

        if DEBUG_MERGE:
            dh.idebug('\nMerging "' + w.txt + '" and "' + w2.txt + '"')
            if not (xpenmatch):
                dh.idebug("Aborted, x pen too far: " + str([br1[0], bl2[0], dx]))
            elif not (neitherempty):
                dh.idebug("Aborted, one empty")
            else:
                if mtype is None:
                    if not (abs(bl2[1] - br1[1]) < ytol):
                        dh.idebug("Aborted, y pen too far: " + str([bl2[1], br1[1]]))
                    elif not (abs(w.tfs - w2.tfs) < 0.001):
                        dh.idebug(
                            "Aborted, fonts too different: " + str([w.tfs, w2.tfs])
                        )
                    elif not (
                        not (isnumeric(w.line.txt())) or not (isnumeric(w2.line.txt()))
                    ):
                        dh.idebug("Aborted, both numbers")
                else:
                    dh.idebug("Merged as " + mtype)

    Perform_Merges(chks)
    return els
def Perform_Merges(chks, mk=False):
    """Execute the merges queued on each chunk's ``mw`` list.

    Every entry of ``w.mw`` is ``[w2, mtype, br1, bl2]``. The function works
    in four passes over ``chks``:

    1. pick, for each chunk, the queued merge whose start x (``bl2[0]``) is
       closest to the chunk's end x (``br1[0]``);
    2. chain the picks together (A -> B -> C) onto the first chunk of each
       chain and flag the absorbed chunks with ``merged = True``;
    3. walk each chain's merge types through a normal/sub/super state
       machine, recording ``wtypes`` and discarding the whole chain when a
       transition is not allowed;
    4. append the chained chunks with ``append_chk`` and reconcile the
       ``clip-path`` of the elements involved.

    Args:
        chks: chunks to process; each must already carry an ``mw`` list.
        mk: True when called for manual kerning removal; in that case no
            space is inserted before a merged chunk with ``prevsametspan``
            set if the combined text already contains a space.

    Returns:
        None. Chunks and elements are modified in place.
    """
    # Pass 1: choose the best queued merge for every chunk
    for w in chks:
        mw = w.mw
        minx = float("inf")
        for ii in range(len(mw)):
            w2 = mw[ii][0]
            mtype = mw[ii][1]
            br1 = mw[ii][2]
            bl2 = mw[ii][3]
            if abs(bl2[0] - br1[0]) < minx:
                minx = abs(bl2[0] - br1[0])
                # starting pen best matches the stop of the previous one
                mi = ii
        w.merges = []
        w.mergetypes = []
        w.merged = False
        if len(mw) > 0:
            # NOTE(review): ``mi`` is only assigned when a distance compares
            # below ``minx``; presumably distances are always finite here -
            # confirm.
            w2 = mw[mi][0]
            mtype = mw[mi][1]
            br1 = mw[mi][2]
            bl2 = mw[mi][3]
            w.merges = [w2]
            w.mergetypes = [mtype]
    # Generate chains of merges
    for w in chks:
        # if w.txt=='T':
        if not (w.merged) and len(w.merges) > 0:
            w.merges[-1].merged = True
            nextmerge = w.merges[-1].merges
            nextmerget = w.merges[-1].mergetypes
            # Follow the last chunk's own pick until a chunk with no pick.
            # NOTE(review): a cycle of picks would never terminate here;
            # presumably the geometry checks upstream rule that out - confirm.
            while len(nextmerge) > 0:
                w.merges += nextmerge
                w.mergetypes += nextmerget
                w.merges[-1].merged = True
                nextmerge = w.merges[-1].merges
                nextmerget = w.merges[-1].mergetypes
    # Create a merge plan
    for w in chks:
        if len(w.merges) > 0:
            # ctype tracks whether the text is currently normal, sub or super;
            # wtypes[k + 1] is the state that applies to merges[k].
            ctype = "normal"
            w.wtypes = [ctype]
            bail = False
            for mt in w.mergetypes:
                if ctype == "normal":
                    if mt == "same":
                        pass
                    elif mt == "sub":
                        ctype = "sub"
                    elif mt == "super":
                        ctype = "super"
                    elif mt == "suborsuperreturn":
                        ctype = "sub"
                    elif mt == "superorsubreturn":
                        ctype = "super"
                    elif all(
                        [t == "normal" for t in w.wtypes]
                    ):  # maybe started on sub/super
                        bail = True
                    else:
                        bail = True
                elif ctype == "super":
                    if mt == "same":
                        pass
                    elif mt == "superreturn":
                        ctype = "normal"
                    elif mt == "suborsuperreturn":
                        ctype = "normal"
                    else:
                        bail = True
                elif ctype == "sub":
                    if mt == "same":
                        pass
                    elif mt == "subreturn":
                        ctype = "normal"
                    elif mt == "superorsubreturn":
                        ctype = "normal"
                    else:
                        bail = True
                w.wtypes.append(ctype)
            if bail == True:
                # An invalid transition cancels the whole chain.
                w.wtypes = []
                w.merges = []
    # Pre-merge position calculation
    # Execute the merge plan
    for w in chks:
        if len(w.merges) > 0 and not (w.merged):
            maxii = len(w.merges)
            alltxt = "".join([w.txt] + [w2.txt for w2 in w.merges])
            hasspaces = " " in alltxt
            mels = []
            for ii in range(maxii):
                # maxspaces is passed through to append_chk; None leaves the
                # decision to append_chk, 0 is set below to suppress spaces.
                maxspaces = None
                if mk and hasspaces and w.merges[ii].prevsametspan:
                    maxspaces = 0
                if (
                    w.txt is not None and len(w.txt) > 0 and w.txt[-1] == " "
                ) or w.wtypes[ii + 1] in [
                    "super",
                    "sub",
                ]:  # no extra spaces for sub/supers or if there's already one
                    maxspaces = 0
                mels.append(w.merges[ii].line.ptxt.textel)
                w.append_chk(w.merges[ii], w.wtypes[ii + 1], maxspaces)
            # Union clips if necessary
            mels = dh.unique([w.line.ptxt.textel] + mels)
            if len(mels) > 1:
                clips = [el.get_link("clip-path") for el in mels]
                if any([c is None for c in clips]):
                    # At least one element is unclipped: drop the clip.
                    w.line.ptxt.textel.set("clip-path", None)
                else:
                    # Duplicate main clip
                    dc = clips[0].duplicate()
                    wt = mels[0].ccomposed_transform
                    for ii in range(1, len(mels)):
                        # Duplicate merged clip, group contents, move to main dupe
                        dc2 = clips[ii].duplicate()
                        ng = dh.group(list(dc2))
                        dc.append(ng)
                        ng.ctransform = (-wt) @ mels[ii].ccomposed_transform
                        dc2.delete()
                    mels[0].set("clip-path", dc.get_id(2))
# Characters that can appear in a number. The Unicode minus sign (U+2212) is
# written as an escape so it cannot be lost or confused with a look-alike.
ncs = [
    "0",
    "1",
    "2",
    "3",
    "4",
    "5",
    "6",
    "7",
    "8",
    "9",
    ".",
    "e",
    "E",
    "-",
    "\u2212",
    ",",
]


def isnumeric(s, countminus=False):
    """Check if text represents a number.

    Surrounding whitespace is stripped, Unicode minus signs (U+2212) are
    replaced by ``-`` and commas are removed; the text counts as numeric
    when ``float()`` accepts what is left.

    Args:
        s: text to test.
        countminus: when true, a lone minus sign also counts as a number.

    Returns:
        True if the text is numeric, False otherwise.
    """
    s = s.strip().replace("\u2212", "-").replace(",", "")
    if countminus and s == "-":  # count a minus sign as a number
        return True
    try:
        float(s)
    except ValueError:
        return False
    return True
def wstrip(txt):
    """Return ``txt`` with every space, newline, tab and carriage return removed."""
    drop_table = str.maketrans("", "", " \n\t\r")
    return txt.translate(drop_table)
def twospaces(w1txt, w2txt):
    """Tell whether joining the two texts puts two spaces together at the seam.

    That is the case when ``w1txt`` ends with two spaces, when ``w1txt`` ends
    with a space and ``w2txt`` starts with one, or when ``w2txt`` starts with
    two spaces. A single space on only one side does not count.

    Args:
        w1txt: text of the left chunk, or None.
        w2txt: text of the right chunk, or None.

    Returns:
        True if the resultant chunk has two adjacent spaces, False otherwise.
    """
    # Built with a multiplication so the literal cannot be collapsed to a
    # single space by whitespace-normalising tools.
    double = " " * 2
    first = w1txt or ""
    second = w2txt or ""
    return (
        first.endswith(double)
        or (first.endswith(" ") and second.startswith(" "))
        or second.startswith(double)
    )
def trailing_leading(wtxt, w2txt):
    """Count the spaces at the end of ``wtxt`` and at the start of ``w2txt``.

    Only the plain space character counts. ``None`` is treated like an empty
    string.

    Args:
        wtxt: text of the left chunk, or None.
        w2txt: text of the right chunk, or None.

    Returns:
        ``(trl_spcs, ldg_spcs)``: number of trailing spaces of ``wtxt`` and
        number of leading spaces of ``w2txt``.
    """
    wtxt = wtxt or ""
    w2txt = w2txt or ""
    trl_spcs = len(wtxt) - len(wtxt.rstrip(" "))
    ldg_spcs = len(w2txt) - len(w2txt.lstrip(" "))
    return trl_spcs, ldg_spcs