Movatterモバイル変換


[0]ホーム

URL:


Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit f6861ad

Browse files
committed
Type-1 subsetting
With this I can produce smaller pdf files with usetex in some small tests, but this obviously needs more extensive testing, thus marking as draft. Give dviread.DviFont a fake filename attribute for character tracking. On top of #20715. Closes #127.
1 parent e98bb83 commit f6861ad

File tree

3 files changed

+283
-10
lines changed

3 files changed

+283
-10
lines changed

‎lib/matplotlib/backends/backend_pdf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -981,6 +981,8 @@ def _embedTeXFont(self, fontinfo):
981981
t1font=type1font.Type1Font(fontinfo.fontfile)
982982
iffontinfo.effects:
983983
t1font=t1font.transform(fontinfo.effects)
984+
chars=self._character_tracker.used[fontinfo.dvifont.fname]
985+
t1font=t1font.subset(chars)
984986
fontdict['BaseFont']=Name(t1font.prop['FontName'])
985987

986988
# Font descriptors may be shared between differently encoded
@@ -2255,6 +2257,7 @@ def draw_tex(self, gc, x, y, s, prop, angle, *, mtext=None):
22552257
seq+= [['font',pdfname,dvifont.size]]
22562258
oldfont=dvifont
22572259
seq+= [['text',x1,y1, [bytes([glyph])],x1+width]]
2260+
self.file._character_tracker.track(dvifont,chr(glyph))
22582261

22592262
# Find consecutive text strings with constant y coordinate and
22602263
# combine into a sequence of strings and kerns, or just one

‎lib/matplotlib/dviread.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,9 @@ class DviFont:
546546
Attributes
547547
----------
548548
texname : bytes
549+
fname : str
550+
Compatibility shim so that DviFont can be used with
551+
``_backend_pdf_ps.CharacterTracker``; not a real filename.
549552
size : float
550553
Size of the font in Adobe points, converted from the slightly
551554
smaller TeX points.
@@ -570,6 +573,11 @@ def __init__(self, scale, tfm, texname, vf):
570573
self.widths= [(1000*tfm.width.get(char,0))>>20
571574
forcharinrange(nchars)]
572575

576+
@property
577+
def fname(self):
    """
    Return the TeX font name decoded as Latin-1.

    This is not a real filename; it only gives the font a string
    identifier built from ``texname``.
    """
    return str(self.texname, 'latin-1')
580+
573581
def__eq__(self,other):
574582
return (type(self)==type(other)
575583
andself.texname==other.texnameandself.size==other.size)

‎lib/matplotlib/type1font.py

Lines changed: 272 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,10 @@
2121
v1.1, 1993. ISBN 0-201-57044-0.
2222
"""
2323

24+
importbase64
2425
importbinascii
2526
importfunctools
27+
importitertools
2628
importlogging
2729
importre
2830
importstring
@@ -36,6 +38,35 @@
3638
_log=logging.getLogger(__name__)
3739

3840

41+
def_make_tag(set):
42+
"""
43+
Hash set into a six-character tag made of uppercase letters
44+
45+
Useful for adding a tag into subsetted fonts while keeping the code
46+
reproducible. The function always returns the same value for the
47+
same set on the same exact Python version but is not guaranteed to
48+
not have collisions.
49+
50+
Parameters
51+
----------
52+
set : iterable
53+
The set of glyphs present in a font subset
54+
55+
Returns
56+
-------
57+
str
58+
Six uppercase ASCII letters and a plus sign
59+
"""
60+
61+
# freeze the set to make it hashable, interpret the hash as bytes
62+
array=struct.pack("@q",hash(frozenset(set)))
63+
# turn the bytes into characters with b32encode, which uses uppercase
64+
# letters and numbers from 2 to 7 - remap those arbitrarily
65+
trans=str.maketrans('234567','MTPLIB','=')
66+
return (base64.b32encode(array).decode('ascii')
67+
.translate(trans)[:6]+'+')
68+
69+
3970
class_Token:
4071
"""
4172
A token in a PostScript stream
@@ -627,8 +658,7 @@ def _parse_subrs(self, tokens, _data):
627658

628659
returnarray,next(tokens).endpos()
629660

630-
@staticmethod
631-
def_parse_charstrings(tokens,_data):
661+
def_parse_charstrings(self,tokens,_data):
632662
count_token=next(tokens)
633663
ifnotcount_token.is_number():
634664
raiseRuntimeError(
@@ -650,7 +680,12 @@ def _parse_charstrings(tokens, _data):
650680
f"Token following /{glyphname} in CharStrings definition "
651681
f"must be a number, was{nbytes_token}"
652682
)
653-
next(tokens)# usually RD or |-
683+
token=next(tokens)
684+
ifnottoken.is_keyword(self._abbr['RD']):
685+
raiseRuntimeError(
686+
"Token preceding charstring must be {self._abbr['RD']}, "
687+
f"was{token}"
688+
)
654689
binary_token=tokens.send(1+nbytes_token.value())
655690
charstrings[glyphname]=binary_token.value()
656691

@@ -681,16 +716,15 @@ def _parse_encoding(tokens, _data):
681716
continue
682717
encoding[index_token.value()]=name_token.value()
683718

684-
@staticmethod
685-
def_parse_othersubrs(tokens,data):
719+
def_parse_othersubrs(self,tokens,data):
686720
init_pos=None
687721
whileTrue:
688722
token=next(tokens)
689723
ifinit_posisNone:
690724
init_pos=token.pos
691725
iftoken.is_delim():
692726
_expression(token,tokens,data)
693-
eliftoken.is_keyword('def','ND','|-'):
727+
eliftoken.is_keyword('def',self._abbr['ND']):
694728
returndata[init_pos:token.endpos()],token.endpos()
695729

696730
deftransform(self,effects):
@@ -745,7 +779,7 @@ def transform(self, effects):
745779
fontmatrix= (
746780
'[%s]'%' '.join(_format_approx(x,6)forxinarray)
747781
)
748-
replacements= (
782+
newparts=self._replace(
749783
[(x,'/FontName/%s def'%fontname)
750784
forxinself._pos['FontName']]
751785
+ [(x,'/ItalicAngle %a def'%italicangle)
@@ -755,11 +789,40 @@ def transform(self, effects):
755789
+ [(x,'')forxinself._pos.get('UniqueID', [])]
756790
)
757791

792+
returnType1Font((
793+
newparts[0],
794+
self._encrypt(newparts[1],'eexec'),
795+
self.parts[2]
796+
))
797+
798+
def_replace(self,replacements):
799+
"""
800+
Change the font according to `replacements`
801+
802+
Parameters
803+
----------
804+
replacements : list of ((int, int), str)
805+
Each element is ((pos0, pos1), replacement) where pos0 and
806+
pos1 are indices to the original font data (parts[0] and the
807+
decrypted part concatenated). The data in the interval
808+
pos0:pos1 will be replaced by the replacement text. To
809+
accommodate binary data, the replacement is taken to be in
810+
Latin-1 encoding.
811+
812+
The case where pos0 is inside parts[0] and pos1 inside
813+
the decrypted part is not supported.
814+
815+
Returns
816+
-------
817+
(bytes, bytes)
818+
The new parts[0] and decrypted part (which needs to be
819+
encrypted in the transformed font).
820+
"""
758821
data=bytearray(self.parts[0])
759822
data.extend(self.decrypted)
760823
len0=len(self.parts[0])
761824
for (pos0,pos1),valueinsorted(replacements,reverse=True):
762-
data[pos0:pos1]=value.encode('ascii','replace')
825+
data[pos0:pos1]=value.encode('latin-1')
763826
ifpos0<len(self.parts[0]):
764827
ifpos1>=len(self.parts[0]):
765828
raiseRuntimeError(
@@ -769,12 +832,211 @@ def transform(self, effects):
769832
len0+=len(value)-pos1+pos0
770833

771834
data=bytes(data)
835+
returndata[:len0],data[len0:]
836+
837+
def subset(self, characters):
    """
    Return a new font that only defines the given characters.

    Glyphs reachable from the requested ones (as reported by
    `_simulate`) are kept as well, and ``.notdef`` is always included.
    Subroutines 0-3 are always retained; other subroutines are kept only
    if a retained charstring calls them. Any ``UniqueID`` definition is
    removed and the font name gets a tag prefix derived from the retained
    glyph set.

    Parameters
    ----------
    characters : iterable
        The character codes to include; they are matched against the keys
        of ``self.prop['Encoding']``.

    Returns
    -------
    `Type1Font`
    """
    wanted = set(characters)
    encoding = {}
    for code, glyph in self.prop['Encoding'].items():
        if code in wanted:
            encoding[code] = glyph
    encoding[0] = '.notdef'

    # ``glyphs`` holds every glyph name discovered so far; ``pending`` holds
    # the ones whose charstrings have not been simulated yet.
    glyphs = set(encoding.values())
    pending = set(glyphs)
    kept_subrs = {0, 1, 2, 3}
    while pending:
        name = pending.pop()
        called_glyphs, called_subrs, _, _ = self._simulate(name, [], [])
        kept_subrs.update(called_subrs)
        fresh = set(called_glyphs) - glyphs
        glyphs |= fresh
        pending |= fresh

    fontname = _make_tag(glyphs) + self.prop['FontName']
    charstrings = self._subset_charstrings(glyphs)
    subrs = self._subset_subrs(kept_subrs)

    replacements = [
        (span, '/FontName/%s def' % fontname)
        for span in self._pos['FontName']
    ]
    replacements.append((self._pos['CharStrings'][0], charstrings))
    replacements.append((self._pos['Subrs'][0], subrs))
    replacements.append(
        (self._pos['Encoding'][0], self._subset_encoding(encoding)))
    replacements.extend(
        (span, '') for span in self._pos.get('UniqueID', []))

    cleartext, decrypted = self._replace(replacements)
    return Type1Font((
        cleartext,
        self._encrypt(decrypted, 'eexec'),
        self.parts[2],
    ))
777883

884+
@staticmethod
885+
def_charstring_tokens(data):
886+
data=iter(data)
887+
forbyteindata:
888+
if32<=byte<=246:
889+
yieldbyte-139
890+
elif247<=byte<=250:
891+
byte2=next(data)
892+
yield (byte-247)*256+byte2+108
893+
elif251<=byte<=254:
894+
byte2=next(data)
895+
yield-(byte-251)*256-byte2-108
896+
elifbyte==255:
897+
bs=itertools.islice(data,4)
898+
yieldstruct.unpack('>i',bs)[0]
899+
elifbyte==12:
900+
byte1=next(data)
901+
yield {
902+
0:'dotsection',
903+
1:'vstem3',
904+
2:'hstem3',
905+
6:'seac',
906+
7:'sbw',
907+
12:'div',
908+
16:'callothersubr',
909+
17:'pop',
910+
33:'setcurrentpoint'
911+
}[byte1]
912+
else:
913+
yield {
914+
1:'hstem',
915+
3:'vstem',
916+
4:'vmoveto',
917+
5:'rlineto',
918+
6:'hlineto',
919+
7:'vlineto',
920+
8:'rrcurveto',
921+
9:'closepath',
922+
10:'callsubr',
923+
11:'return',
924+
13:'hsbw',
925+
14:'endchar',
926+
21:'rmoveto',
927+
22:'hmoveto',
928+
30:'vhcurveto',
929+
31:'hvcurveto'
930+
}[byte]
931+
932+
def_step(self,buildchar_stack,postscript_stack,opcode):
933+
ifisinstance(opcode,int):
934+
returnset(),set(),buildchar_stack+ [opcode],postscript_stack
935+
elifopcodein {
936+
'hsbw','sbw','closepath','hlineto','hmoveto','hcurveto',
937+
'hvcurveto','rlineto','rmoveto','rrcurveto','vhcurveto',
938+
'vlineto','vmoveto','dotsection','hstem','hstem3','vstem',
939+
'vstem3','setcurrentpoint'
940+
}:
941+
returnset(),set(), [],postscript_stack
942+
elifopcode=='seac':
943+
codes=buildchar_stack[3:5]
944+
glyphs= [self.prop['Encoding'][x]forxincodes]
945+
returnset(glyphs),set(), [],postscript_stack
946+
elifopcode=='div':
947+
num1,num2=buildchar_stack[-2:]
948+
return (
949+
set(),
950+
set(),
951+
buildchar_stack[-2:]+ [num1/num2],postscript_stack
952+
)
953+
elifopcode=='callothersubr':
954+
othersubr=buildchar_stack[-1]
955+
n=buildchar_stack[-2]
956+
args=buildchar_stack[-2-n:-2]
957+
ifothersubr==3:# Section 8.1 in Type-1 spec
958+
postscript_stack.append(args[0])
959+
else:
960+
postscript_stack.extend(args[::-1])
961+
returnset(),set(),buildchar_stack[:-n-2],postscript_stack
962+
elifopcode=='callsubr':
963+
subr=buildchar_stack[-1]
964+
glyphs,subrs,new_bc_stack,new_ps_stack= \
965+
self._simulate(subr,buildchar_stack[:-1],postscript_stack)
966+
returnset(),subrs| {subr},new_bc_stack,new_ps_stack
967+
elifopcode=='pop':
968+
return (
969+
set(),
970+
set(),
971+
buildchar_stack+ [postscript_stack[-1]],postscript_stack[:-1]
972+
)
973+
else:
974+
raiseRuntimeError(f'opcode{opcode}')
975+
976+
def_simulate(self,glyph_or_subr,buildchar_stack,postscript_stack):
977+
ifisinstance(glyph_or_subr,str):
978+
program=self.prop['CharStrings'][glyph_or_subr]
979+
glyphs= {glyph_or_subr}
980+
subrs=set()
981+
else:
982+
program=self.prop['Subrs'][glyph_or_subr]
983+
glyphs=set()
984+
subrs= {glyph_or_subr}
985+
foropcodeinself._charstring_tokens(program):
986+
ifopcodein ('return','endchar'):
987+
returnglyphs,subrs,buildchar_stack,postscript_stack
988+
newglyphs,newsubrs,buildchar_stack,postscript_stack= \
989+
self._step(buildchar_stack,postscript_stack,opcode)
990+
glyphs.update(newglyphs)
991+
subrs.update(newsubrs)
992+
993+
def_subset_encoding(self,encoding):
994+
result= [
995+
'/Encoding 256 array\n0 1 255 { 1 index exch /.notdef put } for'
996+
]
997+
result.extend(
998+
f'dup{i} /{glyph} put'
999+
fori,glyphinsorted(encoding.items())
1000+
ifglyph!='.notdef'
1001+
)
1002+
result.extend('readonly def\n')
1003+
return'\n'.join(result)
1004+
1005+
def_subset_charstrings(self,glyphs):
1006+
result= [f'/CharStrings{len(glyphs)} dict dup begin']
1007+
encrypted= [self._encrypt(self.prop['CharStrings'][glyph],
1008+
'charstring',
1009+
self.prop.get('lenIV',4)
1010+
).decode('latin-1')
1011+
forglyphinglyphs]
1012+
RD,ND=self._abbr['RD'],self._abbr['ND']
1013+
result.extend(
1014+
f'/{glyph}{len(enc)}{RD}{enc}{ND}'
1015+
forglyph,encinzip(glyphs,encrypted)
1016+
)
1017+
result.append('end\n')
1018+
return'\n'.join(result)
1019+
1020+
def_subset_subrs(self,indices):
1021+
# we can't remove subroutines, we just replace unused ones with a stub
1022+
n_subrs=len(self.prop['Subrs'])
1023+
result= [f'/Subrs{n_subrs} array']
1024+
lenIV=self.prop.get('lenIV',4)
1025+
stub=self._encrypt(b'\x0b','charstring',lenIV).decode('latin-1')
1026+
encrypted= [
1027+
self._encrypt(self.prop['Subrs'][i],'charstring',lenIV
1028+
).decode('latin-1')
1029+
ifiinindiceselsestub
1030+
foriinrange(n_subrs)
1031+
]
1032+
RD,ND,NP=self._abbr['RD'],self._abbr['ND'],self._abbr['NP']
1033+
result.extend(
1034+
f'dup{i}{len(enc)}{RD}{enc}{NP}'
1035+
fori,encinenumerate(encrypted)
1036+
)
1037+
result.extend((ND,''))
1038+
return'\n'.join(result)
1039+
7781040

7791041
_StandardEncoding= {
7801042
**{ord(letter):letterforletterinstring.ascii_letters},

0 commit comments

Comments
 (0)

[8]ページ先頭

©2009-2025 Movatter.jp