NotificationsYou must be signed in to change notification settings
Fork7.9k
Star21.3k

Commitf6861ad

committed

Type-1 subsetting

With this I can produce smaller PDF files with usetex in some small tests, but this obviously needs more extensive testing, thus marking as draft. Give dviread.DviFont a fake filename attribute for character tracking. On top of #20715. Closes #127.

1 parente98bb83 commitf6861adCopy full SHA for f6861ad

File tree

3 files changed

+283

-10

lines changed

lib/matplotlib
- backends
  - backend_pdf.py
- dviread.py
- type1font.py

3 files changed

+283

-10

lines changed

`‎lib/matplotlib/backends/backend_pdf.py`

Lines changed: 3 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -981,6 +981,8 @@ def _embedTeXFont(self, fontinfo):`
`981`	`981`	`t1font=type1font.Type1Font(fontinfo.fontfile)`
`982`	`982`	`iffontinfo.effects:`
`983`	`983`	`t1font=t1font.transform(fontinfo.effects)`
	`984`	`+chars=self._character_tracker.used[fontinfo.dvifont.fname]`
	`985`	`+t1font=t1font.subset(chars)`
`984`	`986`	`fontdict['BaseFont']=Name(t1font.prop['FontName'])`
`985`	`987`
`986`	`988`	`# Font descriptors may be shared between differently encoded`
`@@ -2255,6 +2257,7 @@ def draw_tex(self, gc, x, y, s, prop, angle, *, mtext=None):`
`2255`	`2257`	`seq+= [['font',pdfname,dvifont.size]]`
`2256`	`2258`	`oldfont=dvifont`
`2257`	`2259`	`seq+= [['text',x1,y1, [bytes([glyph])],x1+width]]`
	`2260`	`+self.file._character_tracker.track(dvifont,chr(glyph))`
`2258`	`2261`
`2259`	`2262`	`# Find consecutive text strings with constant y coordinate and`
`2260`	`2263`	`# combine into a sequence of strings and kerns, or just one`

`‎lib/matplotlib/dviread.py`

Lines changed: 8 additions & 0 deletions

Original file line number	Diff line number	Diff line change
`@@ -546,6 +546,9 @@ class DviFont:`
`546`	`546`	`Attributes`
`547`	`547`	`----------`
`548`	`548`	`texname : bytes`
	`549`	`+ fname : str`
	`550`	`+ Compatibility shim so that DviFont can be used with`
	`551`	+ ``_backend_pdf_ps.CharacterTracker``; not a real filename.
`549`	`552`	`size : float`
`550`	`553`	`Size of the font in Adobe points, converted from the slightly`
`551`	`554`	`smaller TeX points.`
`@@ -570,6 +573,11 @@ def __init__(self, scale, tfm, texname, vf):`
`570`	`573`	`self.widths= [(1000*tfm.width.get(char,0))>>20`
`571`	`574`	`forcharinrange(nchars)]`
`572`	`575`
	`576`	`+@property`
	`577`	`+deffname(self):`
	`578`	`+"""A fake filename"""`
	`579`	`+returnself.texname.decode('latin-1')`
	`580`	`+`
`573`	`581`	`def__eq__(self,other):`
`574`	`582`	`return (type(self)==type(other)`
`575`	`583`	`andself.texname==other.texnameandself.size==other.size)`

`‎lib/matplotlib/type1font.py`

Lines changed: 272 additions & 10 deletions

Original file line number	Diff line number	Diff line change
`@@ -21,8 +21,10 @@`
`21`	`21`	`v1.1, 1993. ISBN 0-201-57044-0.`
`22`	`22`	`"""`
`23`	`23`
	`24`	`+importbase64`
`24`	`25`	`importbinascii`
`25`	`26`	`importfunctools`
	`27`	`+importitertools`
`26`	`28`	`importlogging`
`27`	`29`	`importre`
`28`	`30`	`importstring`
`@@ -36,6 +38,35 @@`
`36`	`38`	`_log=logging.getLogger(__name__)`
`37`	`39`
`38`	`40`
	`41`	`+def_make_tag(set):`
	`42`	`+"""`
	`43`	`+ Hash set into a six-character tag made of uppercase letters`
	`44`	`+`
	`45`	`+ Useful for adding a tag into subsetted fonts while keeping the code`
	`46`	`+ reproducible. The function always returns the same value for the`
	`47`	`+ same set on the same exact Python version but is not guaranteed to`
	`48`	`+ not have collisions.`
	`49`	`+`
	`50`	`+ Parameters`
	`51`	`+ ----------`
	`52`	`+ set : iterable`
	`53`	`+ The set of glyphs present in a font subset`
	`54`	`+`
	`55`	`+ Returns`
	`56`	`+ -------`
	`57`	`+ str`
	`58`	`+ Six uppercase ASCII letters and a plus sign`
	`59`	`+ """`
	`60`	`+`
	`61`	`+# freeze the set to make it hashable, interpret the hash as bytes`
	`62`	`+array=struct.pack("@q",hash(frozenset(set)))`
	`63`	`+# turn the bytes into characters with b32encode, which uses uppercase`
	`64`	`+# letters and numbers from 2 to 7 - remap those arbitrarily`
	`65`	`+trans=str.maketrans('234567','MTPLIB','=')`
	`66`	`+return (base64.b32encode(array).decode('ascii')`
	`67`	`+ .translate(trans)[:6]+'+')`
	`68`	`+`
	`69`	`+`
`39`	`70`	`class_Token:`
`40`	`71`	`"""`
`41`	`72`	`A token in a PostScript stream`
`@@ -627,8 +658,7 @@ def _parse_subrs(self, tokens, _data):`
`627`	`658`
`628`	`659`	`returnarray,next(tokens).endpos()`
`629`	`660`
`630`		`-@staticmethod`
`631`		`-def_parse_charstrings(tokens,_data):`
	`661`	`+def_parse_charstrings(self,tokens,_data):`
`632`	`662`	`count_token=next(tokens)`
`633`	`663`	`ifnotcount_token.is_number():`
`634`	`664`	`raiseRuntimeError(`
`@@ -650,7 +680,12 @@ def _parse_charstrings(tokens, _data):`
`650`	`680`	`f"Token following /{glyphname} in CharStrings definition "`
`651`	`681`	`f"must be a number, was{nbytes_token}"`
`652`	`682`	`)`
`653`		`-next(tokens)# usually RD or \|-`
	`683`	`+token=next(tokens)`
	`684`	`+ifnottoken.is_keyword(self._abbr['RD']):`
	`685`	`+raiseRuntimeError(`
	`686`	`+"Token preceding charstring must be {self._abbr['RD']}, "`
	`687`	`+f"was{token}"`
	`688`	`+ )`
`654`	`689`	`binary_token=tokens.send(1+nbytes_token.value())`
`655`	`690`	`charstrings[glyphname]=binary_token.value()`
`656`	`691`
`@@ -681,16 +716,15 @@ def _parse_encoding(tokens, _data):`
`681`	`716`	`continue`
`682`	`717`	`encoding[index_token.value()]=name_token.value()`
`683`	`718`
`684`		`-@staticmethod`
`685`		`-def_parse_othersubrs(tokens,data):`
	`719`	`+def_parse_othersubrs(self,tokens,data):`
`686`	`720`	`init_pos=None`
`687`	`721`	`whileTrue:`
`688`	`722`	`token=next(tokens)`
`689`	`723`	`ifinit_posisNone:`
`690`	`724`	`init_pos=token.pos`
`691`	`725`	`iftoken.is_delim():`
`692`	`726`	`_expression(token,tokens,data)`
`693`		`-eliftoken.is_keyword('def','ND','\|-'):`
	`727`	`+eliftoken.is_keyword('def',self._abbr['ND']):`
`694`	`728`	`returndata[init_pos:token.endpos()],token.endpos()`
`695`	`729`
`696`	`730`	`deftransform(self,effects):`
`@@ -745,7 +779,7 @@ def transform(self, effects):`
`745`	`779`	`fontmatrix= (`
`746`	`780`	`'[%s]'%' '.join(_format_approx(x,6)forxinarray)`
`747`	`781`	`)`
`748`		`-replacements= (`
	`782`	`+newparts=self._replace(`
`749`	`783`	`[(x,'/FontName/%s def'%fontname)`
`750`	`784`	`forxinself._pos['FontName']]`
`751`	`785`	`+ [(x,'/ItalicAngle %a def'%italicangle)`
`@@ -755,11 +789,40 @@ def transform(self, effects):`
`755`	`789`	`+ [(x,'')forxinself._pos.get('UniqueID', [])]`
`756`	`790`	`)`
`757`	`791`
	`792`	`+returnType1Font((`
	`793`	`+newparts[0],`
	`794`	`+self._encrypt(newparts[1],'eexec'),`
	`795`	`+self.parts[2]`
	`796`	`+ ))`
	`797`	`+`
	`798`	`+def_replace(self,replacements):`
	`799`	`+"""`
	`800`	+ Change the font according to `replacements`
	`801`	`+`
	`802`	`+ Parameters`
	`803`	`+ ----------`
	`804`	`+ replacements : list of ((int, int), str)`
	`805`	`+ Each element is ((pos0, pos1), replacement) where pos0 and`
	`806`	`+ pos1 are indices to the original font data (parts[0] and the`
	`807`	`+ decrypted part concatenated). The data in the interval`
	`808`	`+ pos0:pos1 will be replaced by the replacement text. To`
	`809`	`+ accommodate binary data, the replacement is taken to be in`
	`810`	`+ Latin-1 encoding.`
	`811`	`+`
	`812`	`+ The case where pos0 is inside parts[0] and pos1 inside`
	`813`	`+ the decrypted part is not supported.`
	`814`	`+`
	`815`	`+ Returns`
	`816`	`+ -------`
	`817`	`+ (bytes, bytes)`
	`818`	`+ The new parts[0] and decrypted part (which needs to be`
	`819`	`+ encrypted in the transformed font).`
	`820`	`+ """`
`758`	`821`	`data=bytearray(self.parts[0])`
`759`	`822`	`data.extend(self.decrypted)`
`760`	`823`	`len0=len(self.parts[0])`
`761`	`824`	`for (pos0,pos1),valueinsorted(replacements,reverse=True):`
`762`		`-data[pos0:pos1]=value.encode('ascii','replace')`
	`825`	`+data[pos0:pos1]=value.encode('latin-1')`
`763`	`826`	`ifpos0<len(self.parts[0]):`
`764`	`827`	`ifpos1>=len(self.parts[0]):`
`765`	`828`	`raiseRuntimeError(`
`@@ -769,12 +832,211 @@ def transform(self, effects):`
`769`	`832`	`len0+=len(value)-pos1+pos0`
`770`	`833`
`771`	`834`	`data=bytes(data)`
	`835`	`+returndata[:len0],data[len0:]`
	`836`	`+`
	`837`	`+defsubset(self,characters):`
	`838`	`+"""`
	`839`	`+ Return a new font that only defines the given characters.`
	`840`	`+`
	`841`	`+ Parameters`
	`842`	`+ ----------`
	`843`	`+ characters : sequence of bytes`
	`844`	`+ The subset of characters to include`
	`845`	`+`
	`846`	`+ Returns`
	`847`	`+ -------`
	`848`	+ `Type1Font`
	`849`	`+ """`
	`850`	`+`
	`851`	`+characters=set(characters)`
	`852`	`+encoding= {code:glyph`
	`853`	`+forcode,glyphinself.prop['Encoding'].items()`
	`854`	`+ifcodeincharacters}`
	`855`	`+encoding[0]='.notdef'`
	`856`	`+# todo and done include strings (glyph names)`
	`857`	`+todo=set(encoding.values())`
	`858`	`+done=set()`
	`859`	`+seen_subrs= {0,1,2,3}`
	`860`	`+whiletodo-done:`
	`861`	`+glyph=next(iter(todo-done))`
	`862`	`+called_glyphs,called_subrs,_,_=self._simulate(glyph, [], [])`
	`863`	`+todo.update(called_glyphs)`
	`864`	`+seen_subrs.update(called_subrs)`
	`865`	`+done.add(glyph)`
	`866`	`+`
	`867`	`+fontname=_make_tag(todo)+self.prop['FontName']`
	`868`	`+charstrings=self._subset_charstrings(todo)`
	`869`	`+subrs=self._subset_subrs(seen_subrs)`
	`870`	`+newparts=self._replace(`
	`871`	`+ [(x,'/FontName/%s def'%fontname)`
	`872`	`+forxinself._pos['FontName']]`
	`873`	`++ [(self._pos['CharStrings'][0],charstrings),`
	`874`	`+ (self._pos['Subrs'][0],subrs),`
	`875`	`+ (self._pos['Encoding'][0],self._subset_encoding(encoding))`
	`876`	`+ ]+ [(x,'')forxinself._pos.get('UniqueID', [])]`
	`877`	`+ )`
`772`	`878`	`returnType1Font((`
`773`		`-data[:len0],`
`774`		`-self._encrypt(data[len0:],'eexec'),`
	`879`	`+newparts[0],`
	`880`	`+self._encrypt(newparts[1],'eexec'),`
`775`	`881`	`self.parts[2]`
`776`	`882`	`))`
`777`	`883`
	`884`	`+@staticmethod`
	`885`	`+def_charstring_tokens(data):`
	`886`	`+data=iter(data)`
	`887`	`+forbyteindata:`
	`888`	`+if32<=byte<=246:`
	`889`	`+yieldbyte-139`
	`890`	`+elif247<=byte<=250:`
	`891`	`+byte2=next(data)`
	`892`	`+yield (byte-247)*256+byte2+108`
	`893`	`+elif251<=byte<=254:`
	`894`	`+byte2=next(data)`
	`895`	`+yield-(byte-251)*256-byte2-108`
	`896`	`+elifbyte==255:`
	`897`	`+bs=itertools.islice(data,4)`
	`898`	`+yieldstruct.unpack('>i',bs)[0]`
	`899`	`+elifbyte==12:`
	`900`	`+byte1=next(data)`
	`901`	`+yield {`
	`902`	`+0:'dotsection',`
	`903`	`+1:'vstem3',`
	`904`	`+2:'hstem3',`
	`905`	`+6:'seac',`
	`906`	`+7:'sbw',`
	`907`	`+12:'div',`
	`908`	`+16:'callothersubr',`
	`909`	`+17:'pop',`
	`910`	`+33:'setcurrentpoint'`
	`911`	`+ }[byte1]`
	`912`	`+else:`
	`913`	`+yield {`
	`914`	`+1:'hstem',`
	`915`	`+3:'vstem',`
	`916`	`+4:'vmoveto',`
	`917`	`+5:'rlineto',`
	`918`	`+6:'hlineto',`
	`919`	`+7:'vlineto',`
	`920`	`+8:'rrcurveto',`
	`921`	`+9:'closepath',`
	`922`	`+10:'callsubr',`
	`923`	`+11:'return',`
	`924`	`+13:'hsbw',`
	`925`	`+14:'endchar',`
	`926`	`+21:'rmoveto',`
	`927`	`+22:'hmoveto',`
	`928`	`+30:'vhcurveto',`
	`929`	`+31:'hvcurveto'`
	`930`	`+ }[byte]`
	`931`	`+`
	`932`	`+def_step(self,buildchar_stack,postscript_stack,opcode):`
	`933`	`+ifisinstance(opcode,int):`
	`934`	`+returnset(),set(),buildchar_stack+ [opcode],postscript_stack`
	`935`	`+elifopcodein {`
	`936`	`+'hsbw','sbw','closepath','hlineto','hmoveto','hcurveto',`
	`937`	`+'hvcurveto','rlineto','rmoveto','rrcurveto','vhcurveto',`
	`938`	`+'vlineto','vmoveto','dotsection','hstem','hstem3','vstem',`
	`939`	`+'vstem3','setcurrentpoint'`
	`940`	`+ }:`
	`941`	`+returnset(),set(), [],postscript_stack`
	`942`	`+elifopcode=='seac':`
	`943`	`+codes=buildchar_stack[3:5]`
	`944`	`+glyphs= [self.prop['Encoding'][x]forxincodes]`
	`945`	`+returnset(glyphs),set(), [],postscript_stack`
	`946`	`+elifopcode=='div':`
	`947`	`+num1,num2=buildchar_stack[-2:]`
	`948`	`+return (`
	`949`	`+set(),`
	`950`	`+set(),`
	`951`	`+buildchar_stack[-2:]+ [num1/num2],postscript_stack`
	`952`	`+ )`
	`953`	`+elifopcode=='callothersubr':`
	`954`	`+othersubr=buildchar_stack[-1]`
	`955`	`+n=buildchar_stack[-2]`
	`956`	`+args=buildchar_stack[-2-n:-2]`
	`957`	`+ifothersubr==3:# Section 8.1 in Type-1 spec`
	`958`	`+postscript_stack.append(args[0])`
	`959`	`+else:`
	`960`	`+postscript_stack.extend(args[::-1])`
	`961`	`+returnset(),set(),buildchar_stack[:-n-2],postscript_stack`
	`962`	`+elifopcode=='callsubr':`
	`963`	`+subr=buildchar_stack[-1]`
	`964`	`+glyphs,subrs,new_bc_stack,new_ps_stack= \`
	`965`	`+self._simulate(subr,buildchar_stack[:-1],postscript_stack)`
	`966`	`+returnset(),subrs\| {subr},new_bc_stack,new_ps_stack`
	`967`	`+elifopcode=='pop':`
	`968`	`+return (`
	`969`	`+set(),`
	`970`	`+set(),`
	`971`	`+buildchar_stack+ [postscript_stack[-1]],postscript_stack[:-1]`
	`972`	`+ )`
	`973`	`+else:`
	`974`	`+raiseRuntimeError(f'opcode{opcode}')`
	`975`	`+`
	`976`	`+def_simulate(self,glyph_or_subr,buildchar_stack,postscript_stack):`
	`977`	`+ifisinstance(glyph_or_subr,str):`
	`978`	`+program=self.prop['CharStrings'][glyph_or_subr]`
	`979`	`+glyphs= {glyph_or_subr}`
	`980`	`+subrs=set()`
	`981`	`+else:`
	`982`	`+program=self.prop['Subrs'][glyph_or_subr]`
	`983`	`+glyphs=set()`
	`984`	`+subrs= {glyph_or_subr}`
	`985`	`+foropcodeinself._charstring_tokens(program):`
	`986`	`+ifopcodein ('return','endchar'):`
	`987`	`+returnglyphs,subrs,buildchar_stack,postscript_stack`
	`988`	`+newglyphs,newsubrs,buildchar_stack,postscript_stack= \`
	`989`	`+self._step(buildchar_stack,postscript_stack,opcode)`
	`990`	`+glyphs.update(newglyphs)`
	`991`	`+subrs.update(newsubrs)`
	`992`	`+`
	`993`	`+def_subset_encoding(self,encoding):`
	`994`	`+result= [`
	`995`	`+'/Encoding 256 array\n0 1 255 { 1 index exch /.notdef put } for'`
	`996`	`+ ]`
	`997`	`+result.extend(`
	`998`	`+f'dup{i} /{glyph} put'`
	`999`	`+fori,glyphinsorted(encoding.items())`
	`1000`	`+ifglyph!='.notdef'`
	`1001`	`+ )`
	`1002`	`+result.extend('readonly def\n')`
	`1003`	`+return'\n'.join(result)`
	`1004`	`+`
	`1005`	`+def_subset_charstrings(self,glyphs):`
	`1006`	`+result= [f'/CharStrings{len(glyphs)} dict dup begin']`
	`1007`	`+encrypted= [self._encrypt(self.prop['CharStrings'][glyph],`
	`1008`	`+'charstring',`
	`1009`	`+self.prop.get('lenIV',4)`
	`1010`	`+ ).decode('latin-1')`
	`1011`	`+forglyphinglyphs]`
	`1012`	`+RD,ND=self._abbr['RD'],self._abbr['ND']`
	`1013`	`+result.extend(`
	`1014`	`+f'/{glyph}{len(enc)}{RD}{enc}{ND}'`
	`1015`	`+forglyph,encinzip(glyphs,encrypted)`
	`1016`	`+ )`
	`1017`	`+result.append('end\n')`
	`1018`	`+return'\n'.join(result)`
	`1019`	`+`
	`1020`	`+def_subset_subrs(self,indices):`
	`1021`	`+# we can't remove subroutines, we just replace unused ones with a stub`
	`1022`	`+n_subrs=len(self.prop['Subrs'])`
	`1023`	`+result= [f'/Subrs{n_subrs} array']`
	`1024`	`+lenIV=self.prop.get('lenIV',4)`
	`1025`	`+stub=self._encrypt(b'\x0b','charstring',lenIV).decode('latin-1')`
	`1026`	`+encrypted= [`
	`1027`	`+self._encrypt(self.prop['Subrs'][i],'charstring',lenIV`
	`1028`	`+ ).decode('latin-1')`
	`1029`	`+ifiinindiceselsestub`
	`1030`	`+foriinrange(n_subrs)`
	`1031`	`+ ]`
	`1032`	`+RD,ND,NP=self._abbr['RD'],self._abbr['ND'],self._abbr['NP']`
	`1033`	`+result.extend(`
	`1034`	`+f'dup{i}{len(enc)}{RD}{enc}{NP}'`
	`1035`	`+fori,encinenumerate(encrypted)`
	`1036`	`+ )`
	`1037`	`+result.extend((ND,''))`
	`1038`	`+return'\n'.join(result)`
	`1039`	`+`
`778`	`1040`
`779`	`1041`	`_StandardEncoding= {`
`780`	`1042`	`**{ord(letter):letterforletterinstring.ascii_letters},`

0 commit comments

Comments

(0)

Movatterモバイル変換

Navigation Menu

Search code, repositories, users, issues, pull requests...

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Uh oh!

Commitf6861ad

File tree

3 files changed

3 files changed

`‎lib/matplotlib/backends/backend_pdf.py`

`‎lib/matplotlib/dviread.py`

`‎lib/matplotlib/type1font.py`

0 commit comments