21
21
v1.1, 1993. ISBN 0-201-57044-0.
22
22
"""
23
23
24
+ import base64
24
25
import binascii
25
26
import functools
27
+ import itertools
26
28
import logging
27
29
import re
28
30
import string
36
38
_log = logging .getLogger (__name__ )
37
39
38
40
41
+ def _make_tag (set ):
42
+ """
43
+ Hash set into a six-character tag made of uppercase letters
44
+
45
+ Useful for adding a tag into subsetted fonts while keeping the code
46
+ reproducible. The function always returns the same value for the
47
+ same set on the same exact Python version but is not guaranteed to
48
+ not have collisions.
49
+
50
+ Parameters
51
+ ----------
52
+ set : iterable
53
+ The set of glyphs present in a font subset
54
+
55
+ Returns
56
+ -------
57
+ str
58
+ Six uppercase ASCII letters and a plus sign
59
+ """
60
+
61
+ # freeze the set to make it hashable, interpret the hash as bytes
62
+ array = struct .pack ("@q" ,hash (frozenset (set )))
63
+ # turn the bytes into characters with b32encode, which uses uppercase
64
+ # letters and numbers from 2 to 7 - remap those arbitrarily
65
+ trans = str .maketrans ('234567' ,'MTPLIB' ,'=' )
66
+ return (base64 .b32encode (array ).decode ('ascii' )
67
+ .translate (trans )[:6 ]+ '+' )
68
+
69
+
39
70
class _Token :
40
71
"""
41
72
A token in a PostScript stream
@@ -627,8 +658,7 @@ def _parse_subrs(self, tokens, _data):
627
658
628
659
return array ,next (tokens ).endpos ()
629
660
630
- @staticmethod
631
- def _parse_charstrings (tokens ,_data ):
661
+ def _parse_charstrings (self ,tokens ,_data ):
632
662
count_token = next (tokens )
633
663
if not count_token .is_number ():
634
664
raise RuntimeError (
@@ -650,7 +680,12 @@ def _parse_charstrings(tokens, _data):
650
680
f"Token following /{ glyphname } in CharStrings definition "
651
681
f"must be a number, was{ nbytes_token } "
652
682
)
653
- next (tokens )# usually RD or |-
683
+ token = next (tokens )
684
+ if not token .is_keyword (self ._abbr ['RD' ]):
685
+ raise RuntimeError (
686
+ "Token preceding charstring must be {self._abbr['RD']}, "
687
+ f"was{ token } "
688
+ )
654
689
binary_token = tokens .send (1 + nbytes_token .value ())
655
690
charstrings [glyphname ]= binary_token .value ()
656
691
@@ -681,16 +716,15 @@ def _parse_encoding(tokens, _data):
681
716
continue
682
717
encoding [index_token .value ()]= name_token .value ()
683
718
684
- @staticmethod
685
- def _parse_othersubrs (tokens ,data ):
719
+ def _parse_othersubrs (self ,tokens ,data ):
686
720
init_pos = None
687
721
while True :
688
722
token = next (tokens )
689
723
if init_pos is None :
690
724
init_pos = token .pos
691
725
if token .is_delim ():
692
726
_expression (token ,tokens ,data )
693
- elif token .is_keyword ('def' ,'ND' , '|-' ):
727
+ elif token .is_keyword ('def' ,self . _abbr [ 'ND' ] ):
694
728
return data [init_pos :token .endpos ()],token .endpos ()
695
729
696
730
def transform (self ,effects ):
@@ -745,7 +779,7 @@ def transform(self, effects):
745
779
fontmatrix = (
746
780
'[%s]' % ' ' .join (_format_approx (x ,6 )for x in array )
747
781
)
748
- replacements = (
782
+ newparts = self . _replace (
749
783
[(x ,'/FontName/%s def' % fontname )
750
784
for x in self ._pos ['FontName' ]]
751
785
+ [(x ,'/ItalicAngle %a def' % italicangle )
@@ -755,11 +789,40 @@ def transform(self, effects):
755
789
+ [(x ,'' )for x in self ._pos .get ('UniqueID' , [])]
756
790
)
757
791
792
+ return Type1Font ((
793
+ newparts [0 ],
794
+ self ._encrypt (newparts [1 ],'eexec' ),
795
+ self .parts [2 ]
796
+ ))
797
+
798
+ def _replace (self ,replacements ):
799
+ """
800
+ Change the font according to `replacements`
801
+
802
+ Parameters
803
+ ----------
804
+ replacements : list of ((int, int), str)
805
+ Each element is ((pos0, pos1), replacement) where pos0 and
806
+ pos1 are indices to the original font data (parts[0] and the
807
+ decrypted part concatenated). The data in the interval
808
+ pos0:pos1 will be replaced by the replacement text. To
809
+ accommodate binary data, the replacement is taken to be in
810
+ Latin-1 encoding.
811
+
812
+ The case where pos0 is inside parts[0] and pos1 inside
813
+ the decrypted part is not supported.
814
+
815
+ Returns
816
+ -------
817
+ (bytes, bytes)
818
+ The new parts[0] and decrypted part (which needs to be
819
+ encrypted in the transformed font).
820
+ """
758
821
data = bytearray (self .parts [0 ])
759
822
data .extend (self .decrypted )
760
823
len0 = len (self .parts [0 ])
761
824
for (pos0 ,pos1 ),value in sorted (replacements ,reverse = True ):
762
- data [pos0 :pos1 ]= value .encode ('ascii' , 'replace ' )
825
+ data [pos0 :pos1 ]= value .encode ('latin-1 ' )
763
826
if pos0 < len (self .parts [0 ]):
764
827
if pos1 >= len (self .parts [0 ]):
765
828
raise RuntimeError (
@@ -769,12 +832,211 @@ def transform(self, effects):
769
832
len0 += len (value )- pos1 + pos0
770
833
771
834
data = bytes (data )
835
+ return data [:len0 ],data [len0 :]
836
+
837
def subset(self, characters):
    """
    Return a new font that only defines the given characters.

    The glyphs for the requested codes (plus ``.notdef``) are simulated
    to find every glyph and subroutine they reference; the FontName,
    CharStrings, Subrs and Encoding definitions are then rewritten and
    any UniqueID definitions are removed.

    Parameters
    ----------
    characters : sequence of bytes
        The subset of characters to include

    Returns
    -------
    `Type1Font`
    """
    wanted_codes = set(characters)
    encoding = {}
    for code, glyph in self.prop['Encoding'].items():
        if code in wanted_codes:
            encoding[code] = glyph
    encoding[0] = '.notdef'

    # todo and done include strings (glyph names)
    todo = set(encoding.values())
    done = set()
    seen_subrs = {0, 1, 2, 3}
    while True:
        pending = todo - done
        if not pending:
            break
        glyph = next(iter(pending))
        more_glyphs, more_subrs, _, _ = self._simulate(glyph, [], [])
        todo.update(more_glyphs)
        seen_subrs.update(more_subrs)
        done.add(glyph)

    fontname = _make_tag(todo) + self.prop['FontName']
    charstrings = self._subset_charstrings(todo)
    subrs = self._subset_subrs(seen_subrs)

    replacements = [
        (span, '/FontName/%s def' % fontname)
        for span in self._pos['FontName']
    ]
    replacements += [
        (self._pos['CharStrings'][0], charstrings),
        (self._pos['Subrs'][0], subrs),
        (self._pos['Encoding'][0], self._subset_encoding(encoding)),
    ]
    replacements += [(span, '') for span in self._pos.get('UniqueID', [])]

    cleartext, decrypted = self._replace(replacements)[:2]
    return Type1Font((
        cleartext,
        self._encrypt(decrypted, 'eexec'),
        self.parts[2]
    ))
777
883
884
+ @staticmethod
885
+ def _charstring_tokens (data ):
886
+ data = iter (data )
887
+ for byte in data :
888
+ if 32 <= byte <= 246 :
889
+ yield byte - 139
890
+ elif 247 <= byte <= 250 :
891
+ byte2 = next (data )
892
+ yield (byte - 247 )* 256 + byte2 + 108
893
+ elif 251 <= byte <= 254 :
894
+ byte2 = next (data )
895
+ yield - (byte - 251 )* 256 - byte2 - 108
896
+ elif byte == 255 :
897
+ bs = itertools .islice (data ,4 )
898
+ yield struct .unpack ('>i' ,bs )[0 ]
899
+ elif byte == 12 :
900
+ byte1 = next (data )
901
+ yield {
902
+ 0 :'dotsection' ,
903
+ 1 :'vstem3' ,
904
+ 2 :'hstem3' ,
905
+ 6 :'seac' ,
906
+ 7 :'sbw' ,
907
+ 12 :'div' ,
908
+ 16 :'callothersubr' ,
909
+ 17 :'pop' ,
910
+ 33 :'setcurrentpoint'
911
+ }[byte1 ]
912
+ else :
913
+ yield {
914
+ 1 :'hstem' ,
915
+ 3 :'vstem' ,
916
+ 4 :'vmoveto' ,
917
+ 5 :'rlineto' ,
918
+ 6 :'hlineto' ,
919
+ 7 :'vlineto' ,
920
+ 8 :'rrcurveto' ,
921
+ 9 :'closepath' ,
922
+ 10 :'callsubr' ,
923
+ 11 :'return' ,
924
+ 13 :'hsbw' ,
925
+ 14 :'endchar' ,
926
+ 21 :'rmoveto' ,
927
+ 22 :'hmoveto' ,
928
+ 30 :'vhcurveto' ,
929
+ 31 :'hvcurveto'
930
+ }[byte ]
931
+
932
+ def _step (self ,buildchar_stack ,postscript_stack ,opcode ):
933
+ if isinstance (opcode ,int ):
934
+ return set (),set (),buildchar_stack + [opcode ],postscript_stack
935
+ elif opcode in {
936
+ 'hsbw' ,'sbw' ,'closepath' ,'hlineto' ,'hmoveto' ,'hcurveto' ,
937
+ 'hvcurveto' ,'rlineto' ,'rmoveto' ,'rrcurveto' ,'vhcurveto' ,
938
+ 'vlineto' ,'vmoveto' ,'dotsection' ,'hstem' ,'hstem3' ,'vstem' ,
939
+ 'vstem3' ,'setcurrentpoint'
940
+ }:
941
+ return set (),set (), [],postscript_stack
942
+ elif opcode == 'seac' :
943
+ codes = buildchar_stack [3 :5 ]
944
+ glyphs = [self .prop ['Encoding' ][x ]for x in codes ]
945
+ return set (glyphs ),set (), [],postscript_stack
946
+ elif opcode == 'div' :
947
+ num1 ,num2 = buildchar_stack [- 2 :]
948
+ return (
949
+ set (),
950
+ set (),
951
+ buildchar_stack [- 2 :]+ [num1 / num2 ],postscript_stack
952
+ )
953
+ elif opcode == 'callothersubr' :
954
+ othersubr = buildchar_stack [- 1 ]
955
+ n = buildchar_stack [- 2 ]
956
+ args = buildchar_stack [- 2 - n :- 2 ]
957
+ if othersubr == 3 :# Section 8.1 in Type-1 spec
958
+ postscript_stack .append (args [0 ])
959
+ else :
960
+ postscript_stack .extend (args [::- 1 ])
961
+ return set (),set (),buildchar_stack [:- n - 2 ],postscript_stack
962
+ elif opcode == 'callsubr' :
963
+ subr = buildchar_stack [- 1 ]
964
+ glyphs ,subrs ,new_bc_stack ,new_ps_stack = \
965
+ self ._simulate (subr ,buildchar_stack [:- 1 ],postscript_stack )
966
+ return set (),subrs | {subr },new_bc_stack ,new_ps_stack
967
+ elif opcode == 'pop' :
968
+ return (
969
+ set (),
970
+ set (),
971
+ buildchar_stack + [postscript_stack [- 1 ]],postscript_stack [:- 1 ]
972
+ )
973
+ else :
974
+ raise RuntimeError (f'opcode{ opcode } ' )
975
+
976
+ def _simulate (self ,glyph_or_subr ,buildchar_stack ,postscript_stack ):
977
+ if isinstance (glyph_or_subr ,str ):
978
+ program = self .prop ['CharStrings' ][glyph_or_subr ]
979
+ glyphs = {glyph_or_subr }
980
+ subrs = set ()
981
+ else :
982
+ program = self .prop ['Subrs' ][glyph_or_subr ]
983
+ glyphs = set ()
984
+ subrs = {glyph_or_subr }
985
+ for opcode in self ._charstring_tokens (program ):
986
+ if opcode in ('return' ,'endchar' ):
987
+ return glyphs ,subrs ,buildchar_stack ,postscript_stack
988
+ newglyphs ,newsubrs ,buildchar_stack ,postscript_stack = \
989
+ self ._step (buildchar_stack ,postscript_stack ,opcode )
990
+ glyphs .update (newglyphs )
991
+ subrs .update (newsubrs )
992
+
993
+ def _subset_encoding (self ,encoding ):
994
+ result = [
995
+ '/Encoding 256 array\n 0 1 255 { 1 index exch /.notdef put } for'
996
+ ]
997
+ result .extend (
998
+ f'dup{ i } /{ glyph } put'
999
+ for i ,glyph in sorted (encoding .items ())
1000
+ if glyph != '.notdef'
1001
+ )
1002
+ result .extend ('readonly def\n ' )
1003
+ return '\n ' .join (result )
1004
+
1005
+ def _subset_charstrings (self ,glyphs ):
1006
+ result = [f'/CharStrings{ len (glyphs )} dict dup begin' ]
1007
+ encrypted = [self ._encrypt (self .prop ['CharStrings' ][glyph ],
1008
+ 'charstring' ,
1009
+ self .prop .get ('lenIV' ,4 )
1010
+ ).decode ('latin-1' )
1011
+ for glyph in glyphs ]
1012
+ RD ,ND = self ._abbr ['RD' ],self ._abbr ['ND' ]
1013
+ result .extend (
1014
+ f'/{ glyph } { len (enc )} { RD } { enc } { ND } '
1015
+ for glyph ,enc in zip (glyphs ,encrypted )
1016
+ )
1017
+ result .append ('end\n ' )
1018
+ return '\n ' .join (result )
1019
+
1020
+ def _subset_subrs (self ,indices ):
1021
+ # we can't remove subroutines, we just replace unused ones with a stub
1022
+ n_subrs = len (self .prop ['Subrs' ])
1023
+ result = [f'/Subrs{ n_subrs } array' ]
1024
+ lenIV = self .prop .get ('lenIV' ,4 )
1025
+ stub = self ._encrypt (b'\x0b ' ,'charstring' ,lenIV ).decode ('latin-1' )
1026
+ encrypted = [
1027
+ self ._encrypt (self .prop ['Subrs' ][i ],'charstring' ,lenIV
1028
+ ).decode ('latin-1' )
1029
+ if i in indices else stub
1030
+ for i in range (n_subrs )
1031
+ ]
1032
+ RD ,ND ,NP = self ._abbr ['RD' ],self ._abbr ['ND' ],self ._abbr ['NP' ]
1033
+ result .extend (
1034
+ f'dup{ i } { len (enc )} { RD } { enc } { NP } '
1035
+ for i ,enc in enumerate (encrypted )
1036
+ )
1037
+ result .extend ((ND ,'' ))
1038
+ return '\n ' .join (result )
1039
+
778
1040
779
1041
_StandardEncoding = {
780
1042
** {ord (letter ):letter for letter in string .ascii_letters },