30
30
31
31
import numpy as np
32
32
33
- from matplotlib import _api ,cbook
33
+ from matplotlib import _api ,cbook ,textpath
34
+ from matplotlib .ft2font import FT2Font ,LoadFlags
34
35
35
36
_log = logging .getLogger (__name__ )
36
37
@@ -106,18 +107,27 @@ def font_effects(self):
106
107
@property
def glyph_name_or_index(self):
    """
    The glyph name, the native charmap glyph index, or the raw glyph index.

    If the font is a TrueType file (which can currently only happen for
    DVI files generated by xetex or luatex), then this number is the raw
    index of the glyph, which can be passed to FT_Load_Glyph/load_glyph.

    Otherwise, the font is a PostScript font.  If :file:`pdftex.map`
    specifies an encoding for this glyph's font, that encoding maps glyph
    indices to Adobe glyph names, and this property uses it to convert the
    dvi number into a glyph name.  Callers can then convert the glyph name
    to a glyph index (with FT_Get_Name_Index/get_name_index), and load the
    glyph using FT_Load_Glyph/load_glyph.

    If :file:`pdftex.map` specifies no encoding for a PostScript font,
    this number is an index into the font's "native" charmap; glyphs
    should directly load using FT_Load_Char/load_char after selecting the
    native charmap.
    """
    # TODO: The last section is only true on luatex since luaotfload 3.15;
    # add a version check in the tex file generated by texmanager.
    # NOTE(review): this body always goes through the pdftex.map entry; the
    # TrueType case described above is presumably handled inside
    # _get_pdftexmap_entry or by callers -- confirm.
    encoding = self._get_pdftexmap_entry().encoding
    if encoding is None:
        # No encoding file: the dvi number is used as-is.
        return self.glyph
    return _parse_enc(encoding)[self.glyph]
@@ -399,7 +409,7 @@ def _put_char_real(self, char):
399
409
scale = font ._scale
400
410
for x ,y ,f ,g ,w in font ._vf [char ].text :
401
411
newf = DviFont (scale = _mul1220 (scale ,f ._scale ),
402
- tfm = f ._tfm ,texname = f .texname ,vf = f ._vf )
412
+ metrics = f ._metrics ,texname = f .texname ,vf = f ._vf )
403
413
self .text .append (Text (self .h + _mul1220 (x ,scale ),
404
414
self .v + _mul1220 (y ,scale ),
405
415
newf ,g ,newf ._width_of (g )))
@@ -495,7 +505,27 @@ def _fnt_def(self, k, c, s, d, a, l):
495
505
496
506
def _fnt_def_real(self, k, c, s, d, a, l):
    """
    Register font number *k* from the font name stored in the dvi stream.

    Parameters
    ----------
    k : int
        Font number; the key under which the font is stored in
        ``self.fonts``.
    c, d : int
        Not used for "[...]" font names (presumably the checksum and design
        size of the font definition -- confirm against the dispatcher).
    s : int
        Scale, forwarded to `DviFont` as *scale*.
    a, l : int
        ``a + l`` bytes are read from the file; the last *l* bytes are the
        ASCII font name.

    Raises
    ------
    ValueError
        If a name starting with "[" has no closing "]" or is followed by
        anything other than nothing or ":options".
    NotImplementedError
        If a nonzero ``index`` option (a face inside a TTC) is requested.
    """
    n = self.file.read(a + l)
    fontname = n[-l:].decode("ascii")
    # Note that checksum seems wrong?
    if fontname.startswith("["):
        # Font given by file path: "[/path/to/font]" optionally followed by
        # ":key=value;key=value".
        path, sep, rest = fontname[1:].rpartition("]")
        if not sep or rest[:1] not in ["", ":"]:
            raise ValueError(f"Invalid modern font name: {fontname}")
        flags = {}  # TODO: Actually record these flags.
        # The option names must not reuse *k*: it is the font number under
        # which the font is registered below.
        for kv in rest[1:].split(";") if rest[1:] else []:
            key, eq, value = kv.partition("=")
            if not eq:
                _log.warning("Ignoring invalid key-value pair: %r", kv)
            elif key == "index":
                # The value is text; compare it numerically so that the
                # explicit default "index=0" is accepted.
                if int(value) != 0:
                    raise NotImplementedError(
                        "Indexing TTC fonts is not supported yet")
            elif key in ["embolden", "slant", "extend"]:
                flags[key] = int(value) / 65536
            else:
                _log.warning("Ignoring invalid key-value pair: %r", kv)
        metrics = TtfMetrics(path)
        self.fonts[k] = DviFont(scale=s, metrics=metrics, texname=n, vf=None)
        return
    # Names that do not start with "[" fall through to the tfm/vf lookup
    # that follows.
499
529
try :
500
530
tfm = _tfmfile (fontname )
501
531
except FileNotFoundError as exc :
@@ -512,12 +542,12 @@ def _fnt_def_real(self, k, c, s, d, a, l):
512
542
vf = _vffile (fontname )
513
543
except FileNotFoundError :
514
544
vf = None
515
- self .fonts [k ]= DviFont (scale = s ,tfm = tfm ,texname = n ,vf = vf )
545
+ self .fonts [k ]= DviFont (scale = s ,metrics = tfm ,texname = n ,vf = vf )
516
546
517
547
@_dispatch (247 ,state = _dvistate .pre ,args = ('u1' ,'u4' ,'u4' ,'u4' ,'u1' ))
518
548
def _pre (self ,i ,num ,den ,mag ,k ):
519
549
self .file .read (k )# comment in the dvi file
520
- if i != 2 :
550
+ if i not in [ 2 , 7 ]: # 2: pdftex, luatex; 7: xetex
521
551
raise ValueError (f"Unknown dvi format{ i } " )
522
552
if num != 25400000 or den != 7227 * 2 ** 16 :
523
553
raise ValueError ("Nonstandard units in dvi file" )
@@ -538,13 +568,70 @@ def _post(self, _):
538
568
# TODO: actually read the postamble and finale?
539
569
# currently post_post just triggers closing the file
540
570
541
@_dispatch(249, args=())
def _post_post(self):
    """Handle opcode 249 (post_post), which takes no decoded arguments."""
    # The postamble is never parsed; reaching this opcode is unsupported.
    raise NotImplementedError
574
+
575
@_dispatch(250, args=())
def _begin_reflect(self):
    """Handle opcode 250 (begin reflect); not supported."""
    raise NotImplementedError
544
578
545
- @_dispatch (min = 250 ,max = 255 )
546
- def _malformed (self ,offset ):
547
- raise ValueError (f"unknown command: byte{ 250 + offset } " )
579
@_dispatch(251, args=())
def _end_reflect(self):
    """Handle opcode 251 (end reflect); not supported."""
    raise NotImplementedError
582
+
583
@_dispatch(252, args=())
def _define_native_font(self):
    """
    Handle opcode 252: define a font that is loaded from a font file.

    Reads, in order: the font number (4 bytes), the scale (4 bytes), a
    2-byte flags field, a 1-byte name length, the name itself and a 4-byte
    face index; then the optional fields selected by the flag bits (rgba,
    extend, slant, embolden).  The font is stored in ``self.fonts`` with
    the name wrapped in square brackets as its texname.

    Raises
    ------
    NotImplementedError
        If the face index is nonzero.
    """
    def unsigned(nbytes):
        return self._read_arg(nbytes, signed=False)

    k = unsigned(4)
    s = unsigned(4)
    flags = unsigned(2)
    name_length = unsigned(1)
    n = self.file.read(name_length)
    face_index = unsigned(4)
    # TODO: Actually record these flags.
    # The optional fields are not used yet, but they must still be read so
    # that the file position ends up after this command.
    effects = {}
    if flags & 0x0200:
        effects["rgba"] = [unsigned(1) for _ in range(4)]
    for mask, effect in [(0x1000, "extend"),
                         (0x2000, "slant"),
                         (0x4000, "embolden")]:
        if flags & mask:
            effects[effect] = self._read_arg(4, signed=True) / 65536
    if face_index:
        raise NotImplementedError("Indexing TTC fonts is not supported yet")
    metrics = TtfMetrics(n)
    self.fonts[k] = DviFont(
        scale=s, metrics=metrics, texname=b"[" + n + b"]", vf=None)
605
+
606
@_dispatch(253, args=())
def _set_glyphs(self):
    """
    Handle opcode 253: typeset a run of glyphs at explicit offsets.

    Reads the total advance (4 bytes), the glyph count (2 bytes), one
    signed (x, y) offset pair per glyph (4 bytes each, interleaved) and
    then one 2-byte glyph number per glyph.  Each glyph is appended to
    ``self.text`` at the current position plus its offset, using the
    current font; finally the horizontal position moves by the advance.
    """
    # NOTE(review): the advance is read as unsigned -- confirm against the
    # XDV format whether it may be negative.
    advance = self._read_arg(4, signed=False)
    count = self._read_arg(2, signed=False)
    offsets = [self._read_arg(4, signed=True) for _ in range(2 * count)]
    glyphs = [self._read_arg(2, signed=False) for _ in range(count)]
    font = self.fonts[self.f]
    for dx, dy, glyph in zip(offsets[0::2], offsets[1::2], glyphs):
        self.text.append(Text(self.h + dx, self.v + dy,
                              font, glyph, font._width_of(glyph)))
    self.h += advance
617
+
618
@_dispatch(254, args=())
def _set_text_and_glyphs(self):
    """
    Handle opcode 254: like opcode 253, preceded by the source text.

    Reads a 2-byte length and that many 2-byte (utf16) code units of text,
    which are skipped; then the total advance (4 bytes), the glyph count
    (2 bytes), one signed (x, y) offset pair per glyph and one 2-byte glyph
    number per glyph.  Each glyph is appended to ``self.text`` at the
    current position plus its offset, using the current font; finally the
    horizontal position moves by the advance.
    """
    text_length = self._read_arg(2, signed=False)
    self.file.read(2 * text_length)  # utf16 text; read only to skip it
    # NOTE(review): the advance is read as unsigned -- confirm against the
    # XDV format whether it may be negative.
    advance = self._read_arg(4, signed=False)
    count = self._read_arg(2, signed=False)
    offsets = [self._read_arg(4, signed=True) for _ in range(2 * count)]
    glyphs = [self._read_arg(2, signed=False) for _ in range(count)]
    font = self.fonts[self.f]
    for dx, dy, glyph in zip(offsets[0::2], offsets[1::2], glyphs):
        self.text.append(Text(self.h + dx, self.v + dy,
                              font, glyph, font._width_of(glyph)))
    self.h += advance
631
+
632
@_dispatch(255)
def _malformed(self, raw):
    """Reject opcode 255, the only remaining byte with no handler."""
    # *raw* is whatever the dispatcher passes along; it is not used.
    raise ValueError("unknown command: byte 255")
548
635
549
636
550
637
class DviFont :
@@ -562,7 +649,7 @@ class DviFont:
562
649
----------
563
650
scale : float
564
651
Factor by which the font is scaled from its natural size.
565
- tfm : Tfm
652
+ metrics : Tfm | TtfMetrics
566
653
TeX font metrics for this font
567
654
texname : bytes
568
655
Name of the font as used internally by TeX and friends, as an ASCII
@@ -578,12 +665,12 @@ class DviFont:
578
665
Size of the font in Adobe points, converted from the slightly
579
666
smaller TeX points.
580
667
"""
581
- __slots__ = ('texname' ,'size' ,'_scale' ,'_vf' ,'_tfm ' )
668
+ __slots__ = ('texname' ,'size' ,'_scale' ,'_vf' ,'_metrics ' )
582
669
583
- def __init__ (self ,scale ,tfm ,texname ,vf ):
670
+ def __init__ (self ,scale ,metrics ,texname ,vf ):
584
671
_api .check_isinstance (bytes ,texname = texname )
585
672
self ._scale = scale
586
- self ._tfm = tfm
673
+ self ._metrics = metrics
587
674
self .texname = texname
588
675
self ._vf = vf
589
676
self .size = scale * (72.0 / (72.27 * 2 ** 16 ))
@@ -604,32 +691,30 @@ def __repr__(self):
604
691
605
692
def _width_of(self, char):
    """
    Width of char in dvi units.

    Returns 0 (and logs a debug message) when the font's metrics have no
    entry for *char*.
    """
    glyph_metrics = self._metrics.get_metrics(char)
    if glyph_metrics is not None:
        # Scale the font-relative width by this font's scale factor.
        return _mul1220(glyph_metrics.width, self._scale)
    _log.debug('No width for char %d in font %s.', char, self.texname)
    return 0
612
699
613
700
def _height_depth_of(self, char):
    """
    Height and depth of char in dvi units.

    Returns a two-item list ``[height, depth]``; both are 0 (and a debug
    message is logged) when the font's metrics have no entry for *char*.
    """
    glyph_metrics = self._metrics.get_metrics(char)
    if glyph_metrics is None:
        _log.debug('No metrics for char %d in font %s', char, self.texname)
        return [0, 0]
    height = _mul1220(glyph_metrics.height, self._scale)
    depth = _mul1220(glyph_metrics.depth, self._scale)
    # cmsyXX (symbols font) glyph 0 ("minus") has a nonzero descent
    # so that TeX aligns equations properly
    # (https://tex.stackexchange.com/q/526103/)
    # but we actually care about the rasterization depth to align
    # the dvipng-generated images.
    if char == 0 and re.match(br'^cmsy\d+$', self.texname):
        depth = 0
    return [height, depth]
633
718
634
719
635
720
class Vf (Dvi ):
@@ -761,6 +846,9 @@ def _mul1220(num1, num2):
761
846
return (num1 * num2 )>> 20
762
847
763
848
849
# Per-glyph metrics record: width, height and depth of one glyph.
WHD = namedtuple('WHD', ['width', 'height', 'depth'])
850
+
851
+
764
852
class Tfm :
765
853
"""
766
854
A TeX Font Metric file.
@@ -783,7 +871,7 @@ class Tfm:
783
871
specified in the dvi file. These are dicts because indexing may
784
872
not start from 0.
785
873
"""
786
- __slots__ = ('checksum' ,'design_size' ,'width ' ,'height' , 'depth ' )
874
+ __slots__ = ('checksum' ,'design_size' ,'_whds ' ,'widths ' )
787
875
788
876
def __init__ (self ,filename ):
789
877
_log .debug ('opening tfm file %s' ,filename )
@@ -799,15 +887,42 @@ def __init__(self, filename):
799
887
widths = struct .unpack (f'!{ nw } i' ,file .read (4 * nw ))
800
888
heights = struct .unpack (f'!{ nh } i' ,file .read (4 * nh ))
801
889
depths = struct .unpack (f'!{ nd } i' ,file .read (4 * nd ))
802
- self .width = {}
803
- self .height = {}
804
- self .depth = {}
890
+ self ._whds = {}
805
891
for idx ,char in enumerate (range (bc ,ec + 1 )):
806
892
byte0 = char_info [4 * idx ]
807
893
byte1 = char_info [4 * idx + 1 ]
808
- self .width [char ]= widths [byte0 ]
809
- self .height [char ]= heights [byte1 >> 4 ]
810
- self .depth [char ]= depths [byte1 & 0xf ]
894
+ self ._whds [char ]= WHD (
895
+ widths [byte0 ],heights [byte1 >> 4 ],depths [byte1 & 0xf ])
896
+ self .widths = [(1000 * self ._whds [c ].width if c in self ._whds else 0 )>> 20
897
+ for c in range (max (self ._whds ))]if self ._whds else []
898
+
899
def get_metrics(self, char):
    """
    Return the metrics record stored for *char*, or None if there is none.

    Callers test the result against None to detect characters that are
    missing from the font, so a missing character must not raise KeyError.
    """
    return self._whds.get(char)
901
+
902
# Deprecated per-quantity views of the metrics table, each rebuilt on
# access as a {char: value} dict.  Iterating a dict yields only its keys, so
# .items() is needed to unpack (char, metrics) pairs.
width = _api.deprecated("3.11")(
    property(lambda self: {c: m.width for c, m in self._whds.items()}))
height = _api.deprecated("3.11")(
    property(lambda self: {c: m.height for c, m in self._whds.items()}))
depth = _api.deprecated("3.11")(
    property(lambda self: {c: m.depth for c, m in self._whds.items()}))
908
+
909
+
910
class TtfMetrics:
    """
    Glyph metrics read from a font file through FT2Font.

    Offers the same ``get_metrics`` method as `Tfm`, so that either can be
    used as the metrics source of a font.
    """

    def __init__(self, filename):
        self._face = FT2Font(filename, hinting_factor=1)  # Manage closing?

    def get_metrics(self, char):
        """
        Return the WHD (width, height, depth) record of glyph *char*.

        The glyph is loaded unscaled; its advance, height and the part of
        its height below the baseline are divided by the font's units per
        em and converted to integers with 20 fractional bits.
        """
        # _mul1220 uses a truncating bitshift for compatibility with
        # dvitype, but the truncation rules still need to be figured out
        # for fonts whose upem is 1000 (e.g. lmroman10-regular.otf), as
        # their metrics are not exactly representable in binary fixed
        # point.  For now, just truncate during the conversion as well.
        # (When upem is 2048 the conversion is exact and the truncation
        # does nothing.)
        face = self._face
        upem = face.units_per_EM  # Usually 2048 or 1000.
        glyph = face.load_glyph(char, LoadFlags.NO_SCALE)

        def to_fixed(font_units):
            return int(font_units / upem * 2**20)

        return WHD(
            width=to_fixed(glyph.horiAdvance),
            height=to_fixed(glyph.height),
            depth=to_fixed(glyph.height - glyph.horiBearingY),
        )
811
926
812
927
813
928
PsFont = namedtuple ('PsFont' ,'texname psname effects encoding filename' )
@@ -1002,8 +1117,7 @@ def _parse_enc(path):
1002
1117
Returns
1003
1118
-------
1004
1119
list
1005
- The nth entry of the list is the PostScript glyph name of the nth
1006
- glyph.
1120
+ The nth list item is the PostScript glyph name of the nth glyph.
1007
1121
"""
1008
1122
no_comments = re .sub ("%.*" ,"" ,Path (path ).read_text (encoding = "ascii" ))
1009
1123
array = re .search (r"(?s)\[(.*)\]" ,no_comments ).group (1 )
@@ -1108,26 +1222,45 @@ def _fontfile(cls, suffix, texname):
1108
1222
from argparse import ArgumentParser
1109
1223
import itertools
1110
1224
1225
+ import fontTools .agl
1226
+
1111
1227
parser = ArgumentParser ()
1112
1228
parser .add_argument ("filename" )
1113
1229
parser .add_argument ("dpi" ,nargs = "?" ,type = float ,default = None )
1114
1230
args = parser .parse_args ()
1115
1231
with Dvi (args .filename ,args .dpi )as dvi :
1116
1232
fontmap = PsfontsMap (find_tex_file ('pdftex.map' ))
1117
1233
for page in dvi :
1118
- print (f"===new page === "
1234
+ print (f"===NEW PAGE === "
1119
1235
f"(w:{ page .width } , h:{ page .height } , d:{ page .descent } )" )
1120
- for font ,group in itertools .groupby (
1121
- page .text ,lambda text :text .font ):
1122
- print (f"font:{ font .texname .decode ('latin-1' )!r} \t "
1123
- f"scale:{ font ._scale / 2 ** 20 } " )
1124
- print ("x" ,"y" ,"glyph" ,"chr" ,"w" ,"(glyphs)" ,sep = "\t " )
1236
+ print ("--- GLYPHS ---" )
1237
+ for font ,group in itertools .groupby (page .text ,lambda text :text .font ):
1238
+ font_name = font .texname .decode ("latin-1" )
1239
+ filename = (font_name [1 :- 1 ]if font_name .startswith ("[" )
1240
+ else fontmap [font .texname ].filename )
1241
+ if font_name .startswith ("[" ):
1242
+ print (f"font:{ font_name } " )
1243
+ else :
1244
+ print (f"font:{ font_name } at{ filename } " )
1245
+ print (f"scale:{ font ._scale / 2 ** 20 } " )
1246
+ print (" " .join (map ("{:>11}" .format , ["x" ,"y" ,"glyph" ,"chr" ,"w" ])))
1247
+ face = FT2Font (filename )
1125
1248
for text in group :
1126
- print (text .x ,text .y ,text .glyph ,
1127
- chr (text .glyph )if chr (text .glyph ).isprintable ()
1128
- else "." ,
1129
- text .width ,sep = "\t " )
1249
+ if font_name .startswith ("[" ):
1250
+ glyph_name = face .get_glyph_name (text .glyph )
1251
+ else :
1252
+ if isinstance (text .glyph_name_or_index ,str ):
1253
+ glyph_name = text .glyph_name_or_index
1254
+ else :
1255
+ textpath .TextToPath ._select_native_charmap (face )
1256
+ glyph_name = face .get_glyph_name (
1257
+ face .get_char_index (text .glyph ))
1258
+ glyph_str = fontTools .agl .toUnicode (glyph_name )
1259
+ print (" " .join (map ("{:>11}" .format , [
1260
+ text .x ,text .y ,text .glyph ,glyph_str ,text .width ])))
1130
1261
if page .boxes :
1131
- print ("x" ,"y" ,"h" ,"w" ,"" ,"(boxes)" ,sep = "\t " )
1262
+ print ("--- BOXES ---" )
1263
+ print (" " .join (map ("{:>11}" .format , ["x" ,"y" ,"h" ,"w" ])))
1132
1264
for box in page .boxes :
1133
- print (box .x ,box .y ,box .height ,box .width ,sep = "\t " )
1265
+ print (" " .join (map ("{:>11}" .format , [
1266
+ box .x ,box .y ,box .height ,box .width ])))