extract_ttfs.py (4107B)
#!/usr/bin/env python
"""Extract glyph metrics from the KaTeX TTF fonts.

Reads a font-metrics JSON object from stdin, adds (or overwrites) an entry
for every character listed in ``metrics_to_extract`` using data read from
the corresponding ``../static/fonts/KaTeX_<font>.ttf`` file, and writes the
merged JSON to stdout in compact form with sorted keys.
"""

from fontTools.ttLib import TTFont
import sys
import json

# map of characters to extract
metrics_to_extract = {
    # Font name
    "AMS-Regular": {
        u"\u21e2": None,  # \dashrightarrow
        u"\u21e0": None,  # \dashleftarrow
    },
    "Main-Regular": {
        # Skew and italic metrics can't be easily parsed from the TTF. Instead,
        # we map each character to a "base character", which is a character
        # from the same font with correct italic and skew metrics. A character
        # maps to None if it doesn't have a base.

        u"\u2260": None,  # \neq
        u"\u2245": None,  # \cong
        u"\u0020": None,  # space
        u"\u00a0": None,  # nbsp
        u"\u2026": None,  # \ldots
        u"\u22ef": None,  # \cdots
        u"\u22f1": None,  # \ddots
        u"\u22ee": None,  # \vdots
        u"\u22a8": None,  # \models
        u"\u22c8": None,  # \bowtie
        u"\u2250": None,  # \doteq
        u"\u23b0": None,  # \lmoustache
        u"\u23b1": None,  # \rmoustache
        u"\u27ee": None,  # \lgroup
        u"\u27ef": None,  # \rgroup
        u"\u27f5": None,  # \longleftarrow
        u"\u27f8": None,  # \Longleftarrow
        u"\u27f6": None,  # \longrightarrow
        u"\u27f9": None,  # \Longrightarrow
        u"\u27f7": None,  # \longleftrightarrow
        u"\u27fa": None,  # \Longleftrightarrow
        u"\u21a6": None,  # \mapsto
        u"\u27fc": None,  # \longmapsto
        u"\u21a9": None,  # \hookleftarrow
        u"\u21aa": None,  # \hookrightarrow
        u"\u21cc": None,  # \rightleftharpoons
    },
    "Size1-Regular": {
        u"\u222c": u"\u222b",  # \iint, based on \int
        u"\u222d": u"\u222b",  # \iiint, based on \int
    },
    "Size2-Regular": {
        u"\u222c": u"\u222b",  # \iint, based on \int
        u"\u222d": u"\u222b",  # \iiint, based on \int
    },
}


def _unicode_cmaps(font_info):
    """Return the codepoint -> glyph-name dicts of all Unicode cmap subtables.

    We keep ALL Unicode cmaps, not just font_info["cmap"].getcmap(3, 1).
    This is playing it extra safe, since it lets us report inconsistencies.
    Platform 0 is Unicode, platform 3 is Windows. For platform 3,
    encoding 1 is UCS-2 and encoding 10 is UCS-4.
    """
    return [t.cmap for t in font_info["cmap"].tables
            if (t.platformID == 0)
            or (t.platformID == 3 and t.platEncID in (1, 10))]


def _glyph_names(cmaps, code):
    """Return the set of distinct glyph names that ``cmaps`` give ``code``.

    Tables that do not contain the codepoint are skipped, so the result is
    empty when no table maps it (rather than containing ``None``).
    """
    return set(table[code] for table in cmaps if code in table)


def main():
    """Merge TTF-derived metrics into the JSON read from stdin.

    For each font and character in ``metrics_to_extract``, height, depth and
    width are taken from the glyph's bounding box (left at 0 for glyphs with
    no contours). When the character has a base character, italic, skew and
    width are copied from that base character's existing entry in the input
    JSON. Codepoints that map to no glyph name, or to conflicting names, are
    reported on stderr and skipped.

    Raises:
        KeyError: if the input JSON lacks an entry for a font, or for a
            base character that a listed character refers to.
    """
    start_json = json.load(sys.stdin)

    # .items() rather than .iteritems() so the script runs on Python 2 and 3.
    for font, chars in metrics_to_extract.items():
        font_info = TTFont("../static/fonts/KaTeX_" + font + ".ttf")
        glyf = font_info["glyf"]
        units_per_em = float(font_info["head"].unitsPerEm)
        cmaps = _unicode_cmaps(font_info)

        for char, base_char in chars.items():
            code = ord(char)
            names = _glyph_names(cmaps, code)
            if not names:
                sys.stderr.write(
                    "Codepoint {} of font {} maps to no name\n"
                    .format(code, font))
                continue
            if len(names) != 1:
                sys.stderr.write(
                    "Codepoint {} of font {} maps to multiple names: {}\n"
                    .format(code, font, ", ".join(sorted(names))))
                continue
            name = names.pop()

            height = depth = italic = skew = width = 0
            glyph = glyf[name]
            if glyph.numberOfContours:
                height = glyph.yMax
                depth = -glyph.yMin
                # Normalize to em here: height and depth are divided by
                # units_per_em below, and a base character's width (which
                # may replace this value) already comes from the em-based
                # input JSON.
                width = (glyph.xMax - glyph.xMin) / units_per_em
            if base_char:
                base_char_str = str(ord(base_char))
                base_metrics = start_json[font][base_char_str]
                italic = base_metrics["italic"]
                skew = base_metrics["skew"]
                width = base_metrics["width"]

            start_json[font][str(code)] = {
                "height": height / units_per_em,
                "depth": depth / units_per_em,
                "italic": italic,
                "skew": skew,
                "width": width
            }

    sys.stdout.write(
        json.dumps(start_json, separators=(',', ':'), sort_keys=True))


if __name__ == "__main__":
    main()