-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathbdf_parser.py
137 lines (117 loc) · 4.17 KB
/
bdf_parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
#!/usr/bin/env python3
import sys
import itertools
def msb(n):
    """Return the 1-based position of the most significant set bit of *n*.

    ``msb(0)`` is 0 and ``msb(1)`` is 1; for any non-negative integer the
    result equals the number of binary digits needed to write it.

    The sign is ignored, so ``msb(-8) == msb(8) == 4``. (A hand-written
    shift-until-zero loop would never finish on negative input, because
    right-shifting a negative int converges to -1 rather than 0.)
    """
    return n.bit_length()
def _skip_to_endchar(stream):
    """Consume lines from *stream* up to and including the next ENDCHAR.

    Stops quietly at end of input, so a truncated font file cannot make the
    parser spin forever on the empty strings ``readline()`` returns at EOF.
    """
    for raw in iter(stream.readline, ''):
        if raw.strip() == 'ENDCHAR':
            break


# Parse the BDF font on stdin, dispatching on the first word of each line.
# Glyph bitmaps that can be encoded as GB2312 are collected in ``storage``
# (key -> list of integer rows); a human-readable dump goes to stdout and a
# coverage summary goes to stderr when ENDFONT is reached.
for line in sys.stdin:
    group = line.split()
    if not group:
        # Blank line: there is no keyword to dispatch on.
        continue
    keyword = group[0]

    if keyword == 'FONT':
        print(' '.join(group[1:]))

    elif keyword == 'STARTFONT':
        count = ascii_count = gb_count = 0
        storage = {}

    elif keyword == 'ENDFONT':
        # Denominators for the coverage report.
        # NOTE(review): presumably the 95 printable ASCII characters and
        # GB2312's 6763 hanzi + 682 symbols -- confirm.
        ascii_total, gb_total = 95, (6763 + 682)
        sys.stderr.write(
            '%d utilized glyphs amongst %d parsed ones.\n'
            ' ASCII coverage: %.2f%% (%d hits, %d misses)\n'
            'GB2312 coverage: %.2f%% (%d hits, %d misses)\n' % (
                ascii_count + gb_count, count,
                ascii_count / ascii_total * 100,
                ascii_count, ascii_total - ascii_count,
                gb_count / gb_total * 100,
                gb_count, gb_total - gb_count
            )
        )
        break

    elif keyword == 'SIZE':
        pixel_size = int(group[1])
        # pixel_size rounded up to the next multiple of 8. Kept for
        # compatibility; nothing visible in this script reads it.
        hex_data_size = (pixel_size + 7) // 8 * 8

    elif keyword == 'STARTCHAR':
        glyph_name = group[1]
        count += 1

    elif keyword == 'ENCODING':
        code_point = int(group[1])
        try:
            euc_cn = chr(code_point).encode('GB2312')
        except UnicodeEncodeError:
            # A valid character that GB2312 cannot represent: drop the whole
            # glyph without comment. This clause must stay ahead of the
            # ValueError one because UnicodeEncodeError is a ValueError.
            _skip_to_endchar(sys.stdin)
            continue
        except ValueError:
            # chr() rejected the code point (for example a negative value).
            # The rest of the glyph is skipped as well; otherwise its BITMAP
            # would be stored under the previous glyph's ``key`` and
            # overwrite that glyph (or fail if no key exists yet).
            print('Unknown glyph:', glyph_name)
            _skip_to_endchar(sys.stdin)
            continue

        if code_point <= 0x7F:
            ascii_count += 1
            key = code_point
            charset = 'ASCII: %#X' % code_point
        else:
            gb_count += 1
            # Storage key is the encoded bytes read as a big-endian integer.
            key = int.from_bytes(euc_cn, 'big')
            charset = 'EUC-CN: %#X GB2312: %s' % (
                key,
                ''.join(format(byte - 0xA0, '02d') for byte in euc_cn)
            )
        print(
            'Glyph: "%s" Unicode: %#X %s' %
            (chr(code_point), code_point, charset)
        )

    elif keyword == 'BBX':
        width, height, x0, y0 = map(int, group[1:5])
        print(width, height, x0, y0)

    elif keyword == 'BITMAP':
        # Read hexadecimal rows until ENDCHAR. A row that is not valid hex
        # (including the empty string returned at end of input) makes int()
        # raise ValueError.
        buffer, x_offset = [], 0
        while True:
            line = sys.stdin.readline().strip()
            if line != 'ENDCHAR':
                hex_data = int(line, 16)
                buffer.append(hex_data)
                # Track the bit length of the widest row seen so far.
                x_offset = max(x_offset, msb(hex_data))
            else:
                # Turn the widest-row bit length into a shift amount.
                # NOTE(review): this appears to align rows to a
                # (pixel_size - 1)-bit cell using the BBX x origin -- confirm.
                x_offset += x0 - pixel_size + 1
                y_offset = y0 + 1
                for i in range(len(buffer)):
                    if x_offset > 0:
                        buffer[i] >>= x_offset
                    else:
                        buffer[i] <<= -x_offset
                    # ASCII-art preview of the shifted row.
                    print(
                        format(buffer[i], '0%db' % width).
                        translate({ord('0'): '..', ord('1'): '##'})
                    )
                # Append y_offset zero rows after the bitmap rows; nothing is
                # appended when y_offset is zero or negative.
                buffer.extend(itertools.repeat(0, y_offset))
                storage[key] = buffer
                break
# Lazily enumerate every two-byte code in an 87 x 94 grid: the high byte runs
# over 0xA1 + (0..86) and the low byte over 0xA1 + (0..93), high byte varying
# slowest. This is a one-shot iterator, consumed by the loop below.
all_euc_cn_codepoints = (
    ((0xA1 + row) << 8) | (0xA1 + col)
    for row in range(87)
    for col in range(94)
)

# Emit one fixed-order record per code into the file 'HZK<pixel_size>'.
with open('HZK%d' % pixel_size, 'wb') as f:
    for code in all_euc_cn_codepoints:
        # Codes with no parsed glyph become an all-zero bitmap.
        rows = storage.get(code, [0] * pixel_size)

        # Glyphs with fewer than pixel_size rows get zero rows prepended.
        # The list is padded in place, so a list taken from ``storage`` is
        # itself modified.
        shortfall = pixel_size - len(rows)
        if shortfall > 0:
            rows[:0] = [0] * shortfall

        if pixel_size == 8:
            # Each row is written as a single byte.
            packed = rows
        else:
            # Pack the glyph into a continuous MSB-first bit stream: the
            # first row is dropped and only the low (pixel_size - 1) bits of
            # every remaining row are kept. Bits run on across row
            # boundaries, so a byte may hold the tail of one row and the
            # head of the next.
            packed = []
            bits_written = 0
            for row_bits in rows[1:]:
                for src_bit in range(pixel_size - 2, -1, -1):
                    dst_bit = 7 - bits_written % 8
                    if dst_bit == 7:
                        # Starting a fresh output byte.
                        packed.append(0)
                    packed[-1] |= ((row_bits >> src_bit) & 1) << dst_bit
                    bits_written += 1

        f.write(bytes(packed))