|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import sys |
|
|
import string |
|
|
import rangetab |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Alias definitions, one per entry, written as
# "AliasName:Component[,Component...]".  Each alias name is later added to
# BlockNames with the ranges of every listed component block.
blockAliases = [
    "CombiningMarksforSymbols:CombiningDiacriticalMarksforSymbols",
    "Greek:GreekandCoptic",
    "PrivateUse:PrivateUseArea,SupplementaryPrivateUseArea-A,"
    "SupplementaryPrivateUseArea-B",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Categories with more than this many ranges are emitted as a range table
# looked up through xmlCharInRange; smaller ones become inline comparisons.
minTableSize = 8

# Input data files, resolved relative to the current working directory.
(blockfile, catfile) = ("Blocks-4.0.1.txt", "UnicodeData-4.0.1.txt")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parse the block definition file into BlockNames.
#
# Every data line is expected to look like "START..END; Block Name" with
# START and END in hexadecimal.  Lines starting with '#' and blank lines
# are skipped.  BlockNames maps the block name (with spaces removed) to
# the list of (start, end) integer ranges declared under that name.
BlockNames = {}
try:
    blocks = open(blockfile, "r")
except OSError:
    print("Missing %s, aborting ..." % blockfile)
    sys.exit(1)

# The with-statement guarantees the file is closed even if parsing raises.
with blocks:
    for line in blocks:
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            fields = line.split(';')
            (first, last) = fields[0].strip().split("..")
            name = fields[1].strip().replace(' ', '')
            # Converting inside the try means a malformed hex bound is
            # reported and skipped instead of aborting the whole run.
            start = int(first, 16)
            end = int(last, 16)
        except (IndexError, ValueError):
            print("Failed to process line: %s" % (line))
            continue
        BlockNames.setdefault(name, []).append((start, end))

print("Parsed %d blocks descriptions" % (len(BlockNames.keys())))
|
|
|
|
|
# Expand the configured aliases: every alias name receives the ranges of
# each of its component blocks.  Components that are missing from
# BlockNames are reported and skipped.
for spec in blockAliases:
    parts = spec.split(':')
    target = parts[0]
    for comp in parts[1].split(','):
        if comp not in BlockNames:
            print("Alias %s: %s not in Blocks" % (target, comp))
            continue
        # The alias entry is created lazily, on the first component found.
        BlockNames.setdefault(target, []).extend(BlockNames[comp])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Parse the character data file into Categories.
#
# Each data line is split on ';'.  The first field is the code point in
# hexadecimal and the third field is used as the category name.  Every
# code point is recorded twice: under the full category name and under
# its first character alone.  Lines starting with '#' and blank lines are
# skipped.  nbchar counts the lines that were parsed successfully.
try:
    data = open(catfile, "r")
except OSError:
    print("Missing %s, aborting ..." % catfile)
    sys.exit(1)

nbchar = 0
Categories = {}
# The with-statement guarantees the file is closed even if parsing raises.
with data:
    for line in data:
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        try:
            fields = line.split(';')
            # int() rejects malformed code points, so such lines are
            # reported instead of silently producing a wrong value.
            value = int(fields[0].strip(), 16)
            name = fields[2]
        except (IndexError, ValueError):
            print("Failed to process line: %s" % (line))
            continue

        nbchar = nbchar + 1

        # Record the code point under the full category name.
        Categories.setdefault(name, []).append(value)

        # Record it under the first letter of the category as well.  An
        # empty category field has no first letter and is reported.
        if name:
            Categories.setdefault(name[0], []).append(value)
        else:
            print("Failed to process line: %s" % (line))

print("Parsed %d char generating %d categories" % (nbchar, len(Categories.keys())))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Replace each category's list of code points with a list of inclusive
# (first, last) ranges, merging runs of consecutive values.  A lone code
# point becomes the range (value, value).  Values are merged in the order
# they were recorded, so the lists are assumed to be ascending.
# Only the values of existing keys are reassigned, which is safe while
# iterating over the dictionary.
for cat in Categories:
    values = Categories[cat]
    ranges = []
    # -1 marks "no run opened yet".
    start = prev = -1
    for val in values:
        if start == -1:
            start = prev = val
        elif val == prev + 1:
            # Still inside the current run: just extend it.
            prev = val
        else:
            # Gap found: close the current run and open a new one.
            ranges.append((start, prev))
            start = prev = val
    # Close the last run (an empty input yields the single range (-1, -1)).
    ranges.append((start, prev))
    Categories[cat] = ranges
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sorted block and category names, so the generated output has a stable
# order from run to run.
bkeys = sorted(BlockNames)
ckeys = sorted(Categories)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Open the generated include file for writing.  Only I/O failures are
# handled here; any other exception propagates untouched.
try:
    output = open("codegen/unicode.inc", "w")
except OSError:
    print("Failed to open codegen/unicode.inc")
    sys.exit(1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Emit a range table and an xmlChRangeGroup definition for every category
# with more than minTableSize ranges.  The 'Cs' category is always
# excluded.  The value returned by rangetab.gen_range_tables is written
# out as the initializer of the group.
for name in ckeys:
    cat_ranges = Categories[name]
    if name != 'Cs' and len(cat_ranges) > minTableSize:
        table_group = rangetab.gen_range_tables(
            output, 'xml' + name, 'S', 'L', cat_ranges)
        output.write("static const xmlChRangeGroup xml%sG = %s;\n\n"
                     % (name, table_group))
|
|
|
|
|
# Emit one C predicate xmlUCSIsCat<Name>(int code) per category, except
# for the 'Cs' category.  Categories with more than minTableSize ranges
# delegate to xmlCharInRange on their xml<Name>G group; smaller ones are
# expanded into an inline chain of comparisons joined with "||".
for name in ckeys:
    if name == 'Cs':
        continue
    ranges = Categories[name]
    output.write("static int\nxmlUCSIsCat%s(int code) {\n" % name)
    if len(ranges) > minTableSize:
        output.write(" return(xmlCharInRange((unsigned int)code, &xml%sG)"
                     % name)
    else:
        tests = []
        for (begin, end) in ranges:
            if begin == end:
                tests.append("(code == %s)" % (hex(begin)))
            else:
                tests.append("((code >= %s) && (code <= %s))" % (
                    hex(begin), hex(end)))
        # Nothing is written for an empty range list, so the text before
        # the closing line below is unchanged in that case.
        if tests:
            output.write(" return(" + " ||\n ".join(tests))
    output.write(");\n}\n\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Emit the range tables of every block, followed by the xmlUnicodeBlocks
# array that pairs each block name with the value returned by
# rangetab.gen_range_tables for it.  The table identifier is the block
# name with '-' removed; the array keeps the name unchanged.
entries = []
for block in bkeys:
    ident = block.replace('-', '')
    group = rangetab.gen_range_tables(
        output, 'xml' + ident, 'S', 'L', BlockNames[block])
    output.write("\n")
    entries.append(' {"%s",\n %s}' % (block, group))
blockGroups = ",\n".join(entries)

output.write("static const xmlUnicodeRange xmlUnicodeBlocks[] = {\n"
             + blockGroups
             + "\n};\n\n")

# Generation is complete; flush and release the output file.
output.close()
|
|
|