| #!/usr/bin/env python3 | |
| # | |
| # Portions of this script have been (shamelessly) stolen from the | |
| # prior work of Daniel Veillard (genUnicode.py) | |
| # | |
| # I, however, take full credit for any bugs, errors or difficulties :-) | |
| # | |
| # William Brack | |
| # October 2003 | |
| # | |
| # 18 October 2003 | |
| # Modified to maintain binary compatibility with previous library versions | |
| # by adding a suffix 'Q' ('quick') to the macro generated for the original | |
| # function, and adding generation of a function (with the original name) which | |
| # instantiates the macro. | |
| # | |
| import sys | |
| import rangetab | |
| # | |
| # A routine to take a list of yes/no (1, 0) values and turn it | |
| # into a list of ranges. This will later be used to determine whether | |
| # to generate single-byte lookup tables, or inline comparisons | |
| # | |
def makeRange(lst):
    """Collapse a list of yes/no flags into inclusive (start, end) ranges.

    lst is a sequence of 1/0 values.  The result is a list of tuples, one
    per maximal run of consecutive 1 entries, each holding the index of the
    first and of the last entry of that run, in ascending order.  An empty
    list, or one with no 1 entries, yields [].
    """
    ret = []
    size = len(lst)
    pos = 0
    while pos < size:
        # index() raises ValueError when the value is absent.  Passing the
        # start offset avoids copying the tail of the list on every pass.
        try:
            start = lst.index(1, pos)   # start of the next range
        except ValueError:
            break                       # no more ranges, finished
        try:
            end = lst.index(0, start)   # first entry past the range
        except ValueError:
            end = size                  # range runs to the end of the list
        ret.append((start, end - 1))    # append range tuple to list
        pos = end + 1                   # lst[end] is known to be 0, skip it
    return ret
# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced.  If there are fewer than this
# number, no table is written for the function (inline comparisons are
# expected to be used instead).
minTableSize = 6

# dictionary of functions, key=name, element contains char-map and range-list
Functs = {}

# parser state: 0 = outside any function block, 1 = inside a 'name' block
state = 0

# Open the definitions file.  Only I/O failures are reported as "missing";
# anything else (e.g. KeyboardInterrupt) is allowed to propagate.
try:
    defines = open("codegen/ranges.def", "r")
except OSError:
    print("Missing codegen/ranges.def, aborting ...")
    sys.exit(1)
#
# The lines in the .def file have three types:-
#   name:   Defines a new function block
#   ur:     Defines individual or ranges of unicode values
#   end:    Indicates the end of the function block
#
# These lines are processed below.
#
def _parse_value(text):
    """Convert one literal from the .def file into an integer.

    Three spellings are accepted: hexadecimal with a '0x' prefix, a
    character introduced by a single quote (only the character right after
    the quote is used), or plain decimal.  Raises ValueError when the text
    is malformed.
    """
    if text.startswith('0x'):
        return int(text[2:], 16)
    if text.startswith("'"):
        if len(text) < 2:
            raise ValueError("Empty character literal %r" % text)
        return ord(text[1])
    return int(text)


with defines:       # make sure the input file is closed once it is parsed
    for line in defines:
        # ignore blank lines, or lines beginning with '#'
        line = line.strip()
        if line == '' or line[0] == '#':
            continue
        try:
            # split on any run of whitespace, so that repeated blanks or
            # tabs between fields do not produce empty fields
            fields = line.split()
            kind = fields[0]
            #
            # name line:
            #   validate any previous function block already ended
            #   validate this function not already defined
            #   initialize an entry in the function dictionary
            #   including a mask table with no values yet defined
            #
            if kind == 'name':
                # The check comes before 'name' is updated, so a rejected
                # line cannot redirect the 'ur' lines of the block that is
                # still open to a different (possibly undefined) function.
                if state != 0:
                    print("'name' %s found before previous name "
                          "completed" % (fields[1]))
                    continue
                name = fields[1]
                state = 1
                if name in Functs:
                    print("name '%s' already present - may give"
                          " wrong results" % (name))
                else:
                    # dict entry with two list elements (chdata, rangedata);
                    # chdata is a 256-entry table of 0/1 flags
                    Functs[name] = [[0] * 256, []]
            #
            # end line:
            #   validate there was a preceding function name line
            #   set state to show no current function active
            #
            elif kind == 'end':
                if state == 0:
                    print("'end' found outside of function block")
                    continue
                state = 0
            #
            # ur line:
            #   validate function has been defined
            #   process remaining fields on the line, which may be either
            #   individual unicode values or ranges of values
            #
            elif kind == 'ur':
                if state != 1:
                    raise Exception("'ur' found outside of 'name' block")
                for el in fields[1:]:
                    pos = el.find('..')
                    # pos <= 0 means not a range, so must be individual value
                    if pos <= 0:
                        value = _parse_value(el)
                        if value < 0 or value > 0x1fffff:
                            raise Exception('Illegal value (%s) in ch for'
                                            ' name %s' % (el, name))
                        # for ur we have only ranges (makes things simpler),
                        # so convert val to range
                        currange = (value, value)
                    # pos > 0 means this is a range, so isolate/validate
                    # the interval
                    else:
                        # split the range into its first-val, last-val
                        (first, last) = el.split("..")
                        start = _parse_value(first)
                        end = _parse_value(last)
                        if start < 0 or end > 0x1fffff or start > end:
                            raise Exception("Invalid range '%s'" % el)
                        currange = (start, end)
                    # common path - 'currange' has the range, now take care
                    # of it.  We split on single-byte values vs. multibyte;
                    # a range counts as single-byte only when its upper
                    # bound is below 0x100.
                    if currange[1] < 0x100:     # single-byte
                        for ch in range(currange[0], currange[1] + 1):
                            # validate that value not previously defined
                            if Functs[name][0][ch]:
                                msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                                raise Exception(msg)
                            Functs[name][0][ch] = 1
                    else:                       # multi-byte
                        if currange in Functs[name][1]:
                            raise Exception("range already defined in"
                                            " function")
                        Functs[name][1].append(currange)
        except Exception:
            # report the offending line, then let the error abort the run
            print("Failed to process line: %s" % (line))
            raise
# Open the generated include file for writing.  Only I/O failures are
# reported here; anything else (e.g. KeyboardInterrupt) propagates.
try:
    output = open("codegen/ranges.inc", "w")
except OSError:
    print("Failed to open codegen/ranges.inc")
    sys.exit(1)
#
# Now output the generated data.
#
fkeys = sorted(Functs.keys())

# The 'with' block closes the output file on success and also when a write
# or the range-table generator fails part-way through.
with output:
    for f in fkeys:
        # First we convert the specified single-byte values into a group of
        # ranges.
        if any(Functs[f][0]):           # only check if at least one entry
            rangeTable = makeRange(Functs[f][0])
            numRanges = len(rangeTable)
            if numRanges >= minTableSize:   # table is worthwhile
                # write the constant data to the code file
                output.write("const unsigned char %s_tab[256] = {\n" % f)
                pline = " "
                # the first 255 entries are followed by a comma; a line is
                # flushed as soon as it grows past 72 characters
                for n in range(255):
                    pline += " 0x%02x," % Functs[f][0][n]
                    if len(pline) > 72:
                        output.write(pline + "\n")
                        pline = " "
                # the last entry closes the initializer instead
                output.write(pline + " 0x%02x };\n\n" % Functs[f][0][255])

    #
    # Next we do the unicode ranges
    #
    for f in fkeys:
        if len(Functs[f][1]) > 0:   # only generate if unicode ranges present
            rangeTable = Functs[f][1]
            rangeTable.sort()       # ascending tuple sequence (in place)
            # gen_range_tables is defined in the rangetab module; its return
            # value is written out as the initializer of the group struct
            group = rangetab.gen_range_tables(output, f, '_srng', '_lrng',
                                              rangeTable)
            output.write("const xmlChRangeGroup %sGroup =\n\t%s;\n\n" %
                         (f, group))