File size: 8,144 Bytes
6baed57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
#!/usr/bin/env python3
#
# Portions of this script have been (shamelessly) stolen from the
# prior work of Daniel Veillard (genUnicode.py)
#
# I, however, take full credit for any bugs, errors or difficulties :-)
#
# William Brack
# October 2003
#
# 18 October 2003
# Modified to maintain binary compatibility with previous library versions
# by adding a suffix 'Q' ('quick') to the macro generated for the original
# function, and adding generation of a function (with the original name) which
# instantiates the macro.
#

import sys
import rangetab

#
# A routine to take a list of yes/no (1, 0) values and turn it
# into a list of ranges.  This will later be used to determine whether
# to generate single-byte lookup tables, or inline comparisons
#
def makeRange(lst):
    """Collapse a list of yes/no (1, 0) flags into inclusive index ranges.

    Every run of consecutive 1 entries in lst becomes one (first, last)
    tuple of list indices, both ends inclusive.  The tuples are returned
    in ascending order; a list containing no 1 entries yields [].
    """
    ret = []
    size = len(lst)
    pos = 0
    while pos < size:
        # Search from 'pos' directly instead of slicing, so the tail of
        # the list is not copied on every iteration.
        try:
            pos = lst.index(1, pos)     # start of next range
        except ValueError:
            break                       # no more ranges, finished
        try:
            e = lst.index(0, pos)       # first index past the range
        except ValueError:              # range runs to the end of the list
            e = size
        ret.append((pos, e - 1))        # append range tuple to list
        pos = e + 1                     # lst[e] is 0, so resume after it
    return ret

# minTableSize gives the minimum number of ranges which must be present
# before a 256-byte lookup table is produced.  If there are fewer than this
# number, no table is written for that function.
# NOTE(review): the original comment said a macro with inline comparisons
# is generated in that case; presumably that happens elsewhere - confirm.
minTableSize = 6

# dictionary of functions, key=name, element contains char-map and range-list
Functs = {}

# parser state: 0 = outside any function block, 1 = inside a 'name' block
state = 0

# Only I/O failures are treated as "missing file"; anything else (for
# example a keyboard interrupt) is allowed to propagate.
try:
    defines = open("codegen/ranges.def", "r")
except OSError:
    print("Missing codegen/ranges.def, aborting ...")
    sys.exit(1)

#
# The lines in the .def file have three types:-
#   name:   Defines a new function block
#   ur:     Defines individual or ranges of unicode values
#   end:    Indicates the end of the function block
#
# These lines are processed below.
#
def _parseValue(text):
    """Convert one textual value from the .def file into an integer.

    Three spellings are accepted: hexadecimal with a leading '0x', a
    quoted character (the code of the character following the opening
    quote is used), or plain decimal.  Malformed text raises ValueError
    or IndexError, which the caller reports together with the line.
    """
    if text[0:2] == '0x':
        return int(text[2:], 16)
    if text[0] == "'":
        return ord(text[1])
    return int(text)


try:
    for line in defines:
        # ignore lines beginning with '#', and blank lines
        if line[0] == '#':
            continue
        line = line.strip()
        if line == '':
            continue
        # split line into space-separated fields, then split on type
        try:
            fields = line.split(' ')
            #
            # name line:
            #   validate any previous function block already ended
            #   validate this function not already defined
            #   initialize an entry in the function dictionary
            #       including a mask table with no values yet defined
            #
            if fields[0] == 'name':
                name = fields[1]
                if state != 0:
                    print("'name' %s found before previous name "
                          "completed" % (fields[1]))
                    continue
                state = 1
                if name in Functs:
                    print("name '%s' already present - may give"
                          " wrong results" % (name))
                else:
                    # dict entry with two list elements:
                    #   [0] chdata: 256 yes/no flags, one per byte value
                    #   [1] rangedata: list of (start, end) tuples
                    Functs[name] = [[0] * 256, []]
            #
            # end line:
            #   validate there was a preceding function name line
            #   set state to show no current function active
            #
            elif fields[0] == 'end':
                if state == 0:
                    print("'end' found outside of function block")
                    continue
                state = 0

            #
            # ur line:
            #   validate function has been defined
            #   process remaining fields on the line, which may be either
            #       individual unicode values or ranges of values
            #
            elif fields[0] == 'ur':
                if state != 1:
                    raise Exception("'ur' found outside of 'name' block")
                for el in fields[1:]:
                    pos = el.find('..')
                    # pos <= 0 means not a range, so must be individual value
                    if pos <= 0:
                        value = _parseValue(el)
                        if value < 0 or value > 0x1fffff:
                            raise Exception('Illegal value (%s) in ch for'
                                            ' name %s' % (el, name))
                        # for ur we have only ranges (makes things simpler),
                        # so convert val to range
                        currange = (value, value)
                    # pos > 0 means this is a range, so isolate/validate
                    # the interval
                    else:
                        # split the range into its first-val, last-val
                        (first, last) = el.split("..")
                        # convert values from text into binary
                        start = _parseValue(first)
                        end = _parseValue(last)
                        if start < 0 or end > 0x1fffff or start > end:
                            raise Exception("Invalid range '%s'" % el)
                        currange = (start, end)
                    # common path - 'currange' has the range, now take
                    # care of it.  We split on single-byte values vs.
                    # multibyte, judged by the upper end of the range.
                    if currange[1] < 0x100:     # single-byte
                        for ch in range(currange[0], currange[1] + 1):
                            # validate that value not previously defined
                            if Functs[name][0][ch]:
                                msg = "Duplicate ch value '%s' for name '%s'" % (el, name)
                                raise Exception(msg)
                            Functs[name][0][ch] = 1
                    else:                       # multi-byte
                        if currange in Functs[name][1]:
                            raise Exception("range already defined in"
                                            " function")
                        else:
                            Functs[name][1].append(currange)

        except Exception:
            # report which input line was at fault, then abort with the
            # original traceback
            print("Failed to process line: %s" % (line))
            raise
finally:
    # the definitions file is not needed once parsing ends (or fails)
    defines.close()

# Open the generated include file for writing (an existing file is
# truncated).  Only I/O failures are reported as "failed to open";
# other exceptions propagate unchanged.
try:
    output = open("codegen/ranges.inc", "w")
except OSError:
    print("Failed to open codegen/ranges.inc")
    sys.exit(1)

#
# Now output the generated data.
#

# Function names in sorted order, so the generated file is stable.
fkeys = sorted(Functs)

for f in fkeys:
    charMap = Functs[f][0]
    # Nothing to do when no single-byte value is set for this function.
    if max(charMap) <= 0:
        continue
    # Convert the single-byte flags into ranges; a lookup table is only
    # worthwhile when there are at least minTableSize of them.
    if len(makeRange(charMap)) < minTableSize:
        continue
    # write the constant data to the code file
    output.write("const unsigned char %s_tab[256] = {\n" % f)
    row = "   "
    for flag in charMap[:255]:
        row = row + " 0x%02x," % flag
        # flush the row once it has grown past 72 columns
        if len(row) > 72:
            output.write(row + "\n")
            row = "   "
    # the last entry carries the closing brace instead of a comma
    output.write(row + " 0x%02x };\n\n" % charMap[255])

#
# Next we do the unicode ranges
#

for f in fkeys:
    unicodeRanges = Functs[f][1]
    if not unicodeRanges:       # only generate if unicode ranges present
        continue
    unicodeRanges.sort()        # in place, ascending tuple sequence
    # NOTE(review): gen_range_tables presumably writes the range arrays
    # to 'output' and returns the group initializer text - confirm in
    # the rangetab module.
    group = rangetab.gen_range_tables(
        output, f, '_srng', '_lrng', unicodeRanges)
    output.write("const xmlChRangeGroup %sGroup =\n\t%s;\n\n" % (f, group))

output.close()