File size: 9,282 Bytes
5c2ed06
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
/**
 * Dashycode!
 *
 * Encodes a string in a restricted string containing only alphanumeric
 * characters and dashes.
 *
 * (The name is a riff on Punycode, which is what I originally wanted
 * to use for this purpose, but it turns out Punycode does not work on
 * arbitrary strings.)
 *
 * @author Guangcong Luo <guangcongluo@gmail.com>
 * @license MIT
 */

// Output alphabet for the bitstream part of a Dashycode string. It has exactly
// 32 characters, so each character stands for one 5-bit group (the value is the
// character's index in this string). The characters `0`, `1`, `l` and `o` are
// not part of the alphabet.
const CODE_MAP = "23456789abcdefghijkmnpqrstuvwxyz";
// The 32 ASCII punctuation characters that get the short "ASCII" chunk: the
// character's index in this string is what gets written as the 5-bit payload.
const UNSAFE_MAP = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";

// chunk types:
// 0b00 = 0x0 = block separator ("wait until next dash")
// 0b01 = 0x1 = capitalize
//   0b1_01 = 0x5 = specify capitalization of next 8 alphabetic chars of safe part (8 more bits)
//   0b0_01 = 0x1 = short for 0b00000001_1_01 (capitalize first letter of next 8 alpha chars of safe part)
// 0b10 = 0x2 = ASCII in UNSAFE_MAP (5 more bits)
// 0b11 = 0x3 = other
//   0b0_11 = 0x3 = space
//   0b1_11 = 0x7 = UTF-16 (16 more bits)

/**
 * An object representing a Dashycode bitstream.
 * The stream can be either a read stream or a write stream, but not
 * both simultaneously.
 */
interface DashyStream {
	// Characters from CODE_MAP. A write stream appends completed 5-bit groups
	// here; a read stream consumes characters from the front of this string.
	codeBuf: string;
	// Bit accumulator. Bits are stored least-significant first: writers add new
	// bits above the pending ones, readers take bits from the low end.
	buf: number;
	// Number of valid bits currently held in `buf`. On a read stream this can
	// drop below zero once the coded characters run out (the stream is treated
	// as having infinite trailing zero bits).
	bufLength: number;
}

/**
 * Appends `writeBufLength` bits (the value `writeBuf`) to a write stream.
 *
 * The new bits are placed above the bits still pending in `stream.buf`;
 * every complete group of five bits is then moved out of the accumulator
 * and appended to `stream.codeBuf` as a single CODE_MAP character.
 */
function streamWrite(stream: DashyStream, writeBufLength: number, writeBuf: number) {
	// Position the incoming bits just past the ones that are still waiting.
	const shifted = writeBuf << stream.bufLength;
	stream.buf = stream.buf + shifted;
	stream.bufLength = stream.bufLength + writeBufLength;

	// Drain the accumulator five bits at a time, lowest bits first.
	for (; stream.bufLength >= 5; stream.bufLength -= 5) {
		const lowFive = stream.buf & 0x1F;
		stream.codeBuf += CODE_MAP.charAt(lowFive);
		stream.buf >>= 5;
	}
}

/**
 * Returns the coded text of a write stream without modifying the stream.
 *
 * The bits still pending in `stream.buf` are rendered as one final CODE_MAP
 * character, and any run of `2` characters at the end of the result is
 * dropped (a `2` is an all-zero 5-bit group).
 */
function streamGetCode(stream: DashyStream) {
	const full = stream.codeBuf + CODE_MAP.charAt(stream.buf);

	// Walk backwards over the trailing `2`s to find where the useful text ends.
	let keep = full.length;
	while (keep > 0 && full.charAt(keep - 1) === '2') {
		keep--;
	}
	return full.slice(0, keep);
}

/**
 * Returns the next bits of a read stream without consuming them.
 *
 * Coded characters are pulled from the front of `stream.codeBuf` into the
 * accumulator until at least `readLength` bits are available or the coded
 * text is exhausted. The result is `stream.buf` masked with `readMask`,
 * which defaults to the low `readLength` bits.
 *
 * @throws Error if a character outside CODE_MAP is encountered.
 */
function streamPeek(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	for (;;) {
		if (stream.bufLength >= readLength) break;
		if (!stream.codeBuf.length) break;

		const symbol = stream.codeBuf.charAt(0);
		const fiveBits = CODE_MAP.indexOf(symbol);
		if (fiveBits < 0) throw new Error("Invalid character in coded buffer");

		// Consume the character and stack its bits above the pending ones.
		stream.codeBuf = stream.codeBuf.slice(1);
		stream.buf += fiveBits << stream.bufLength;
		stream.bufLength += 5;
	}
	return stream.buf & readMask;
}

/**
 * Reads and consumes `readLength` bits from a read stream.
 *
 * The value is obtained through `streamPeek` (same masking rules), after
 * which the consumed bits are shifted out of the accumulator.
 */
function streamRead(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	const value = streamPeek(stream, readLength, readMask);

	// The bit count is allowed to become negative here: once the coded text
	// is used up, the stream behaves as an endless run of zero bits.
	stream.bufLength -= readLength;
	stream.buf >>= readLength;
	return value;
}

/**
 * Encodes a string as Dashycode.
 *
 * The result has the form `safePart` or `safePart--unsafePart`:
 * - `safePart` holds the ASCII letters and digits of the input, with a `-`
 *   wherever something else (a space, punctuation, any other character)
 *   appeared between them.
 * - `unsafePart` is a bitstream written with CODE_MAP characters. It records,
 *   in order, what each `-` stands for and which letters were uppercase.
 *   It is omitted when it would be empty.
 *
 * @param str The string to encode. An empty string encodes to `0--0`.
 * @param allowCaps When true, uppercase letters are copied into the safe part
 *   unchanged. When false (the default), they are lowercased in the safe part
 *   and their capitalization is recorded in the bitstream.
 * @returns The encoded string.
 * @throws Error if an internal invariant of the encoder is violated.
 */
export function encode(str: string, allowCaps = false): string {
	if (!str) return '0--0';
	let safePart = '';
	const unsafeStream: DashyStream = {
		codeBuf: '',
		buf: 0x0,
		bufLength: 0,
	};
	// true while the most recent output went to the safe part; false while a
	// run of unsafe characters is being written to the bitstream
	let isSafe = true;
	// index of the next alphabetic character within the current 8-letter
	// capitalization window
	let alphaIndex = 0;
	// pending capitalization chunk: low 3 bits are the 0b1_01 tag, bit (k + 3)
	// is set when letter k of the window is uppercase; 0 means nothing pending
	let capBuffer = 0x0;
	// One extra iteration (i === str.length) runs with the sentinel code -1 so
	// that a capitalization chunk still pending at the end gets flushed.
	for (let i = 0; i < str.length + 1; i++) {
		const curCharCode = i !== str.length ? str.charCodeAt(i) : -1;
		const isLowercase = (97 <= curCharCode && curCharCode <= 122); // a-z
		const isUppercase = (65 <= curCharCode && curCharCode <= 90); // A-Z
		const isNumeric = (48 <= curCharCode && curCharCode <= 57); // 0-9
		if (capBuffer && (
			!(isLowercase || isUppercase || isNumeric) ||
			alphaIndex >= 8 ||
			i === str.length
		)) {
			// flush cap buffer
			if (capBuffer === 0xD) {
				// only the first letter of the window is uppercase: use the
				// 3-bit short form instead of the full 11-bit chunk
				streamWrite(unsafeStream, 3, 0x1);
			} else {
				streamWrite(unsafeStream, 11, capBuffer);
			}
			alphaIndex -= 8;
			capBuffer = 0x0;
		}
		if (i === str.length) break;
		if (isLowercase || isUppercase || isNumeric) {
			if (alphaIndex < 0) throw new Error("alphaIndex should be non-negative here");
			if (!isSafe) {
				if (capBuffer) throw new Error("capBuffer shouldn't exist here");
				// block separator: ends the current run of unsafe characters
				streamWrite(unsafeStream, 2, 0x0);
				isSafe = true;
			}
			if (isUppercase && !allowCaps) {
				safePart += String.fromCharCode(curCharCode + 32);
				// Every full window of letters that came before this one without
				// any uppercase still needs an (all-lowercase) chunk, so that
				// this letter's chunk lines up with the right window.
				while (alphaIndex >= 8) {
					if (capBuffer) throw new Error("capBuffer shouldn't exist here");
					alphaIndex -= 8;
					streamWrite(unsafeStream, 11, 0x5);
				}
				if (!capBuffer) capBuffer = 0x5;
				capBuffer += 1 << (alphaIndex + 3);
			} else {
				safePart += str.charAt(i);
			}
			// digits are safe characters but do not take up a capitalization slot
			if (isUppercase || isLowercase) alphaIndex++;
			continue;
		}
		if (capBuffer) throw new Error("capBuffer shouldn't exist here");
		alphaIndex = 0;
		if (isSafe && curCharCode === 32) { // space
			// A single space between two safe runs is just a `-` in the safe
			// part plus an empty block (separator only) in the bitstream.
			const nextCharCode = str.charCodeAt(i + 1);
			if ((97 <= nextCharCode && nextCharCode <= 122) || // a-z
				(65 <= nextCharCode && nextCharCode <= 90) || // A-Z
				(48 <= nextCharCode && nextCharCode <= 57)) { // 0-9
				safePart += '-';
				streamWrite(unsafeStream, 2, 0x0);
				continue;
			}
		}
		if (isSafe) {
			// first unsafe character of a run: mark its position in the safe part
			safePart += '-';
			isSafe = false;
		}
		if (curCharCode === 32) { // space
			streamWrite(unsafeStream, 3, 0x3);
		} else {
			const unsafeMapIndex = UNSAFE_MAP.indexOf(str.charAt(i));
			if (unsafeMapIndex >= 0) {
				// 0b10 tag + 5-bit index into UNSAFE_MAP
				streamWrite(unsafeStream, 7, (unsafeMapIndex << 2) + 0x2);
			} else {
				// 0b1_11 tag + the 16-bit UTF-16 code unit
				streamWrite(unsafeStream, 19, (curCharCode << 3) + 0x7);
			}
		}
	}
	let unsafePart = streamGetCode(unsafeStream);
	if (safePart.startsWith('-')) {
		// The input starts with an unsafe run: drop the leading dash and
		// record it as a trailing `2` on the unsafe part instead.
		safePart = safePart.slice(1);
		unsafePart = `${unsafePart}2`;
	}
	if (safePart.endsWith('-')) {
		// the `--` delimiter already stands for a final dash
		safePart = safePart.slice(0, -1);
	}
	if (!safePart) {
		// No safe characters at all: both parts get a `0` marker, and the
		// trailing `2` (if present) is dropped again.
		safePart = '0';
		unsafePart = `0${unsafePart}`;
		if (unsafePart.endsWith('2')) unsafePart = unsafePart.slice(0, -1);
	}
	if (!unsafePart) return safePart;
	return `${safePart}--${unsafePart}`;
}

/**
 * Decodes a Dashycode string back into the original string.
 *
 * A string without a `--` delimiter has no bitstream; every `-` in it is
 * simply a space. Otherwise the text after the last `--` is read as a
 * bitstream that supplies the contents of each dash and the capitalization
 * of the letters in the safe part.
 *
 * @throws Error on a malformed `0` marker, on a capitalization chunk where
 *   an unsafe chunk is expected, or (via the stream reader) on a character
 *   that is not part of the code alphabet.
 */
export function decode(codedStr: string) {
	let finalDash = codedStr.lastIndexOf('--');
	if (finalDash < 0) {
		// Shortcut: the general loop below would give the same answer, this
		// is just quicker.
		return codedStr.replace(/-/g, ' ');
	}

	if (codedStr.charAt(finalDash + 2) === '0') {
		// "no safe chars" form: must look exactly like `0--0...`
		const wellFormed = codedStr.startsWith('0') && finalDash === 1;
		if (!wellFormed) {
			throw new Error("Invalid Dashycode");
		}
		codedStr = '--' + codedStr.slice(4);
		finalDash -= 1;
	}
	if (codedStr.endsWith('2')) {
		// "starts unsafe" marker: turn it back into a leading dash
		codedStr = '-' + codedStr.slice(0, -1);
		finalDash += 1;
	}

	const bits: DashyStream = {
		codeBuf: codedStr.slice(finalDash + 2),
		buf: 0x0,
		bufLength: 0,
	};

	/**
	 * Capitalization state:
	 * 1 : nothing loaded; look for a capitalization chunk at the next letter
	 * 0 : treat all remaining letters of this run as lowercase
	 * other: a marker 1 bit sitting above n pending bits, one bit for each
	 * of the next n alphabetic characters
	 */
	let capState = 1;
	let decoded = '';

	for (let pos = 0; pos <= finalDash; pos++) {
		const symbol = codedStr.charAt(pos);

		if (symbol === '-') {
			capState = 1;
			// Read unsafe chunks until the block separator shows up.
			let sawUnsafe = false;
			for (;;) {
				const tag = streamRead(bits, 2, 0x3);
				if (tag === 0x0) break; // back to safe characters
				if (tag === 0x1) throw new Error("Invalid capitalization token");
				if (tag === 0x2) {
					decoded += UNSAFE_MAP.charAt(streamRead(bits, 5, 0x1F));
				} else if (streamRead(bits, 1, 0x1)) {
					decoded += String.fromCharCode(streamRead(bits, 16, 0xFFFF));
				} else {
					decoded += ' ';
				}
				sawUnsafe = true;
			}
			// An empty block is a plain space, except for the final dash,
			// which only marks the end of the safe part.
			if (!sawUnsafe && pos !== finalDash) decoded += ' ';
			continue;
		}

		// safe character
		const code = codedStr.charCodeAt(pos);
		const isLower = code >= 97 && code <= 122; // a-z
		if (!isLower) {
			decoded += symbol;
			continue;
		}

		if (capState === 1) {
			capState = 0;
			if (streamPeek(bits, 2, 0x3) === 0x1) {
				const capTag = streamRead(bits, 3, 0x7);
				if (capTag === 0x5) {
					capState = streamRead(bits, 8, 0xFF) + 0x100;
				} else if (capTag === 0x1) {
					capState = 0x101;
				}
			}
		}
		const makeUpper = (capState & 0x1) !== 0;
		capState >>= 1;
		decoded += makeUpper ? String.fromCharCode(code - 32) : symbol;
	}
	return decoded;
}

/**
 * Renders the bitstream part of a Dashycode string in human-readable form,
 * one space-prefixed token per chunk.
 *
 * Each token is prepended to the output, so the first chunk of the stream
 * ends up rightmost. A leading `0` and a trailing `2` in `codeBuf` are
 * reported as `[no safe chars]` and `[start unsafe]` respectively.
 *
 * @param codeBuf The coded bitstream text.
 * @param translate When true (the default) chunk types are shown by name
 *   (`|`, `capfirst`, `-cap`, `-ascii`, `space`, `-utf`); when false they
 *   are shown as raw tag bits.
 */
export function vizStream(codeBuf: string, translate = true) {
	let rendered = '';
	if (codeBuf.startsWith('0')) {
		rendered = ' [no safe chars]' + rendered;
		codeBuf = codeBuf.slice(1);
	}
	if (codeBuf.endsWith('2')) {
		rendered = ' [start unsafe]' + rendered;
		codeBuf = codeBuf.slice(0, -1);
	}
	const stream: DashyStream = {
		codeBuf,
		buf: 0x0,
		bufLength: 0,
	};

	// Reads `width` bits and formats them as a zero-padded binary string.
	const readBinary = (s: DashyStream, width: number) => {
		const value = streamRead(s, width);
		return value.toString(2).padStart(width, '0');
	};

	while (stream.bufLength > 0 || stream.codeBuf) {
		const tag = streamRead(stream, 2);
		let token = '';
		if (tag === 0x0) {
			token = translate ? ' |' : ' 00';
		} else if (tag === 0x1) {
			if (streamRead(stream, 1)) {
				token = ' ' + readBinary(stream, 8) + (translate ? '-cap' : '_1_01');
			} else {
				token = translate ? ' capfirst' : ' 0_01';
			}
		} else if (tag === 0x2) {
			token = ' ' + readBinary(stream, 5) + (translate ? '-ascii' : '_10');
		} else if (tag === 0x3) {
			if (streamRead(stream, 1)) {
				token = ' ' + readBinary(stream, 16) + (translate ? '-utf' : '_1_11');
			} else {
				token = translate ? ' space' : ' 0_11';
			}
		}
		rendered = token + rendered;
	}
	return rendered;
}