/**
 * Dashycode!
 *
 * Encodes a string as a restricted string containing only alphanumeric
 * characters and dashes.
 *
 * (The name is a riff on Punycode, which is what I originally wanted
 * to use for this purpose, but it turns out Punycode does not work on
 * arbitrary strings.)
 *
 * @author Guangcong Luo <guangcongluo@gmail.com>
 * @license MIT
 */
/**
 * Output alphabet of the coded part: each 5-bit group of the bitstream is
 * written as the character at that index (32 characters).
 */
const CODE_MAP = "23456789abcdefghijkmnpqrstuvwxyz";
/**
 * ASCII punctuation that gets the short `0b10` chunk: such a character is
 * stored as its 5-bit index into this string (32 characters).
 */
const UNSAFE_MAP = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
// chunk types:
//   0b00 = 0x0 = block separator ("wait until next dash")
//   0b01 = 0x1 = capitalize
//     0b1_01 = 0x5 = specify capitalization of next 8 alphabetic chars of safe part (8 more bits)
//     0b0_01 = 0x1 = short for 0b00000001_1_01 (capitalize first letter of next 8 alpha chars of safe part)
//   0b10 = 0x2 = ASCII in UNSAFE_MAP (5 more bits)
//   0b11 = 0x3 = other
//     0b0_11 = 0x3 = space
//     0b1_11 = 0x7 = UTF-16 (16 more bits)
/**
 * An object representing a Dashycode bitstream.
 *
 * The stream can be either a read stream or a write stream, but not
 * both simultaneously.
 */
interface DashyStream {
	/**
	 * Coded characters: those already emitted (when writing) or those not
	 * yet consumed (when reading).
	 */
	codeBuf: string;
	/** Pending bits that have not yet been emitted/consumed, lowest bit first. */
	buf: number;
	/**
	 * Number of meaningful bits in `buf`. On a read stream this can go
	 * negative once the coded characters run out.
	 */
	bufLength: number;
}
/**
 * Appends a chunk of bits to a write stream.
 *
 * The chunk is placed above the bits already pending in `stream.buf`; every
 * complete group of 5 pending bits is then moved out to `stream.codeBuf` as
 * one `CODE_MAP` character.
 *
 * @param stream Stream to append to (mutated in place).
 * @param writeBufLength Number of bits the chunk occupies.
 * @param writeBuf The chunk's bits. It is added as-is, so it must fit in
 *   `writeBufLength` bits.
 */
function streamWrite(stream: DashyStream, writeBufLength: number, writeBuf: number): void {
	stream.buf += writeBuf << stream.bufLength;
	stream.bufLength += writeBufLength;
	// flush every full 5-bit group, lowest bits first
	for (; stream.bufLength >= 5; stream.bufLength -= 5) {
		stream.codeBuf += CODE_MAP.charAt(stream.buf & 0x1F);
		stream.buf >>= 5;
	}
}
/**
 * Returns the final coded string of a write stream.
 *
 * The bits still pending in `stream.buf` are emitted as one last character,
 * then all trailing `2`s are removed: `2` encodes the all-zero group, and a
 * read stream yields zeros once its characters run out.
 *
 * @param stream Write stream to finish (not modified).
 * @returns The coded string, possibly empty.
 */
function streamGetCode(stream: DashyStream): string {
	const code = stream.codeBuf + CODE_MAP.charAt(stream.buf);
	// truncate trailing `2`s (0b00000 chunks)
	return code.replace(/2+$/, '');
}
/**
 * Returns the next bits of a read stream without consuming them.
 *
 * Coded characters are pulled from the front of `stream.codeBuf` into
 * `stream.buf` until at least `readLength` bits are pending or no characters
 * remain; in the latter case the missing high bits read as zero.
 *
 * @param stream Read stream (its buffers are refilled in place).
 * @param readLength Number of bits wanted.
 * @param readMask Mask selecting the returned bits; defaults to the low
 *   `readLength` bits (the default expression is only meaningful for
 *   `readLength` up to 16).
 * @returns `stream.buf` masked with `readMask`.
 * @throws Error if a character outside `CODE_MAP` is encountered.
 */
function streamPeek(
	stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)
): number {
	while (stream.bufLength < readLength && stream.codeBuf.length) {
		const next5Bits = CODE_MAP.indexOf(stream.codeBuf.charAt(0));
		if (next5Bits < 0) throw new Error("Invalid character in coded buffer");
		stream.codeBuf = stream.codeBuf.slice(1);
		// new bits go above the ones already pending
		stream.buf += next5Bits << stream.bufLength;
		stream.bufLength += 5;
	}
	return stream.buf & readMask;
}
/**
 * Reads and consumes the next bits of a read stream.
 *
 * @param stream Read stream (mutated in place).
 * @param readLength Number of bits to consume.
 * @param readMask Mask selecting the returned bits; defaults to the low
 *   `readLength` bits.
 * @returns The value `streamPeek` reports for the same arguments.
 * @throws Error if `streamPeek` meets a character outside `CODE_MAP`.
 */
function streamRead(
	stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)
): number {
	const output = streamPeek(stream, readLength, readMask);
	// Note: bufLength can go negative! Streams have infinite trailing 0s
	stream.bufLength -= readLength;
	stream.buf >>= readLength;
	return output;
}
/**
 * Encodes an arbitrary string as Dashycode.
 *
 * The result has the form `safePart` or `safePart--unsafePart`:
 * - `safePart` holds the ASCII letters and digits of `str`, with each run of
 *   other characters replaced by a single `-`;
 * - `unsafePart` is a bitstream, written with `CODE_MAP` characters, that
 *   records everything the safe part lost (capitalization, which characters
 *   each `-` stands for).
 *
 * @param str String to encode. Characters outside ASCII are stored per UTF-16
 *   code unit.
 * @param allowCaps When true, uppercase letters are kept as-is in the safe
 *   part; otherwise they are lowercased and their capitalization is stored in
 *   the unsafe part.
 * @returns The encoded string; `'0--0'` for an empty input.
 * @throws Error if an internal invariant is violated.
 */
export function encode(str: string, allowCaps = false): string {
	if (!str) return '0--0';

	// ASCII letter or digit? (NaN, from charCodeAt past the end, is not.)
	const isAlphanumericCode = (code: number): boolean =>
		(97 <= code && code <= 122) || // a-z
		(65 <= code && code <= 90) || // A-Z
		(48 <= code && code <= 57); // 0-9

	let safePart = '';
	const unsafeStream: DashyStream = {
		codeBuf: '',
		buf: 0x0,
		bufLength: 0,
	};
	// whether we are currently inside a run of safe characters
	let isSafe = true;
	// number of letters seen in the current safe run, relative to the start
	// of the 8-letter window the cap buffer describes
	let alphaIndex = 0;
	// pending capitalization chunk: 0 if none, else 0x5 plus one bit
	// (starting at bit 3) per capitalized letter of the window
	let capBuffer = 0x0;

	// one extra iteration past the end so a pending cap buffer gets flushed
	for (let i = 0; i <= str.length; i++) {
		const atEnd = i === str.length;
		const curCharCode = atEnd ? -1 : str.charCodeAt(i);
		const isUppercase = (65 <= curCharCode && curCharCode <= 90); // A-Z
		const isLowercase = (97 <= curCharCode && curCharCode <= 122); // a-z
		const isAlphanumeric = isAlphanumericCode(curCharCode);

		if (capBuffer && (!isAlphanumeric || alphaIndex >= 8 || atEnd)) {
			// flush cap buffer
			if (capBuffer === 0xD) {
				// only the first letter is capitalized: use the short form
				streamWrite(unsafeStream, 3, 0x1);
			} else {
				streamWrite(unsafeStream, 11, capBuffer);
			}
			alphaIndex -= 8;
			capBuffer = 0x0;
		}
		if (atEnd) break;

		if (isAlphanumeric) {
			if (alphaIndex < 0) throw new Error("alphaIndex should be non-negative here");
			if (!isSafe) {
				if (capBuffer) throw new Error("capBuffer shouldn't exist here");
				// block separator: back to the safe part
				streamWrite(unsafeStream, 2, 0x0);
				isSafe = true;
			}
			if (isUppercase && !allowCaps) {
				safePart += String.fromCharCode(curCharCode + 32);
				// skip over fully-lowercase 8-letter windows before this letter
				while (alphaIndex >= 8) {
					if (capBuffer) throw new Error("capBuffer shouldn't exist here");
					alphaIndex -= 8;
					streamWrite(unsafeStream, 11, 0x5);
				}
				if (!capBuffer) capBuffer = 0x5;
				capBuffer += 1 << (alphaIndex + 3);
			} else {
				safePart += str.charAt(i);
			}
			if (isUppercase || isLowercase) alphaIndex++;
			continue;
		}

		if (capBuffer) throw new Error("capBuffer shouldn't exist here");
		alphaIndex = 0;

		// a single space between two safe runs is just a dash
		if (isSafe && curCharCode === 32 && isAlphanumericCode(str.charCodeAt(i + 1))) {
			safePart += '-';
			streamWrite(unsafeStream, 2, 0x0);
			continue;
		}

		if (isSafe) {
			safePart += '-';
			isSafe = false;
		}
		if (curCharCode === 32) { // space
			streamWrite(unsafeStream, 3, 0x3);
			continue;
		}
		const unsafeMapIndex = UNSAFE_MAP.indexOf(str.charAt(i));
		if (unsafeMapIndex >= 0) {
			streamWrite(unsafeStream, 7, (unsafeMapIndex << 2) + 0x2);
		} else {
			streamWrite(unsafeStream, 19, (curCharCode << 3) + 0x7);
		}
	}

	let unsafePart = streamGetCode(unsafeStream);
	if (safePart.startsWith('-')) {
		// the string starts with an unsafe run: mark that with a trailing `2`
		safePart = safePart.slice(1);
		unsafePart = `${unsafePart}2`;
	}
	if (safePart.endsWith('-')) {
		safePart = safePart.slice(0, -1);
	}
	if (!safePart) {
		// no safe characters at all: `0` placeholders on both sides
		safePart = '0';
		unsafePart = `0${unsafePart}`;
		if (unsafePart.endsWith('2')) unsafePart = unsafePart.slice(0, -1);
	}
	if (!unsafePart) return safePart;
	return `${safePart}--${unsafePart}`;
}
/**
 * Decodes a Dashycode string produced by `encode` back to the original.
 *
 * The part before the last `--` is the safe part (letters, digits and single
 * dashes); the part after it is a bitstream that restores capitalization and
 * says what each dash stands for.
 *
 * @param codedStr The Dashycode string.
 * @returns The decoded string.
 * @throws Error if the `0` placeholder is malformed ("Invalid Dashycode"), if
 *   a capitalization chunk appears where an unsafe chunk is expected, or if
 *   the coded part contains a character outside `CODE_MAP`.
 */
export function decode(codedStr: string): string {
	// work on a local copy; the markers below are normalized away
	let coded = codedStr;
	let lastDashIndex = coded.lastIndexOf('--');
	if (lastDashIndex < 0) {
		// the regular decoder can also handle this case; but this should
		// be faster
		return coded.replace(/-/g, ' ');
	}
	if (coded.charAt(lastDashIndex + 2) === '0') {
		// "no safe chars" marker: must have the exact shape `0--0...`
		if (!coded.startsWith('0') || lastDashIndex !== 1) {
			throw new Error("Invalid Dashycode");
		}
		lastDashIndex -= 1;
		coded = '--' + coded.slice(4);
	}
	if (coded.endsWith('2')) {
		// "starts with an unsafe run" marker: turn it into a leading dash
		coded = '-' + coded.slice(0, -1);
		lastDashIndex += 1;
	}
	const unsafeStream: DashyStream = {
		codeBuf: coded.slice(lastDashIndex + 2),
		buf: 0x0,
		bufLength: 0,
	};

	/**
	 * Status:
	 * 1 : awaiting next read
	 * 0 : assume all-lowercase
	 * other: 1 followed by n bits, describing the capitalization of the
	 * next n bits of alphabetic characters
	 */
	let capBuffer = 1;
	let str = '';
	// walk the safe part, including the first dash of the final `--`
	for (let i = 0; i <= lastDashIndex; i++) {
		let curChar = coded.charAt(i);
		if (curChar !== '-') {
			// safe char
			const curCharCode = coded.charCodeAt(i);
			const isLowercase = (97 <= curCharCode && curCharCode <= 122); // a-z
			if (isLowercase) {
				if (capBuffer === 1) {
					capBuffer = 0;
					// a capitalization chunk, if present, covers the next 8 letters
					if (streamPeek(unsafeStream, 2, 0x3) === 0x1) {
						switch (streamRead(unsafeStream, 3, 0x7)) {
						case 0x5:
							capBuffer = streamRead(unsafeStream, 8, 0xFF) + 0x100;
							break;
						case 0x1:
							capBuffer = 0x101;
							break;
						}
					}
				}
				const toCapitalize = capBuffer & 0x1;
				capBuffer >>= 1;
				if (toCapitalize) {
					curChar = String.fromCharCode(curCharCode - 32);
				}
			}
			str += curChar;
			continue;
		}

		capBuffer = 1;
		// pull out the next unsafe string
		let isEmpty = true;
		do {
			switch (streamRead(unsafeStream, 2, 0x3)) {
			case 0x0:
				// go back to parsing safe chars
				curChar = '';
				break;
			case 0x1:
				throw new Error("Invalid capitalization token");
			case 0x2:
				curChar = UNSAFE_MAP.charAt(streamRead(unsafeStream, 5, 0x1F));
				isEmpty = false;
				break;
			case 0x3:
				if (streamRead(unsafeStream, 1, 0x1)) {
					curChar = String.fromCharCode(streamRead(unsafeStream, 16, 0xFFFF));
				} else {
					curChar = ' ';
				}
				isEmpty = false;
				break;
			}
			str += curChar;
		} while (curChar);
		// a dash with no unsafe chars stands for one space, except the
		// final dash, which only terminates the safe part
		if (isEmpty && i !== lastDashIndex) str += ' ';
	}
	return str;
}
/**
 * Renders the coded (unsafe) part of a Dashycode string as a human-readable
 * list of chunks, for debugging.
 *
 * Chunks are prepended as they are read, so the first chunk of the stream
 * ends up rightmost in the output. Every chunk is preceded by a space.
 *
 * @param codeBuf The part of a Dashycode string after the last `--`.
 * @param translate When true, chunk types are shown as words (`-ascii`,
 *   `space`, ...); when false, as their raw bit patterns (`_10`, `0_11`, ...).
 * @returns The rendered chunk list.
 * @throws Error if `codeBuf` contains a character outside `CODE_MAP` (other
 *   than a leading `0` marker).
 */
export function vizStream(codeBuf: string, translate = true): string {
	let spacedStream = '';
	const prepend = (chunk: string): void => {
		spacedStream = chunk + spacedStream;
	};

	let code = codeBuf;
	if (code.startsWith('0')) {
		code = code.slice(1);
		prepend(' [no safe chars]');
	}
	if (code.endsWith('2')) {
		code = code.slice(0, -1);
		prepend(' [start unsafe]');
	}
	const stream: DashyStream = {
		codeBuf: code,
		buf: 0x0,
		bufLength: 0,
	};
	// reads `bufLen` bits and formats them as a zero-padded binary string
	const vizBlock = (bufLen: number): string =>
		streamRead(stream, bufLen).toString(2).padStart(bufLen, '0');

	while (stream.bufLength > 0 || stream.codeBuf) {
		switch (streamRead(stream, 2)) {
		case 0x0:
			prepend(translate ? ' |' : ' 00');
			break;
		case 0x1:
			if (streamRead(stream, 1)) {
				prepend(' ' + vizBlock(8) + (translate ? '-cap' : '_1_01'));
			} else {
				prepend(translate ? ' capfirst' : ' 0_01');
			}
			break;
		case 0x2:
			prepend(' ' + vizBlock(5) + (translate ? '-ascii' : '_10'));
			break;
		case 0x3:
			if (streamRead(stream, 1)) {
				prepend(' ' + vizBlock(16) + (translate ? '-utf' : '_1_11'));
			} else {
				prepend(translate ? ' space' : ' 0_11');
			}
			break;
		}
	}
	return spacedStream;
}