/**
 * Dashycode!
 *
 * Encodes a string in a restricted string containing only alphanumeric
 * characters and dashes.
 *
 * (The name is a riff on Punycode, which is what I originally wanted
 * to use for this purpose, but it turns out Punycode does not work on
 * arbitrary strings.)
 *
 * @author Guangcong Luo
 * @license MIT
 */

/** Alphabet for the coded part: each character carries 5 bits (its index). */
const CODE_MAP = "23456789abcdefghijkmnpqrstuvwxyz";
/** The 32 ASCII punctuation characters that get a short 5-bit index. */
const UNSAFE_MAP = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";

// chunk types:
// 0b00 = 0x0 = block separator ("wait until next dash")
// 0b01 = 0x1 = capitalize
//   0b1_01 = 0x5 = specify capitalization of next 8 alphabetic chars of safe part (8 more bits)
//   0b0_01 = 0x1 = short for 0b00000001_1_01 (capitalize first letter of next 8 alpha chars of safe part)
// 0b10 = 0x2 = ASCII in UNSAFE_MAP (5 more bits)
// 0b11 = 0x3 = other
//   0b0_11 = 0x3 = space
//   0b1_11 = 0x7 = UTF-16 (16 more bits)

/**
 * An object representing a Dashycode bitstream.
 * The stream can be either a read stream or a write stream, but not
 * both simultaneously.
 */
interface DashyStream {
  /** Coded characters: already emitted (write) or still to be consumed (read). */
  codeBuf: string;
  /** Pending bits, least-significant bit first. */
  buf: number;
  /** Number of valid bits currently held in `buf`. */
  bufLength: number;
}

/**
 * Appends the low `writeBufLength` bits of `writeBuf` to a write stream,
 * emitting one `CODE_MAP` character for every complete group of 5 bits.
 */
function streamWrite(stream: DashyStream, writeBufLength: number, writeBuf: number): void {
  stream.buf += (writeBuf << stream.bufLength);
  stream.bufLength += writeBufLength;
  while (stream.bufLength >= 5) {
    stream.codeBuf += CODE_MAP.charAt(stream.buf & 0x1F);
    stream.buf >>= 5;
    stream.bufLength -= 5;
  }
}

/**
 * Returns the coded string for a write stream: the emitted characters plus
 * one character for the leftover bits, with trailing `2`s removed.
 */
function streamGetCode(stream: DashyStream): string {
  const buf = stream.codeBuf + CODE_MAP.charAt(stream.buf);
  // truncate trailing `2`s (0b00000 chunks)
  let end2Len = 0;
  while (buf.charAt(buf.length - 1 - end2Len) === '2') end2Len++;
  return end2Len ? buf.slice(0, -end2Len) : buf;
}

/**
 * Returns the next `readLength` bits of a read stream without consuming them,
 * pulling characters from `codeBuf` as needed. Once `codeBuf` is exhausted the
 * missing bits read as 0.
 *
 * @throws Error if a character outside `CODE_MAP` is encountered.
 */
function streamPeek(
  stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)
): number {
  while (stream.bufLength < readLength && stream.codeBuf.length) {
    const next5Bits = CODE_MAP.indexOf(stream.codeBuf.charAt(0));
    if (next5Bits < 0) throw new Error("Invalid character in coded buffer");
    stream.codeBuf = stream.codeBuf.slice(1);
    stream.buf += next5Bits << stream.bufLength;
    stream.bufLength += 5;
  }
  return stream.buf & readMask;
}

/**
 * Consumes and returns the next `readLength` bits of a read stream.
 *
 * @throws Error if a character outside `CODE_MAP` is encountered.
 */
function streamRead(
  stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)
): number {
  const output = streamPeek(stream, readLength, readMask);
  // Note: bufLength can go negative! Streams have infinite trailing 0s
  stream.buf >>= readLength;
  stream.bufLength -= readLength;
  return output;
}

/**
 * Encodes `str` as Dashycode.
 *
 * The result is `safePart`, or `safePart--unsafePart` when extra information
 * (capitalization, punctuation, other characters) has to be recorded.
 *
 * @param str The string to encode. An empty string encodes to `'0--0'`.
 * @param allowCaps When true, uppercase letters are kept as-is in the safe
 *   part instead of being lowercased and recorded in the unsafe part.
 * @returns The encoded string.
 */
export function encode(str: string, allowCaps = false): string {
  if (!str) return '0--0';
  let safePart = '';
  const unsafeStream: DashyStream = {
    codeBuf: '',
    buf: 0x0,
    bufLength: 0,
  };
  let isSafe = true;
  let alphaIndex = 0;
  let capBuffer = 0x0;
  // One extra iteration (i === str.length) so a pending cap buffer gets flushed.
  for (let i = 0; i < str.length + 1; i++) {
    let curCharCode = i !== str.length ? str.charCodeAt(i) : -1;
    const isLowercase = (97 <= curCharCode && curCharCode <= 122); // a-z
    const isUppercase = (65 <= curCharCode && curCharCode <= 90); // A-Z
    const isNumeric = (48 <= curCharCode && curCharCode <= 57); // 0-9
    const isAlphanumeric = isLowercase || isUppercase || isNumeric;

    if (capBuffer && (!isAlphanumeric || alphaIndex >= 8 || i === str.length)) {
      // flush cap buffer
      if (capBuffer === 0xD) {
        // only the first letter is capitalized: use the short 3-bit form
        streamWrite(unsafeStream, 3, 0x1);
      } else {
        streamWrite(unsafeStream, 11, capBuffer);
      }
      alphaIndex -= 8;
      capBuffer = 0x0;
    }
    if (i === str.length) break;

    if (isAlphanumeric) {
      if (alphaIndex < 0) throw new Error("alphaIndex should be non-negative here");
      if (!isSafe) {
        if (capBuffer) throw new Error("capBuffer shouldn't exist here");
        // close the current unsafe block
        streamWrite(unsafeStream, 2, 0x0);
        isSafe = true;
      }
      if (isUppercase && !allowCaps) {
        safePart += String.fromCharCode(curCharCode + 32);
        // emit empty cap chunks for any fully-lowercase groups of 8 before this letter
        while (alphaIndex >= 8) {
          if (capBuffer) throw new Error("capBuffer shouldn't exist here");
          alphaIndex -= 8;
          streamWrite(unsafeStream, 11, 0x5);
        }
        if (!capBuffer) capBuffer = 0x5;
        capBuffer += 1 << (alphaIndex + 3);
      } else {
        safePart += str.charAt(i);
      }
      if (isUppercase || isLowercase) alphaIndex++;
      continue;
    }

    if (capBuffer) throw new Error("capBuffer shouldn't exist here");
    alphaIndex = 0;

    if (isSafe && curCharCode === 32) {
      // space
      const nextCharCode = str.charCodeAt(i + 1);
      if ((97 <= nextCharCode && nextCharCode <= 122) || // a-z
        (65 <= nextCharCode && nextCharCode <= 90) || // A-Z
        (48 <= nextCharCode && nextCharCode <= 57)) { // 0-9
        // a single space between safe runs is just a dash with an empty block
        safePart += '-';
        streamWrite(unsafeStream, 2, 0x0);
        continue;
      }
    }

    if (isSafe) {
      safePart += '-';
      isSafe = false;
    }

    if (curCharCode === -1) {
      // unreachable: the loop breaks at i === str.length before getting here
      streamWrite(unsafeStream, 2, 0x0);
    } else if (curCharCode === 32) {
      // space
      streamWrite(unsafeStream, 3, 0x3);
    } else {
      const unsafeMapIndex = UNSAFE_MAP.indexOf(str.charAt(i));
      if (unsafeMapIndex >= 0) {
        curCharCode = (unsafeMapIndex << 2) + 0x2;
        streamWrite(unsafeStream, 7, curCharCode);
      } else {
        curCharCode = (curCharCode << 3) + 0x7;
        streamWrite(unsafeStream, 19, curCharCode);
      }
    }
  }

  let unsafePart = streamGetCode(unsafeStream);
  if (safePart.startsWith('-')) {
    // a trailing `2` marks "input starts with an unsafe block"
    safePart = safePart.slice(1);
    unsafePart = `${unsafePart}2`;
  }
  if (safePart.endsWith('-')) {
    safePart = safePart.slice(0, -1);
  }
  if (!safePart) {
    // a leading `0` marks "no safe characters"; the trailing `2` is then dropped
    safePart = '0';
    unsafePart = `0${unsafePart}`;
    if (unsafePart.endsWith('2')) unsafePart = unsafePart.slice(0, -1);
  }
  if (!unsafePart) return safePart;
  return `${safePart}--${unsafePart}`;
}

/**
 * Decodes a Dashycode string produced by `encode`.
 *
 * @param codedStr The encoded string.
 * @returns The decoded string.
 * @throws Error on a malformed `0` marker, an invalid capitalization token,
 *   or a character outside `CODE_MAP` in the coded part.
 */
export function decode(codedStr: string): string {
  let str = '';
  let lastDashIndex = codedStr.lastIndexOf('--');
  if (lastDashIndex < 0) {
    // the regular decoder can also handle this case; but this should
    // be faster
    return codedStr.replace(/-/g, ' ');
  }
  if (codedStr.charAt(lastDashIndex + 2) === '0') {
    // "no safe chars" marker: only valid in the exact form `0--0...`
    if (!codedStr.startsWith('0') || lastDashIndex !== 1) {
      throw new Error("Invalid Dashycode");
    }
    lastDashIndex -= 1;
    codedStr = '--' + codedStr.slice(4);
  }
  if (codedStr.endsWith('2')) {
    // "starts with unsafe block" marker: re-insert the leading dash
    codedStr = '-' + codedStr.slice(0, -1);
    lastDashIndex += 1;
  }
  const unsafeStream: DashyStream = {
    codeBuf: codedStr.slice(lastDashIndex + 2),
    buf: 0x0,
    bufLength: 0,
  };
  /**
   * Status:
   * 1    : awaiting next read
   * 0    : assume all-lowercase
   * other: 1 followed by n bits, describing the capitalization of the
   *        next n bits of alphabetic characters
   */
  let capBuffer = 1;
  for (let i = 0; i < lastDashIndex + 1; i++) {
    let curChar = codedStr.charAt(i);
    if (curChar !== '-') {
      // safe char
      const curCharCode = codedStr.charCodeAt(i);
      const isLowercase = (97 <= curCharCode && curCharCode <= 122); // a-z
      if (isLowercase) {
        if (capBuffer === 1) {
          capBuffer = 0;
          if (streamPeek(unsafeStream, 2, 0x3) === 0x1) {
            switch (streamRead(unsafeStream, 3, 0x7)) {
              case 0x5:
                capBuffer = streamRead(unsafeStream, 8, 0xFF) + 0x100;
                break;
              case 0x1:
                capBuffer = 0x101;
                break;
            }
          }
        }
        const toCapitalize = capBuffer & 0x1;
        capBuffer >>= 1;
        if (toCapitalize) {
          curChar = String.fromCharCode(curCharCode - 32);
        }
      }
      str += curChar;
    } else {
      capBuffer = 1;
      // pull out the next unsafe string
      let isEmpty = true;
      do {
        switch (streamRead(unsafeStream, 2, 0x3)) {
          case 0x0:
            // go back to parsing safe chars
            curChar = '';
            break;
          case 0x1:
            throw new Error("Invalid capitalization token");
          case 0x2:
            curChar = UNSAFE_MAP.charAt(streamRead(unsafeStream, 5, 0x1F));
            isEmpty = false;
            break;
          case 0x3:
            if (streamRead(unsafeStream, 1, 0x1)) {
              curChar = String.fromCharCode(streamRead(unsafeStream, 16, 0xFFFF));
            } else {
              curChar = ' ';
            }
            isEmpty = false;
            break;
        }
        str += curChar;
      } while (curChar);
      // an empty block at an interior dash stands for a single space
      if (isEmpty && i !== lastDashIndex) str += ' ';
    }
  }
  return str;
}

/**
 * Renders the coded (unsafe) part of a Dashycode string as a list of chunks,
 * for debugging. Chunks are prepended as they are read, so the first chunk in
 * the stream ends up rightmost in the output.
 *
 * @param codeBuf The coded part (the text after `--`).
 * @param translate When true, use readable labels (`capfirst`, `space`, ...);
 *   when false, use raw bit patterns (`0_01`, `0_11`, ...).
 * @returns The rendered chunk list; each chunk is preceded by a space.
 */
export function vizStream(codeBuf: string, translate = true): string {
  let spacedStream = '';
  if (codeBuf.startsWith('0')) {
    codeBuf = codeBuf.slice(1);
    spacedStream = ' [no safe chars]' + spacedStream;
  }
  if (codeBuf.endsWith('2')) {
    codeBuf = codeBuf.slice(0, -1);
    spacedStream = ' [start unsafe]' + spacedStream;
  }
  const stream: DashyStream = {
    codeBuf,
    buf: 0x0,
    bufLength: 0,
  };
  /** Reads `bufLen` bits and formats them as a zero-padded binary string. */
  function vizBlock(s: DashyStream, bufLen: number): string {
    const buf = streamRead(s, bufLen);
    return buf.toString(2).padStart(bufLen, '0');
  }
  while (stream.bufLength > 0 || stream.codeBuf) {
    switch (streamRead(stream, 2)) {
      case 0x0:
        spacedStream = (translate ? ' |' : ' 00') + spacedStream;
        break;
      case 0x1:
        if (streamRead(stream, 1)) {
          spacedStream = ' ' + vizBlock(stream, 8) + (translate ? '-cap' : '_1_01') + spacedStream;
        } else {
          spacedStream = (translate ? ' capfirst' : ' 0_01') + spacedStream;
        }
        break;
      case 0x2:
        spacedStream = ' ' + vizBlock(stream, 5) + (translate ? '-ascii' : '_10') + spacedStream;
        break;
      case 0x3:
        if (streamRead(stream, 1)) {
          spacedStream = ' ' + vizBlock(stream, 16) + (translate ? '-utf' : '_1_11') + spacedStream;
        } else {
          spacedStream = (translate ? ' space' : ' 0_11') + spacedStream;
        }
        break;
    }
  }
  return spacedStream;
}