Pokemon_server / lib /dashycode.ts
Jofthomas's picture
Jofthomas HF staff
Upload 4781 files
5c2ed06 verified
raw
history blame
9.28 kB
/**
* Dashycode!
*
* Encodes a string in a restricted string containing only alphanumeric
* characters and dashes.
*
* (The name is a riff on Punycode, which is what I originally wanted
* to use for this purpose, but it turns out Punycode does not work on
* arbitrary strings.)
*
* @author Guangcong Luo <guangcongluo@gmail.com>
* @license MIT
*/
// Alphabet of the encoded bitstream: the character at index n stands for the
// 5-bit value n (see streamWrite / streamPeek). It has 32 entries and leaves
// out `0`, `1`, `l` and `o`; `0` is instead used by encode() as the
// "no safe chars" marker. `2` is the all-zero chunk, which streamGetCode()
// strips from the end of a stream.
const CODE_MAP = "23456789abcdefghijkmnpqrstuvwxyz";
// The 32 ASCII punctuation characters that get the compact 7-bit encoding
// (2-bit chunk type + 5-bit index into this string). Space is not listed
// here; it has its own chunk type.
const UNSAFE_MAP = "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~";
// chunk types (2-bit tag, least-significant bits first; the indented
// entries are the 3-bit forms that refine the 2-bit tag above them):
// 0b00 = 0x0 = block separator ("wait until next dash")
// 0b01 = 0x1 = capitalize
//   0b1_01 = 0x5 = specify capitalization of next 8 alphabetic chars of safe part (8 more bits)
//   0b0_01 = 0x1 = short for 0b00000001_1_01 (capitalize first letter of next 8 alpha chars of safe part)
// 0b10 = 0x2 = ASCII in UNSAFE_MAP (5 more bits)
// 0b11 = 0x3 = other
//   0b0_11 = 0x3 = space
//   0b1_11 = 0x7 = UTF-16 (16 more bits)
/**
* An object representing a Dashycode bitstream.
* The stream can be either a read stream or a write stream, but not
* both simultaneously.
*
* Bits are packed least-significant-first, five bits per CODE_MAP
* character.
*/
interface DashyStream {
// Write stream: the CODE_MAP characters emitted so far.
// Read stream: the CODE_MAP characters that have not been consumed yet.
codeBuf: string;
// Bits that are pending: written but not yet emitted as a character, or
// loaded from codeBuf but not yet read. The next bit is the lowest one.
buf: number;
// Number of meaningful bits currently held in `buf`. On a read stream this
// can go negative when reading past the end (the stream is treated as
// having infinite trailing zero bits).
bufLength: number;
}
/**
 * Appends the low `writeBufLength` bits of `writeBuf` to a write stream.
 *
 * New bits are placed above the bits already pending in `stream.buf`; every
 * complete group of five bits is then emitted, lowest group first, as one
 * CODE_MAP character appended to `stream.codeBuf`. Fewer than five bits are
 * left pending afterwards.
 */
function streamWrite(stream: DashyStream, writeBufLength: number, writeBuf: number) {
	let pending = stream.buf + (writeBuf << stream.bufLength);
	let pendingBits = stream.bufLength + writeBufLength;
	let emitted = '';
	for (; pendingBits >= 5; pendingBits -= 5) {
		emitted += CODE_MAP.charAt(pending & 0x1F);
		pending >>= 5;
	}
	// commit the new state in one go
	stream.codeBuf += emitted;
	stream.buf = pending;
	stream.bufLength = pendingBits;
}
/**
 * Returns the finished code string of a write stream without modifying it.
 *
 * The bits still pending in `stream.buf` are rendered as one last CODE_MAP
 * character, then every trailing `2` (an all-zero 5-bit chunk) is dropped,
 * since readers treat a stream as ending in infinitely many zero bits.
 */
function streamGetCode(stream: DashyStream) {
	const withTail = stream.codeBuf + CODE_MAP.charAt(stream.buf);
	return withTail.replace(/2+$/, '');
}
/**
 * Returns the next bits of a read stream without consuming them.
 *
 * Characters are pulled off the front of `stream.codeBuf` (five bits each)
 * until at least `readLength` bits are buffered or the code string runs out;
 * in the latter case the missing high bits read as zero.
 *
 * @param readMask Mask applied to the buffered bits; defaults to the low
 * `readLength` bits.
 * @throws Error if the next character of `codeBuf` is not in CODE_MAP. The
 * offending character is left in `codeBuf`.
 */
function streamPeek(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	while (stream.codeBuf.length > 0 && stream.bufLength < readLength) {
		const symbol = stream.codeBuf.charAt(0);
		const chunk = CODE_MAP.indexOf(symbol);
		if (chunk === -1) throw new Error("Invalid character in coded buffer");
		// the new chunk goes above the bits that are already buffered
		stream.codeBuf = stream.codeBuf.substring(1);
		stream.buf += chunk << stream.bufLength;
		stream.bufLength += 5;
	}
	return stream.buf & readMask;
}
/**
 * Reads and consumes the next `readLength` bits of a read stream.
 *
 * Same result as streamPeek(), after which the bits are shifted out of the
 * buffer. Reading past the end of the code string is allowed: the stream is
 * treated as having infinite trailing zero bits, so `bufLength` may end up
 * negative.
 */
function streamRead(stream: DashyStream, readLength: number, readMask: number = 0xFFFF >> (16 - readLength)) {
	const value = streamPeek(stream, readLength, readMask);
	stream.bufLength -= readLength;
	stream.buf >>= readLength;
	return value;
}
/**
 * Encodes an arbitrary string as Dashycode.
 *
 * The result is either `safePart` alone or `safePart--unsafePart`:
 * - `safePart` holds the ASCII letters and digits of `str` in order, with a
 *   single `-` wherever a run of other characters (or a lone space between
 *   two alphanumeric runs) occurred.
 * - `unsafePart` is a CODE_MAP-encoded bitstream describing, in order, the
 *   capitalization of the safe letters and the characters hidden behind each
 *   dash. It is omitted when the stream is empty.
 *
 * Special markers: a trailing `2` on the unsafe part means the string starts
 * with a non-safe run; a leading `0` on both parts means there are no safe
 * characters at all.
 *
 * @param str The string to encode.
 * @param allowCaps When true, uppercase ASCII letters are copied into the
 * safe part unchanged instead of being lowercased and recorded in the
 * bitstream.
 * @returns The Dashycode for `str`; the empty string encodes as `0--0`.
 * @throws Error if an internal capitalization-tracking invariant is violated.
 */
export function encode(str: string, allowCaps = false): string {
	if (!str) return '0--0';

	let safePart = '';
	const unsafeStream: DashyStream = {
		codeBuf: '',
		buf: 0x0,
		bufLength: 0,
	};
	// whether the previous character went to the safe part
	let isSafe = true;
	// number of letters seen in the current 8-letter capitalization window
	let alphaIndex = 0;
	// pending capitalization chunk: 0, or the 3-bit tag 0b101 plus one bit per
	// letter of the current window (bit n + 3 set = letter n is uppercase)
	let capBuffer = 0x0;

	const flushCapBuffer = () => {
		if (capBuffer === 0xD) {
			// only the first letter of the window is uppercase: short form
			streamWrite(unsafeStream, 3, 0x1);
		} else {
			streamWrite(unsafeStream, 11, capBuffer);
		}
		capBuffer = 0x0;
	};

	for (let i = 0; i < str.length; i++) {
		const charCode = str.charCodeAt(i);
		const isLowercase = (97 <= charCode && charCode <= 122); // a-z
		const isUppercase = (65 <= charCode && charCode <= 90); // A-Z
		const isNumeric = (48 <= charCode && charCode <= 57); // 0-9
		const isAlphanumeric = isLowercase || isUppercase || isNumeric;

		// A pending capitalization chunk must be written out before leaving
		// the safe run and before starting the next 8-letter window.
		if (capBuffer && (!isAlphanumeric || alphaIndex >= 8)) {
			flushCapBuffer();
			alphaIndex -= 8;
		}

		if (isAlphanumeric) {
			if (alphaIndex < 0) throw new Error("alphaIndex should be non-negative here");
			if (!isSafe) {
				if (capBuffer) throw new Error("capBuffer shouldn't exist here");
				// block separator: the unsafe run is over
				streamWrite(unsafeStream, 2, 0x0);
				isSafe = true;
			}
			if (isUppercase && !allowCaps) {
				safePart += String.fromCharCode(charCode + 32);
				// The decoder reads capitalization window by window, so every
				// all-lowercase window skipped so far needs an explicit empty
				// chunk before this letter's window can be described.
				while (alphaIndex >= 8) {
					if (capBuffer) throw new Error("capBuffer shouldn't exist here");
					alphaIndex -= 8;
					streamWrite(unsafeStream, 11, 0x5);
				}
				if (!capBuffer) capBuffer = 0x5;
				capBuffer += 1 << (alphaIndex + 3);
			} else {
				safePart += str.charAt(i);
			}
			// digits are safe but do not take part in capitalization windows
			if (isUppercase || isLowercase) alphaIndex++;
			continue;
		}

		if (capBuffer) throw new Error("capBuffer shouldn't exist here");
		alphaIndex = 0;

		if (isSafe && charCode === 32) { // space
			// A single space between two safe runs is just a dash with an
			// empty unsafe block (charCodeAt past the end is NaN, which fails
			// every range check below).
			const nextCharCode = str.charCodeAt(i + 1);
			if ((97 <= nextCharCode && nextCharCode <= 122) || // a-z
				(65 <= nextCharCode && nextCharCode <= 90) || // A-Z
				(48 <= nextCharCode && nextCharCode <= 57)) { // 0-9
				safePart += '-';
				streamWrite(unsafeStream, 2, 0x0);
				continue;
			}
		}

		if (isSafe) {
			safePart += '-';
			isSafe = false;
		}

		if (charCode === 32) { // space
			streamWrite(unsafeStream, 3, 0x3);
			continue;
		}
		const unsafeMapIndex = UNSAFE_MAP.indexOf(str.charAt(i));
		if (unsafeMapIndex >= 0) {
			// 2-bit tag 0b10 + 5-bit index into UNSAFE_MAP
			streamWrite(unsafeStream, 7, (unsafeMapIndex << 2) + 0x2);
		} else {
			// 3-bit tag 0b111 + the 16-bit UTF-16 code unit
			streamWrite(unsafeStream, 19, (charCode << 3) + 0x7);
		}
	}
	// the string may end in the middle of a capitalization window
	if (capBuffer) flushCapBuffer();

	let unsafePart = streamGetCode(unsafeStream);
	if (safePart.startsWith('-')) {
		// leading unsafe run: drop the dash and flag it with a trailing `2`
		safePart = safePart.slice(1);
		unsafePart = `${unsafePart}2`;
	}
	if (safePart.endsWith('-')) {
		safePart = safePart.slice(0, -1);
	}
	if (!safePart) {
		// no safe characters at all: flag with `0` on both sides; the
		// leading-run marker is then implied and removed again
		safePart = '0';
		unsafePart = `0${unsafePart}`;
		if (unsafePart.endsWith('2')) unsafePart = unsafePart.slice(0, -1);
	}
	if (!unsafePart) return safePart;
	return `${safePart}--${unsafePart}`;
}
/**
 * Decodes a Dashycode string back into the original string.
 *
 * The part before the last `--` is the safe part; the part after it is a
 * CODE_MAP bitstream that supplies the capitalization of safe letters and
 * the characters hidden behind each dash of the safe part.
 *
 * @throws Error("Invalid Dashycode") if the "no safe chars" marker is
 * present but the string does not start with `0--0`.
 * @throws Error("Invalid capitalization token") if a capitalization chunk
 * appears where an unsafe character or block separator is expected.
 * @throws Error if the bitstream contains a character outside CODE_MAP.
 */
export function decode(codedStr: string) {
	let coded = codedStr;
	let boundary = coded.lastIndexOf('--');
	if (boundary < 0) {
		// No bitstream: nothing is capitalized and every dash is a plain
		// space, so a straight substitution is enough.
		return coded.replace(/-/g, ' ');
	}

	if (coded.charAt(boundary + 2) === '0') {
		// "no safe chars" marker: the only valid shape is `0--0<stream>`;
		// normalize it to an empty safe part that begins with a dash
		const wellFormed = coded.startsWith('0') && boundary === 1;
		if (!wellFormed) throw new Error("Invalid Dashycode");
		coded = '--' + coded.slice(4);
		boundary -= 1;
	}
	if (coded.endsWith('2')) {
		// "starts unsafe" marker: put the leading dash back
		coded = '-' + coded.slice(0, -1);
		boundary += 1;
	}

	const unsafeStream: DashyStream = {
		codeBuf: coded.slice(boundary + 2),
		buf: 0x0,
		bufLength: 0,
	};

	// Capitalization state:
	//   1     - a capitalization chunk may follow; look for one on the next letter
	//   0     - treat every remaining letter of this safe run as lowercase
	//   other - a sentinel 1 bit above n flag bits, one per upcoming letter
	let capState = 1;
	let decoded = '';

	// The first dash of the final `--` is processed too: it carries the
	// unsafe run (if any) that ends the string.
	for (let pos = 0; pos <= boundary; pos++) {
		const ch = coded.charAt(pos);

		if (ch === '-') {
			capState = 1;
			let sawUnsafe = false;
			// consume unsafe characters until the block separator
			for (;;) {
				const chunkType = streamRead(unsafeStream, 2, 0x3);
				if (chunkType === 0x0) break;
				if (chunkType === 0x1) throw new Error("Invalid capitalization token");
				if (chunkType === 0x2) {
					decoded += UNSAFE_MAP.charAt(streamRead(unsafeStream, 5, 0x1F));
				} else if (streamRead(unsafeStream, 1, 0x1)) {
					decoded += String.fromCharCode(streamRead(unsafeStream, 16, 0xFFFF));
				} else {
					decoded += ' ';
				}
				sawUnsafe = true;
			}
			// an empty block in the middle of the safe part is a single space
			if (!sawUnsafe && pos !== boundary) decoded += ' ';
			continue;
		}

		const code = coded.charCodeAt(pos);
		const isLowercase = (97 <= code && code <= 122); // a-z
		if (!isLowercase) {
			// digits (and anything else) pass through untouched
			decoded += ch;
			continue;
		}

		if (capState === 1) {
			capState = 0;
			if (streamPeek(unsafeStream, 2, 0x3) === 0x1) {
				const token = streamRead(unsafeStream, 3, 0x7);
				if (token === 0x5) {
					// explicit flags for the next 8 letters
					capState = streamRead(unsafeStream, 8, 0xFF) + 0x100;
				} else if (token === 0x1) {
					// short form: only the first of the next 8 letters
					capState = 0x101;
				}
			}
		}
		const isCapital = (capState & 0x1) !== 0;
		capState >>= 1;
		decoded += isCapital ? String.fromCharCode(code - 32) : ch;
	}
	return decoded;
}
/**
 * Renders the unsafe part of a Dashycode string as a human-readable list of
 * chunks, for debugging.
 *
 * Chunks appear right-to-left: the first chunk read from the stream is the
 * last item of the returned string, and the marker notes (`[start unsafe]`,
 * `[no safe chars]`) come after all chunks. Every item is preceded by a
 * single space.
 *
 * @param codeBuf The unsafe part, including any leading `0` / trailing `2`
 * marker.
 * @param translate When true, chunks are labelled with names (`space`,
 * `-ascii`, ...); when false, with their raw tag bits (`0_11`, `_10`, ...).
 * @throws Error if `codeBuf` contains a character outside CODE_MAP.
 */
export function vizStream(codeBuf: string, translate = true) {
	// items in the order they are discovered; reversed once at the end
	const pieces: string[] = [];

	let remaining = codeBuf;
	if (remaining.startsWith('0')) {
		remaining = remaining.slice(1);
		pieces.push(' [no safe chars]');
	}
	if (remaining.endsWith('2')) {
		remaining = remaining.slice(0, -1);
		pieces.push(' [start unsafe]');
	}

	const stream: DashyStream = {
		codeBuf: remaining,
		buf: 0x0,
		bufLength: 0,
	};
	// reads `width` payload bits and formats them as a zero-padded binary string
	const readBits = (width: number) => streamRead(stream, width).toString(2).padStart(width, '0');

	while (stream.bufLength > 0 || stream.codeBuf) {
		const chunkType = streamRead(stream, 2);
		if (chunkType === 0x0) {
			pieces.push(translate ? ' |' : ' 00');
		} else if (chunkType === 0x1) {
			if (streamRead(stream, 1)) {
				pieces.push(' ' + readBits(8) + (translate ? '-cap' : '_1_01'));
			} else {
				pieces.push(translate ? ' capfirst' : ' 0_01');
			}
		} else if (chunkType === 0x2) {
			pieces.push(' ' + readBits(5) + (translate ? '-ascii' : '_10'));
		} else if (chunkType === 0x3) {
			if (streamRead(stream, 1)) {
				pieces.push(' ' + readBits(16) + (translate ? '-utf' : '_1_11'));
			} else {
				pieces.push(translate ? ' space' : ' 0_11');
			}
		}
	}
	return pieces.reverse().join('');
}