Spaces:

PShowdown
/

Pokemon_server

Running

App Files Files Community

Pokemon_server / server /chat-formatter.ts

Jofthomas HF staff

Upload 4781 files

5c2ed06 verified 21 days ago

raw

history blame contribute delete

18.1 kB

	/**
	* Chat parser
	* Pokemon Showdown - http://pokemonshowdown.com/
	*
	 * Parses chat formatting markup and converts it to HTML.
	*
	* @license MIT
	*/

	/*
	SOURCE FOR LINKREGEX (compile with https://regexfree.k55.io/ )

	(
	  (
	    # When using http://, allow any domain
	    https?:\/\/ [a-z0-9-]+ ( \. [a-z0-9-]+ )*
	  |
	    # When using www., expect at least one more dot
	    www \. [a-z0-9-]+ ( \. [a-z0-9-]+ )+
	  |
	    # Otherwise, allow any domain, but only if
	    \b [a-z0-9-]+ ( \. [a-z0-9-]+ )* \.
	    (
	      # followed either a common TLD...
	      ( com? | org | net | edu | info | us | jp ) \b
	    |
	      # or any 2-3 letter TLD followed by a port or /
	      [a-z]{2,3} (?= :[0-9] | / )
	    )
	  )
	  # possible custom port
	  ( : [0-9]+ )?
	  (
	    \/
	    (
	      # characters allowed inside URL paths
	      (
	        [^\s()&<>[\]] | &amp; | &quot;
	      |
	        # parentheses in URLs should be matched, so they're not confused
	        # for parentheses around URLs
	        \( ( [^\s()<>&[\]] | &amp; )* \)
	      |
	        \[ ( [^\s()<>&[\]] | &amp; )* ]
	      )*
	      # URLs usually don't end with punctuation, so don't allow
	      # punctuation symbols that probably aren't related to the URL.
	      (
	        [^\s()[\]{}\".,!?;:&<>*`^~\\]
	      |
	        # annoyingly, Wikipedia URLs often end in )
	        \( ( [^\s()<>&[\]] | &amp; )* \)
	      )
	    )?
	  )?
	|
	  # email address
	  [a-z0-9.]+ @ [a-z0-9-]+ ( \. [a-z0-9-]+ )* \. [a-z]{2,}
	)
	(?! [^ ]*&gt; )

	*/
	/**
	 * Matches bare URLs and e-mail addresses inside text that has ALREADY been
	 * HTML-escaped (which is why it looks for `&amp;`, `&quot;` and `&gt;`
	 * rather than the raw characters). Compiled from the annotated source in
	 * the comment above.
	 *
	 * The trailing negative lookahead refuses a match that is followed
	 * (without an intervening space) by `&gt;`, so the URL inside
	 * `[[text <url>]]` is left for the `[[ ]]` handler.
	 *
	 * Note: the regex is global (`g`), so it carries `lastIndex` state when
	 * used with `test`/`exec`; `String#replace` and `String#match` reset it.
	 */
	export const linkRegex = /(?:(?:https?:\/\/[a-z0-9-]+(?:\.[a-z0-9-]+)*|www\.[a-z0-9-]+(?:\.[a-z0-9-]+)+|\b[a-z0-9-]+(?:\.[a-z0-9-]+)*\.(?:(?:com?|org|net|edu|info|us|jp)\b|[a-z]{2,3}(?=:[0-9]|\/)))(?::[0-9]+)?(?:\/(?:(?:[^\s()&<>[\]]|&amp;|&quot;|\((?:[^\s()<>&[\]]|&amp;)*\)|\[(?:[^\s()<>&[\]]|&amp;)*])*(?:[^\s()[\]{}".,!?;:&<>*`^~\\]|\((?:[^\s()<>&[\]]|&amp;)*\)))?)?|[a-z0-9.]+@[a-z0-9-]+(?:\.[a-z0-9-]+)*\.[a-z]{2,})(?![^ ]*&gt;)/ig;

	/**
	* A span is a part of the text that's formatted. In the text:
	*
	 *     Hi, **this** is an example.
	*
	* The word `this` is a `*` span. Many spans are just a symbol repeated, and
	* that symbol is the span type, but also many are more complicated.
	* For an explanation of all of these, see the `TextFormatter#get` function
	* implementation.
	*/
	type SpanType = '_' | '*' | '~' | '^' | '\\' | '|' | '<' | '[' | '`' | 'a' | 'u' | 'spoiler' | '>' | '(';

	/**
	 * An open span on the formatter's stack: the span type, plus the index in
	 * `TextFormatter#buffers` of the placeholder that is overwritten with the
	 * opening tag once the span is closed (`-1` for `(` spans, which have no
	 * placeholder).
	 */
	type FormatSpan = [SpanType, number];

	/**
	 * Converts one chat message from chat-formatting markup to HTML.
	 *
	 * The constructor HTML-escapes the input and wraps bare URLs / e-mail
	 * addresses in `<a>` (or `<u>` in `showSyntax` mode). `get()` then scans
	 * the escaped string once, copying slices of it into `buffers` and
	 * tracking possibly-open spans on `stack`; a span only becomes HTML once
	 * its closing delimiter is found (or, for spoiler/greentext spans, when
	 * the line ends).
	 */
	class TextFormatter {
		/** HTML-escaped input, with bare links already converted to tags */
		readonly str: string;
		/** Output fragments, joined by `get()`; span openers are patched in place when a span closes */
		readonly buffers: string[];
		/** Currently open spans, innermost last */
		readonly stack: FormatSpan[];
		/** Allows access to special formatting (links without URL preview, pokemon icons) */
		readonly isTrusted: boolean;
		/** Replace \n with <br /> */
		readonly replaceLinebreaks: boolean;
		/** Discord-style WYSIWYM output; markup characters are in `<tt>` */
		readonly showSyntax: boolean;
		/** offset of str that's been parsed so far */
		offset: number;

		/**
		 * @param str raw (unescaped) message text
		 * @param isTrusted enables trusted-only output; also forces `replaceLinebreaks`
		 * @param replaceLinebreaks emit `<br />` for line breaks
		 * @param showSyntax keep the markup characters visible, wrapped in `<tt>`/`<small>`
		 */
		constructor(str: string, isTrusted = false, replaceLinebreaks = false, showSyntax = false) {
			// escapeHTML, without escaping /
			str = `${str}`
				.replace(/&/g, '&amp;')
				.replace(/</g, '&lt;')
				.replace(/>/g, '&gt;')
				.replace(/"/g, '&quot;')
				.replace(/'/g, '&apos;');

			// filter links first
			str = str.replace(linkRegex, uri => {
				if (showSyntax) return `<u>${uri}</u>`;
				let fulluri;
				if (/^[a-z0-9.]+@/ig.test(uri)) {
					fulluri = 'mailto:' + uri;
				} else {
					// prepend a scheme unless the match already starts with `scheme:`
					fulluri = uri.replace(/^([a-z]*[^a-z:])/g, 'http://$1');
					if (uri.startsWith('https://docs.google.com/') || uri.startsWith('docs.google.com/')) {
						// Shorten the DISPLAYED text of Google Docs links; `fulluri` is untouched.
						if (uri.startsWith('https')) uri = uri.slice(8);
						// NOTE(review): the text is already HTML-escaped here, so a trailing
						// `&usp=sharing` would appear as `&amp;usp=sharing` and the second
						// comparison looks like it can never match - confirm before relying on it.
						if (uri.endsWith('?usp=sharing') || uri.endsWith('&usp=sharing')) uri = uri.slice(0, -12);
						if (uri.endsWith('#gid=0')) uri = uri.slice(0, -6);
						let slashIndex = uri.lastIndexOf('/');
						if (uri.length - slashIndex > 18) slashIndex = uri.length;
						if (slashIndex - 4 > 19 + 3) {
							// collapse the middle of the long document id
							uri = `${uri.slice(0, 19)}<small class="message-overflow">${uri.slice(19, slashIndex - 4)}</small>` +
								`${uri.slice(slashIndex - 4)}`;
						}
					}
				}
				return `<a href="${fulluri}" rel="noopener" target="_blank">${uri}</a>`;
			});
			// (links don't have any specific syntax, they're just a pattern, so we detect them in a separate pass)

			this.str = str;
			this.buffers = [];
			this.stack = [];
			this.isTrusted = isTrusted;
			this.replaceLinebreaks = this.isTrusted || replaceLinebreaks;
			this.showSyntax = showSyntax;
			this.offset = 0;
		}
		// debugAt(i=0, j=i+1) { console.log(`${this.slice(0, i)}[${this.slice(i, j)}]${this.slice(j, this.str.length)}`); }

		/** Substring of the escaped text. */
		slice(start: number, end: number) {
			return this.str.slice(start, end);
		}

		/** Character of the escaped text at `start`; `''` when out of range. */
		at(start: number) {
			return this.str.charAt(start);
		}

		/**
		 * We've encountered a possible start for a span. It's pushed onto our span
		 * stack.
		 *
		 * The span stack saves the start position so it can be replaced with HTML
		 * if we find an end for the span, but we don't actually replace it until
		 * `closeSpan` is called, so nothing happens (it stays plaintext) if no end
		 * is found.
		 */
		pushSpan(spanType: SpanType, start: number, end: number) {
			this.pushSlice(start);
			this.stack.push([spanType, this.buffers.length]);
			// placeholder holding the raw delimiter; overwritten if the span closes
			this.buffers.push(this.slice(start, end));
			this.offset = end;
		}

		/** Flushes the unparsed text between `offset` and `end` into `buffers`. */
		pushSlice(end: number) {
			if (end !== this.offset) {
				this.buffers.push(this.slice(this.offset, end));
				this.offset = end;
			}
		}

		/**
		 * Handles `)`: if the innermost open spans are only spoiler spans followed
		 * by a `(` span, ends them all (including the `(`) at `start`.
		 *
		 * @returns whether a matching `(` span was found
		 */
		closeParenSpan(start: number) {
			let stackPosition = -1;
			for (let i = this.stack.length - 1; i >= 0; i--) {
				const span = this.stack[i];
				if (span[0] === '(') {
					stackPosition = i;
					break;
				}
				// only spoiler spans may sit between the `)` and its `(`
				if (span[0] !== 'spoiler') break;
			}
			if (stackPosition === -1) return false;

			this.pushSlice(start);
			while (this.stack.length > stackPosition) this.popSpan(start);
			this.offset = start;
			return true;
		}

		/**
		 * We've encountered a possible end for a span. If it's in the span stack,
		 * we transform it into HTML.
		 *
		 * @returns whether an open span of `spanType` was found
		 */
		closeSpan(spanType: SpanType, start: number, end: number) {
			// loop backwards
			let stackPosition = -1;
			for (let i = this.stack.length - 1; i >= 0; i--) {
				const span = this.stack[i];
				if (span[0] === spanType) {
					stackPosition = i;
					break;
				}
			}
			if (stackPosition === -1) return false;

			this.pushSlice(start);
			// spans opened inside this one end here
			while (this.stack.length > stackPosition + 1) this.popSpan(start);
			const span = this.stack.pop()!;
			const startIndex = span[1];
			let tagName = '';
			let attrs = '';
			switch (spanType) {
			case '_': tagName = 'i'; break;
			case '*': tagName = 'b'; break;
			case '~': tagName = 's'; break;
			case '^': tagName = 'sup'; break;
			case '\\': tagName = 'sub'; break;
			case '|': tagName = 'span'; attrs = (this.showSyntax ? ' class="spoiler-shown"' : ' class="spoiler"'); break;
			}
			const syntax = (this.showSyntax ? `<tt>${spanType}${spanType}</tt>` : '');
			if (tagName) {
				this.buffers[startIndex] = `${syntax}<${tagName}${attrs}>`;
				this.buffers.push(`</${tagName}>${syntax}`);
				this.offset = end;
			}
			return true;
		}

		/**
		 * Ends a span without an ending symbol. For most spans, this means
		 * they don't take effect, but certain spans like spoiler tags don't
		 * require ending symbols.
		 *
		 * @returns `false` if the stack was empty
		 */
		popSpan(end: number) {
			const span = this.stack.pop();
			if (!span) return false;
			this.pushSlice(end);
			switch (span[0]) {
			case 'spoiler':
				this.buffers.push(`</span>`);
				this.buffers[span[1]] = (this.showSyntax ? `<span class="spoiler-shown">` : `<span class="spoiler">`);
				break;
			case '>':
				this.buffers.push(`</span>`);
				this.buffers[span[1]] = `<span class="greentext">`;
				break;
			default:
				// do nothing
				break;
			}
			return true;
		}

		/** Ends every open span at `end` and flushes the remaining text up to `end`. */
		popAllSpans(end: number) {
			while (this.stack.length) this.popSpan(end);
			this.pushSlice(end);
		}

		/** Undoes the constructor's HTML escaping, then URI-encodes the result. */
		toUriComponent(html: string) {
			const component = html.replace(/&lt;/g, '<')
				.replace(/&gt;/g, '>')
				.replace(/&quot;/g, '"')
				.replace(/&apos;/g, '\'')
				.replace(/&amp;/g, '&'); // last, so `&amp;lt;` does not turn into `<`
			return encodeURIComponent(component);
		}

		/**
		 * Handles special cases: spans whose contents must not be formatted, so
		 * the whole span is consumed at once instead of going through the stack.
		 *
		 * @returns whether a span was recognised (in which case `offset` has moved past it)
		 */
		runLookahead(spanType: SpanType, start: number) {
			switch (spanType) {
			case '`':
				// code span. Not only are the contents not formatted, but
				// the start and end delimiters must match in length.
				// ``Neither `this` nor ```this``` end this code span.``
				{
					let delimLength = 0;
					let i = start;
					while (this.at(i) === '`') {
						delimLength++;
						i++;
					}
					let curDelimLength = 0;
					while (i < this.str.length) {
						const char = this.at(i);
						if (char === '\n') break;
						if (char === '`') {
							curDelimLength++;
						} else {
							if (curDelimLength === delimLength) break;
							curDelimLength = 0;
						}
						i++;
					}
					if (curDelimLength !== delimLength) return false;
					const end = i;
					// matching delims found
					this.pushSlice(start);
					let innerStart = start + delimLength;
					let innerEnd = i - delimLength;
					if (innerStart + 1 >= innerEnd) {
						// no special whitespace handling
					} else if (this.at(innerStart) === ' ' && this.at(innerEnd - 1) === ' ') {
						innerStart++; // strip starting and ending space
						innerEnd--;
					} else if (this.at(innerStart) === ' ' && this.at(innerStart + 1) === '`') {
						innerStart++; // strip starting space
					} else if (this.at(innerEnd - 1) === ' ' && this.at(innerEnd - 2) === '`') {
						innerEnd--; // strip ending space
					}
					if (this.showSyntax) this.buffers.push(`<tt>${this.slice(start, innerStart)}</tt>`);
					this.buffers.push(`<code>`);
					this.buffers.push(this.slice(innerStart, innerEnd));
					this.buffers.push(`</code>`);
					if (this.showSyntax) this.buffers.push(`<tt>${this.slice(innerEnd, end)}</tt>`);
					this.offset = end;
				}
				return true;
			case '[':
				// Link span. Several possibilities:
				// [[text <uri>]] - a link with custom text
				// [[search term]] - Google search
				// [[wiki: search term]] - Wikipedia search
				// [[pokemon: species name]] - icon (also item:, type:, category:)
				{
					if (this.slice(start, start + 2) !== '[[') return false;
					let i = start + 2;
					let colonPos = -1; // `:`
					let anglePos = -1; // `<`
					while (i < this.str.length) {
						const char = this.at(i);
						if (char === ']' || char === '\n') break;
						if (char === ':' && colonPos < 0) colonPos = i;
						if (char === '&' && this.slice(i, i + 4) === '&lt;') anglePos = i;
						i++;
					}
					if (this.slice(i, i + 2) !== ']]') return false;

					this.pushSlice(start);
					this.offset = i + 2;
					let termEnd = i;
					let uri = '';
					if (anglePos >= 0 && this.slice(i - 4, i) === '&gt;') { // `>`
						uri = this.slice(anglePos + 4, i - 4);
						termEnd = anglePos;
						if (this.at(termEnd - 1) === ' ') termEnd--;
						uri = encodeURI(uri.replace(/^([a-z]*[^a-z:])/g, 'http://$1'));
					}
					// drop any <a>/<u> tags the constructor's link pass put inside the term
					let term = this.slice(start + 2, termEnd).replace(/<\/?[au](?: [^>]+)?>/g, '');
					if (this.showSyntax) {
						term += `<small>${this.slice(termEnd, i)}</small>`;
					} else if (uri && !this.isTrusted) {
						// untrusted custom-text links always show where they really point
						const shortUri = uri.replace(/^https?:\/\//, '').replace(/^www\./, '').replace(/\/$/, '');
						term += `<small> &lt;${shortUri}&gt;</small>`;
						// closes the href attribute early to append rel; the template below supplies the final quote
						uri += '" rel="noopener';
					}

					if (colonPos > 0) {
						const key = this.slice(start + 2, colonPos).toLowerCase();
						switch (key) {
						case 'w':
						case 'wiki':
							if (this.showSyntax) break;
							term = term.slice(term.charAt(key.length + 1) === ' ' ? key.length + 2 : key.length + 1);
							uri = `//en.wikipedia.org/w/index.php?title=Special:Search&search=${this.toUriComponent(term)}`;
							term = `wiki: ${term}`;
							break;
						case 'pokemon':
						case 'item':
						case 'type':
						case 'category':
							if (this.showSyntax) {
								this.buffers.push(`<tt>${this.slice(start, this.offset)}</tt>`);
								return true;
							}
							term = term.slice(term.charAt(key.length + 1) === ' ' ? key.length + 2 : key.length + 1);

							let display = '';
							if (this.isTrusted) {
								display = `<psicon ${key}="${term}" />`;
							} else {
								display = `[${term}]`;
							}

							let dir = key;
							if (key === 'item') dir += 's';
							if (key === 'category') dir = 'categories' as 'category';

							uri = `//dex.pokemonshowdown.com/${dir}/${toID(term)}`;
							term = display;
						}
					}
					if (!uri) {
						uri = `//www.google.com/search?ie=UTF-8&btnI&q=${this.toUriComponent(term)}`;
					}
					if (this.showSyntax) {
						this.buffers.push(`<tt>[[</tt><u>${term}</u><tt>]]</tt>`);
					} else {
						this.buffers.push(`<a href="${uri}" target="_blank">${term}</a>`);
					}
				}
				return true;
			case '<':
				// Roomid-link span. Not to be confused with a URL span.
				// `<<roomid>>`
				{
					// the delimiters are escaped, so each is 8 characters long
					if (this.slice(start, start + 8) !== '&lt;&lt;') return false; // <<
					let i = start + 8;
					while (/[a-z0-9-]/.test(this.at(i))) i++;
					if (this.slice(i, i + 8) !== '&gt;&gt;') return false; // >>

					this.pushSlice(start);
					const roomid = this.slice(start + 8, i);
					if (this.showSyntax) {
						this.buffers.push(`<small>&lt;&lt;</small><u>${roomid}</u><small>&gt;&gt;</small>`);
					} else {
						this.buffers.push(`&laquo;<a href="/${roomid}" target="_blank">${roomid}</a>&raquo;`);
					}
					this.offset = i + 8;
				}
				return true;
			case 'a': case 'u':
				// URL span. Skip to the end of the link - where `</a>` or `</u>` is.
				// Nothing inside should be formatted further (obviously we don't want
				// `example.com/__foo__` to turn `foo` italic).
				{
					let i = start + 2;
					// Find </a> or </u>.
					// We need to check the location of `>` to disambiguate from </small>.
					while (
						i < this.str.length &&
						(this.at(i) !== '<' || this.at(i + 1) !== '/' || this.at(i + 3) !== '>')
					) i++;
					// Defensive: the constructor always emits a closing tag, but without
					// this bound a missing one would make the scan loop forever.
					if (i >= this.str.length) return false;
					i += 4;
					this.pushSlice(i);
				}
				return true;
			}
			return false;
		}

		/**
		 * Runs the formatter over the text from the current `offset` onwards.
		 *
		 * @returns the resulting HTML (all buffers joined)
		 */
		get() {
			let beginningOfLine = this.offset;
			// main loop! `i` tracks our position
			// Note that we skip around a lot; `i` is mutated inside the loop
			// pretty often.
			for (let i = beginningOfLine; i < this.str.length; i++) {
				const char = this.at(i);
				switch (char) {
				case '_':
				case '*':
				case '~':
				case '^':
				case '\\':
				case '|':
					// Must be exactly two chars long.
					if (this.at(i + 1) === char && this.at(i + 2) !== char) {
						// This is a completely normal two-char span. Close it if it's
						// already open, open it if it's not.
						// The inside of regular spans must not start or end with a space.
						if (!(this.at(i - 1) !== ' ' && this.closeSpan(char, i, i + 2))) {
							if (this.at(i + 2) !== ' ') this.pushSpan(char, i, i + 2);
						}
						if (i < this.offset) {
							i = this.offset - 1;
							break;
						}
					}
					// skip the rest of a run of the same character
					while (this.at(i + 1) === char) i++;
					break;
				case '(':
					// `(` span - does nothing except end spans
					this.stack.push(['(', -1]);
					break;
				case ')':
					// end of `(` span
					this.closeParenSpan(i);
					if (i < this.offset) {
						i = this.offset - 1;
						break;
					}
					break;
				case '`':
					// ` ``code`` ` span. Uses lookahead because its contents are not
					// formatted.
					// Must be at least two `` ` `` in a row.
					if (this.at(i + 1) === '`') this.runLookahead('`', i);
					if (i < this.offset) {
						i = this.offset - 1;
						break;
					}
					while (this.at(i + 1) === '`') i++;
					break;
				case '[':
					// `[` (link) span. Uses lookahead because it might contain a
					// URL which can't be formatted, or search terms that can't be
					// formatted.
					this.runLookahead('[', i);
					if (i < this.offset) {
						i = this.offset - 1;
						break;
					}
					while (this.at(i + 1) === '[') i++;
					break;
				case ':':
					// Looks behind for `spoiler:` or `spoilers:`. Spoiler spans
					// are also weird because they don't require an ending symbol,
					// although that's not handled here.
					if (i < 7) break;
					if (this.slice(i - 7, i + 1).toLowerCase() === 'spoiler:' ||
						this.slice(i - 8, i + 1).toLowerCase() === 'spoilers:') {
						if (this.at(i + 1) === ' ') i++;
						this.pushSpan('spoiler', i + 1, i + 1);
					}
					break;
				case '&': // escaped '<' or '>'
					// greentext or roomid
					if (i === beginningOfLine && this.slice(i, i + 4) === '&gt;') {
						// greentext span, normal except it lacks an ending span
						// check for certain emoticons like `>_>` or `>w<`
						if (!"._/=:;".includes(this.at(i + 4)) && !['w&lt;', 'w&gt;'].includes(this.slice(i + 4, i + 9))) {
							this.pushSpan('>', i, i);
						}
					} else {
						// completely normal `<<roomid>>` span
						// uses lookahead because roomids can't be formatted.
						this.runLookahead('<', i);
					}
					if (i < this.offset) {
						i = this.offset - 1;
						break;
					}
					// skip a run of escaped `<` so `<<<room>>` isn't retried at every position
					while (this.slice(i + 1, i + 5) === 'lt;&') i += 4;
					break;
				case '<': // guaranteed to be <a ...> or <u>
					// URL span
					// The constructor has already converted `<` to `&lt;` and URLs
					// to links, so `<` must be the start of a converted link.
					this.runLookahead('a', i);
					if (i < this.offset) {
						i = this.offset - 1;
						break;
					}
					// should never happen
					break;
				case '\r':
				case '\n':
					// End of the line. No spans span multiple lines.
					this.popAllSpans(i);
					if (this.replaceLinebreaks) {
						this.buffers.push(`<br />`);
						// skip the line-break character itself so it isn't copied to the output
						this.offset++;
					}
					beginningOfLine = i + 1;
					break;
				}
			}

			this.popAllSpans(this.str.length);
			return this.buffers.join('');
		}
	}

	/**
	 * Renders chat-formatting markup in `str` as HTML.
	 *
	 * Convenience wrapper: builds a one-off `TextFormatter` with the given
	 * options and returns the result of its `get()` pass.
	 */
	export function formatText(str: string, isTrusted = false, replaceLinebreaks = false, showSyntax = false) {
		const formatter = new TextFormatter(str, isTrusted, replaceLinebreaks, showSyntax);
		return formatter.get();
	}

	/**
	 * Takes a string and strips all standard chat formatting except greentext from it, the text of a link is kept.
	 *
	 * Only single-word spans are stripped: the delimited text may not contain
	 * whitespace (or the delimiter character itself).
	 */
	export function stripFormatting(str: string) {
		// Doesn't match > meme arrows because the angle bracket appears in the chat still.
		// One alternative per two-character delimiter: ** __ ~~ `` ^^ \\
		str = str.replace(/\*\*([^\s*]+)\*\*|__([^\s_]+)__|~~([^\s~]+)~~|``([^\s`]+)``|\^\^([^\s^]+)\^\^|\\\\([^\s\\]+)\\\\/g,
			(match, $1, $2, $3, $4, $5, $6) => $1 || $2 || $3 || $4 || $5 || $6);
		// Remove all of the link except for the text in [[text<url>]]
		return str.replace(/\[\[(?:([^<]*)\s*<[^>]+>|([^\]]+))\]\]/g, (match, $1, $2) => $1 || $2 || '');
	}