Rework Markdown parsing (#719)

* Switch markdown parser

* Add inline maths

* Basic plain text rendering

* Add display math support

* Remove unnecessary <p> tag

* Fixed spoiler not working

* Add spoiler reason input support

* Make paragraphs display with newline in between

* Handle single newlines

* Fix typo when allowing start attribute

* Cleanup for merge

* Remove unused import
This commit is contained in:
ginnyTheCat 2022-08-21 16:04:09 +02:00 committed by GitHub
parent 76c16ce294
commit 80aa55b706
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 206 additions and 1404 deletions

View file

@ -1,198 +1,89 @@
/* eslint-disable no-param-reassign */
/* eslint-disable no-plusplus */
/* eslint-disable no-continue */
import SimpleMarkdown from '@khanacademy/simple-markdown';
import { codes } from 'micromark-util-symbol/codes';
import { types } from 'micromark-util-symbol/types';
import { resolveAll } from 'micromark-util-resolve-all';
import { splice } from 'micromark-util-chunked';
// Pull out the simple-markdown pieces used below: the default rule set, the parser/output
// factories, the regex matcher builders, and the HTML helpers.
const {
defaultRules, parserFor, outputFor, anyScopeRegex, blockRegex, inlineRegex, htmlTag, sanitizeText,
} = SimpleMarkdown;
function inlineExtension(marker, len, key) {
const keySeq = `${key}Sequence`;
const keySeqTmp = `${keySeq}Temporary`;
return () => {
/**
 * Tokenizer for a run of `marker` characters.
 *
 * `marker`, `len` and `keySeqTmp` come from the enclosing scope. The run is wrapped in a
 * `keySeqTmp` token; tokenizing succeeds once `len` markers have been consumed and fails
 * if the run is shorter, if the first code is not a marker, or if the previous code was
 * already a marker that did not come from a character escape.
 *
 * @this {{previous: *, events: Array}} tokenize context; both fields are read once, up front.
 * @param {object} effects - provides `enter`, `consume` and `exit`.
 * @param {Function} ok - invoked with the current code on success.
 * @param {Function} nok - invoked with the current code on failure.
 * @returns {Function} the initial state function.
 */
function tokenize(effects, ok, nok) {
  const precedingCode = this.previous;
  const priorEvents = this.events;
  let consumed = 0;

  // State: inside the marker run.
  function sequence(code) {
    const reachedLength = !(consumed < len);
    if (reachedLength) {
      // Full-length run seen: close the temporary token and hand over.
      effects.exit(keySeqTmp);
      return ok(code);
    }
    // Run ended before reaching the required length.
    if (code !== marker) return nok(code);
    effects.consume(code);
    consumed += 1;
    return sequence;
  }

  // State: before the first marker.
  function begin(code) {
    if (code !== marker) return nok(code);
    if (precedingCode === marker) {
      // A marker directly before this one is only acceptable if it was escaped.
      const lastToken = priorEvents[priorEvents.length - 1][1];
      if (lastToken.type !== types.characterEscape) return nok(code);
    }
    effects.enter(keySeqTmp);
    return sequence(code);
  }

  return begin;
}
/**
 * resolveAll hook: pairs up temporary marker sequences and wraps each pair, together with
 * the events between them, in a `key` token containing a `${key}Text` token.
 *
 * `key`, `keySeq` and `keySeqTmp` come from the enclosing scope. `events` is mutated in
 * place and also returned. Temporary sequences left without a partner are retyped to
 * `types.data` at the end.
 *
 * @param {Array} events - event list; entries are read as [kind, token, context] tuples.
 * @param {object} context - tokenize context; `context.parser.constructs.insideSpan.null`
 *   is handed to `resolveAll` for the events between the two sequences.
 * @returns {Array} the same `events` array.
 */
function resolve(events, context) {
let i = -1;
while (++i < events.length) {
// Only the 'enter' of a temporary sequence is considered as a closing marker.
if (events[i][0] !== 'enter' || events[i][1].type !== keySeqTmp) continue;
let open = i;
// Walk backwards to the nearest 'exit' of an earlier temporary sequence (the opener).
while (open--) {
if (events[open][0] !== 'exit' || events[open][1].type !== keySeqTmp) continue;
// Promote both sequences from the temporary type to the final sequence type.
events[i][1].type = keySeq;
events[open][1].type = keySeq;
// Token spanning from the start of the opener to the end of the closer.
const border = {
type: key,
start: { ...events[open][1].start },
end: { ...events[i][1].end },
};
// Token spanning only the content between the two sequences.
const text = {
type: `${key}Text`,
start: { ...events[open][1].end },
end: { ...events[i][1].start },
};
const nextEvents = [
['enter', border, context],
['enter', events[open][1], context],
['exit', events[open][1], context],
['enter', text, context],
];
// Append the events strictly between opener and closer, resolved with the
// insideSpan constructs.
splice(
nextEvents,
nextEvents.length,
0,
resolveAll(
context.parser.constructs.insideSpan.null,
events.slice(open + 1, i),
context,
),
);
splice(nextEvents, nextEvents.length, 0, [
['exit', text, context],
['enter', events[i][1], context],
['exit', events[i][1], context],
['exit', border, context],
]);
// Replace indices open - 1 .. i + 1 with the rebuilt events. This assumes the
// opener's 'enter' sits directly before its 'exit' and the closer's 'exit' directly
// after its 'enter'.
splice(events, open - 1, i - open + 3, nextEvents);
// Point at the last inserted event so the outer ++i resumes right after it.
i = open + nextEvents.length - 2;
break;
}
}
// Whatever is still temporary never found a partner: treat it as plain data.
events.forEach((event) => {
if (event[1].type === keySeqTmp) {
event[1].type = types.data;
}
});
return events;
}
// Construct shared by all registrations below: `tokenize` recognizes a marker run and
// `resolve` pairs the runs up afterwards.
const tokenizer = {
tokenize,
resolveAll: resolve,
};
// Extension object: registers the construct for `marker` under `text`, adds it to
// `insideSpan`, and lists `marker` among the attention markers.
return {
text: { [marker]: tokenizer },
insideSpan: { null: [tokenizer] },
attentionMarkers: { null: [marker] },
};
};
/**
 * Renders a maths node as `<wrap data-mx-maths="…"><code>…</code></wrap>`.
 *
 * @param {string} wrap - name of the wrapping tag.
 * @param {{content: string}} node - parsed maths node; `content` is used both as the
 *   attribute value and, sanitized, as the `<code>` body.
 * @returns {string} the HTML produced by `htmlTag`.
 */
function mathHtml(wrap, node) {
  const { content } = node;
  const codeElement = htmlTag('code', sanitizeText(content));
  const attributes = { 'data-mx-maths': content };
  return htmlTag(wrap, codeElement, attributes);
}
// Spoiler syntax built on the generic inline extension: vertical-bar marker, runs of 2,
// token names derived from the key 'spoiler'.
const spoilerExtension = inlineExtension(codes.verticalBar, 2, 'spoiler');
const spoilerExtensionHtml = {
enter: {
spoiler() {
this.tag('<span data-mx-spoiler>');
},
const rules = {
...defaultRules,
Array: {
...defaultRules.Array,
plain: (arr, output, state) => arr.map((node) => output(node, state)).join(''),
},
exit: {
spoiler() {
this.tag('</span>');
},
// Block maths: `$$ … $$`, with any newlines directly inside the fences dropped.
// Ordered just after the default list rule. The content is kept as a raw string.
displayMath: {
order: defaultRules.list.order + 0.5,
match: blockRegex(/^\$\$\n*([\s\S]+?)\n*\$\$/),
parse: (capture) => ({ content: capture[1] }),
plain: (node) => `$$\n${node.content}\n$$`,
html: (node) => mathHtml('div', node),
},
// Default newline rule plus a plain-text renderer.
newline: {
...defaultRules.newline,
plain: () => '\n',
},
// Paragraphs: plain text ends with a blank line, HTML is wrapped in <p>.
paragraph: {
...defaultRules.paragraph,
plain: (node, output, state) => `${output(node.content, state)}\n\n`,
html: (node, output, state) => htmlTag('p', output(node.content, state)),
},
// Plain text keeps the backslash of an escape.
escape: {
...defaultRules.escape,
plain: (node, output, state) => `\\${output(node.content, state)}`,
},
// The inline styles below keep the default parsing and add a plain renderer that
// re-emits the content between markdown markers.
em: {
...defaultRules.em,
plain: (node, output, state) => `_${output(node.content, state)}_`,
},
strong: {
...defaultRules.strong,
plain: (node, output, state) => `**${output(node.content, state)}**`,
},
u: {
...defaultRules.u,
plain: (node, output, state) => `__${output(node.content, state)}__`,
},
del: {
...defaultRules.del,
plain: (node, output, state) => `~~${output(node.content, state)}~~`,
},
// Spoiler: `||content||`, optionally followed directly by `(reason)`.
// Ordered just before the default em rule. The content is parsed recursively, the
// reason is kept as a raw string.
spoiler: {
order: defaultRules.em.order - 0.5,
match: inlineRegex(/^\|\|([\s\S]+?)\|\|(?:\(([\s\S]+?)\))?/),
parse: (capture, parse, state) => ({
content: parse(capture[1], state),
reason: capture[2],
}),
// Plain text does not include the spoiler content: it emits a link-shaped placeholder
// with a fixed `mxc://somewhere` target and the reason, if any, in the label.
plain: (node) => `[spoiler${node.reason ? `: ${node.reason}` : ''}](mxc://somewhere)`,
// HTML: <span data-mx-spoiler>, with the sanitized reason as attribute value when present.
html: (node, output, state) => `<span data-mx-spoiler${node.reason ? `="${sanitizeText(node.reason)}"` : ''}>${output(node.content, state)}</span>`,
},
// Inline maths: `$…$` where the content starts and ends with a non-whitespace character
// and the closing `$` is not followed by a digit. Ordered just after the default del rule.
inlineMath: {
order: defaultRules.del.order + 0.5,
match: inlineRegex(/^\$(\S[\s\S]+?\S|\S)\$(?!\d)/),
parse: (capture) => ({ content: capture[1] }),
plain: (node) => `$${node.content}$`,
html: (node) => mathHtml('span', node),
},
// Line break: matcher replaced by "optional spaces, then a newline", in any scope.
br: {
...defaultRules.br,
match: anyScopeRegex(/^ *\n/),
plain: () => '\n',
},
// Text: lazily consumes characters up to the next character that is not ASCII
// alphanumeric, whitespace or in \u00c0-\uffff, or up to spaces followed by a newline,
// a `word:` followed by a non-space, or the end of the input.
text: {
...defaultRules.text,
match: anyScopeRegex(/^[\s\S]+?(?=[^0-9A-Za-z\s\u00c0-\uffff]| *\n|\w+:\S|$)/),
plain: (node) => node.content,
},
};
/** Handler shared by the hooks that only need to start buffering output. */
function openMathsBuffer() {
  this.buffer();
}

/** Handler shared by the hooks that pass a token's serialized text through as raw output. */
function emitSerializedToken(token) {
  const serialized = this.sliceSerialize(token);
  this.raw(serialized);
}

/**
 * Writes `<openingTag><value>"><code><value><closingTag>` through the compiler context:
 * `value` goes out once as a tag (attribute value) and once as raw content.
 */
function writeMathsElement(compiler, value, openingTag, closingTag) {
  compiler.tag(openingTag);
  compiler.tag(value);
  compiler.tag('"><code>');
  compiler.raw(value);
  compiler.tag(closingTag);
}

/**
 * HTML compiler hooks for maths tokens. Flow maths becomes
 * `<div data-mx-maths="…"><code>…</code></div>`, text maths becomes the same structure
 * with `<span>`. Every hook runs with the compiler context as `this`.
 */
const mathExtensionHtml = {
  enter: {
    mathFlow() {
      this.lineEndingIfNeeded();
    },
    mathFlowFenceMeta: openMathsBuffer,
    mathText: openMathsBuffer,
  },
  exit: {
    mathFlow() {
      // Drop a single trailing line ending from the buffered content before encoding.
      const buffered = this.resume();
      const trimmed = buffered.replace(/(?:\r?\n|\r)$/, '');
      writeMathsElement(this, this.encode(trimmed), '<div data-mx-maths="', '</code></div>');
      // Called without a value, as in the original, to clear both flags.
      this.setData('mathFlowOpen');
      this.setData('slurpOneLineEnding');
    },
    mathFlowFence() {
      // Only the first fence opens the block; later fences are ignored here.
      if (this.getData('mathFlowOpen')) return;
      this.setData('mathFlowOpen', true);
      this.setData('slurpOneLineEnding', true);
      this.buffer();
    },
    mathFlowFenceMeta() {
      // Discard whatever was buffered for the fence meta.
      this.resume();
    },
    mathFlowValue: emitSerializedToken,
    mathText() {
      writeMathsElement(this, this.encode(this.resume()), '<span data-mx-maths="', '</code></span>');
    },
    mathTextData: emitSerializedToken,
  },
};
// Build the parser and both renderers from the same rule set. `outputFor` is given the
// name of the per-rule renderer property to use ('plain' or 'html', as defined on the rules).
const parser = parserFor(rules);
const plainOutput = outputFor(rules, 'plain');
const htmlOutput = outputFor(rules, 'html');
export {
inlineExtension,
spoilerExtension, spoilerExtensionHtml,
mathExtensionHtml,
parser, plainOutput, htmlOutput,
};

View file

@ -19,7 +19,7 @@ const permittedTagToAttributes = {
div: ['data-mx-maths'],
a: ['name', 'target', 'href', 'rel'],
img: ['width', 'height', 'alt', 'title', 'src', 'data-mx-emoticon'],
o: ['start'],
ol: ['start'],
code: ['class'],
};