1 import schema from "./schema";
2 import markdownit from "markdown-it";
3 import {MarkdownParser, defaultMarkdownParser} from "prosemirror-markdown";
4 import {htmlToDoc, KeyedMultiStack} from "./util";
// Start from the token map of the default markdown parser. Note that this is
// the very same object held by `defaultMarkdownParser`, so the additions
// below are made in place on that shared map.
const {tokens} = defaultMarkdownParser;

// These entries are really just placeholders on the object: they make the
// MarkdownParser create html_block / html_inline handler slots, which are
// then replaced by the custom parser.tokenHandlers.html_[block/inline]
// implementations further down in this file.
Object.assign(tokens, {
    html_block: {block: "callout", noCloseToken: true},
    html_inline: {mark: "underline"},
});

// CommonMark tokenizer with raw HTML enabled so that HTML tokens are emitted.
const tokenizer = markdownit("commonmark", {html: true});
const parser = new MarkdownParser(schema, tokenizer, tokens);
/**
 * Token handler for HTML blocks.
 * When we come across HTML blocks we use the document schema (via htmlToDoc)
 * to parse them into nodes, then re-add each resulting top-level node back
 * into the parser state.
 *
 * @param {Object} state  Markdown parse state that the nodes are added to.
 * @param {Object} tok    The html_block token; its `content` holds the raw HTML.
 * @param {Array} tokens  Unused; part of the handler call signature.
 * @param {number} i      Unused; part of the handler call signature.
 */
parser.tokenHandlers.html_block = function(state, tok, tokens, i) {
    // Missing content is treated as an empty HTML string.
    const contentDoc = htmlToDoc(tok.content || '');
    for (const node of contentDoc.content.content) {
        state.addNode(node.type, node.attrs, node.content);
    }
};
// When we come across inline HTML we parse out the tag and keep track of
// that in a stack, along with the marks they parse out to.
// We open/close the marks within the state depending on the tag open/close type.
// NOTE(review): this stack lives at module level, so entries for tags that are
// never closed remain in it after the handler returns - confirm this is acceptable.
const tagStack = new KeyedMultiStack();

/**
 * Token handler for inline HTML tags.
 *
 * @param {Object} state  Markdown parse state on which marks are opened/closed.
 * @param {Object} tok    The html_inline token; its `content` holds the raw tag text.
 * @param {Array} tokens  Unused; part of the handler call signature.
 * @param {number} i      Unused; part of the handler call signature.
 */
parser.tokenHandlers.html_inline = function(state, tok, tokens, i) {
    const isClosing = tok.content.startsWith('</');
    const isSelfClosing = tok.content.endsWith('/>');
    const tagName = parseTagNameFromHtmlTokenContent(tok.content);

    if (!isClosing) {
        // An opening tag alone cannot be parsed for marks, so it is completed
        // with a dummy text character and a matching closing tag first.
        const completeTag = isSelfClosing ? tok.content : `${tok.content}a</${tagName}>`;
        const marks = extractMarksFromHtml(completeTag);
        tagStack.push(tagName, marks);
        for (const mark of marks) {
            state.openMark(mark);
        }
    }

    if (isSelfClosing || isClosing) {
        // Close in the reverse of the order the marks were opened in. The popped
        // list is copied before reversing so the stored array is not mutated, and
        // an unmatched closing tag (nothing popped) simply closes nothing.
        const marks = [...(tagStack.pop(tagName) || [])].reverse();
        for (const mark of marks) {
            state.closeMark(mark);
        }
    }
};
/**
 * Extract the marks applied to the first inline node produced when the
 * given HTML is parsed inside a paragraph.
 *
 * @param {String} html  Inline HTML to parse; a falsy value is treated as empty.
 * @return {Array} A new array of the marks found, or an empty array when the
 *                 parsed document has no such node.
 */
function extractMarksFromHtml(html) {
    // Wrap in a paragraph so the inline HTML parses as paragraph content.
    const contentDoc = htmlToDoc(`<p>${html || ''}</p>`);
    // doc -> first block -> first inline node -> its marks (any step may be absent).
    const marks = contentDoc?.content?.content?.[0]?.content?.content?.[0]?.marks;
    // Hand back a copy so callers can reorder the list without touching the
    // array owned by the parsed node, and always return something iterable.
    return marks ? [...marks] : [];
}
/**
 * Parse the lower-cased tag name out of the raw text of an HTML tag token,
 * for example `<Span class="a">`, `</span>` or `<br/>`.
 *
 * @param {string} tokenContent  Raw tag text.
 * @return {string} The tag name in lower case.
 */
function parseTagNameFromHtmlTokenContent(tokenContent) {
    // The name ends at the first whitespace character of any kind: attributes
    // may be separated from the name by a tab or line break, not only a space.
    const [nameWithBrackets] = tokenContent.split(/\s/);
    // Strip the tag delimiters that can surround the name.
    return nameWithBrackets.replace(/[<>\/]/g, '').toLowerCase();
}
69 export default parser;