feat(markdown): switch to turnish instead of turndown

This commit is contained in:
Elian Doran 2026-01-17 12:44:30 +02:00
parent 5600f1b7b1
commit aa01bc1457
No known key found for this signature in database
5 changed files with 101 additions and 89 deletions

View File

@ -126,7 +126,7 @@
"swagger-jsdoc": "6.2.8",
"time2fa": "1.4.2",
"tmp": "0.2.5",
"turndown": "7.2.2",
"turnish": "1.7.1",
"unescape": "1.0.1",
"vite": "7.3.1",
"ws": "8.19.0",

View File

@ -387,4 +387,23 @@ describe("Markdown export", () => {
expect(markdownExportService.toMarkdown(html)).toBe(expected);
});
it("maintains escaped HTML tags", () => {
    // The entity-encoded <div> is text, not markup, so the exported Markdown
    // must carry it with backslash-escaped angle brackets.
    const escapedTagHtml = /*html*/`<p>&lt;div&gt;Hello World&lt;/div&gt;</p>`;
    const markdown = markdownExportService.toMarkdown(escapedTagHtml);
    expect(markdown).toBe(`\\<div\\>Hello World\\</div\\>`);
});
it("escapes HTML tags inside list", () => {
// An entity-encoded tag (`&lt;note&gt;`) inside a list item is text, not markup.
const html = trimIndentation/*html*/`\
<ul>
<li data-list-item-id="e07fda078f7dd7103a3b9017f49eb1589">
&lt;note&gt; is note.
</li>
</ul>
`;
// Expected export: a `*` bullet followed by the text with backslash-escaped angle brackets.
const expected = trimIndentation`\
* \\<note\\> is note.`;
expect(markdownExportService.toMarkdown(html)).toBe(expected);
});
});

View File

@ -1,9 +1,7 @@
"use strict";
import TurndownService, { type Rule } from "turndown";
import { gfm } from "@triliumnext/turndown-plugin-gfm";
import Turnish, { type Rule } from "turnish";
let instance: TurndownService | null = null;
let instance: Turnish | null = null;
// TODO: Move this to a dedicated file someday.
export const ADMONITION_TYPE_MAPPINGS: Record<string, string> = {
@ -16,12 +14,12 @@ export const ADMONITION_TYPE_MAPPINGS: Record<string, string> = {
export const DEFAULT_ADMONITION_TYPE = ADMONITION_TYPE_MAPPINGS.note;
const fencedCodeBlockFilter: TurndownService.Rule = {
filter: function (node, options) {
const fencedCodeBlockFilter: Turnish.Rule = {
filter (node, options) {
return options.codeBlockStyle === "fenced" && node.nodeName === "PRE" && node.firstChild !== null && node.firstChild.nodeName === "CODE";
},
replacement: function (content, node, options) {
replacement (content, node, options) {
if (!node.firstChild || !("getAttribute" in node.firstChild) || typeof node.firstChild.getAttribute !== "function") {
return content;
}
@ -29,14 +27,15 @@ const fencedCodeBlockFilter: TurndownService.Rule = {
const className = node.firstChild.getAttribute("class") || "";
const language = rewriteLanguageTag((className.match(/language-(\S+)/) || [null, ""])[1]);
return "\n\n" + options.fence + language + "\n" + node.firstChild.textContent + "\n" + options.fence + "\n\n";
return `\n\n${ options.fence }${language }\n${ node.firstChild.textContent }\n${ options.fence }\n\n`;
}
};
function toMarkdown(content: string) {
if (instance === null) {
instance = new TurndownService({
instance = new Turnish({
headingStyle: "atx",
bulletListMarker: "*",
codeBlockStyle: "fenced",
blankReplacement(content, node, options) {
if (node.nodeName === "SECTION" && (node as HTMLElement).classList.contains("include-note")) {
@ -44,7 +43,7 @@ function toMarkdown(content: string) {
}
// Original implementation as per https://github.com/mixmark-io/turndown/blob/master/src/turndown.js.
return ("isBlock" in node && node.isBlock) ? '\n\n' : ''
return ("isBlock" in node && node.isBlock) ? '\n\n' : '';
}
});
// Filter is heavily based on: https://github.com/mixmark-io/turndown/issues/274#issuecomment-458730974
@ -59,7 +58,7 @@ function toMarkdown(content: string) {
instance.keep([ "kbd", "sup", "sub" ]);
}
return instance.turndown(content);
return instance.render(content);
}
function rewriteLanguageTag(source: string) {
@ -85,14 +84,14 @@ function buildImageFilter() {
/**
 * Patterns for text that would otherwise be read as Markdown syntax.
 *
 * - `before`: matches that get a backslash placed in front of them: inline
 *   markers (`\`, `*`, backtick, `[`, `]`, `_`) anywhere, and `-`, `+`, `>`,
 *   `~~~` or a run of one to six `#` at the very start of the text.
 * - `after`: leading digits directly followed by a dot; the backslash goes
 *   after the digits so that `1.` becomes `1\.`.
 */
const ESCAPE_PATTERNS = {
    // `#{1,6}` is the bounded quantifier; the previous `#{1-6}` was not a
    // quantifier at all and only matched the literal text "#{1-6}".
    before: /([\\*`[\]_]|(?:^[-+>])|(?:^~~~)|(?:^#{1,6}))/g,
    after: /((?:^\d+(?=\.)))/
};

// Single alternation so one pass over the text handles both kinds of match.
// Capture group 1 is the `before` match, capture group 2 the `after` match.
const escapePattern = new RegExp(`(?:${ESCAPE_PATTERNS.before.source}|${ESCAPE_PATTERNS.after.source})`, "g");

/**
 * Backslash-escapes Markdown-significant sequences in plain text.
 *
 * @param content the raw text to escape.
 * @returns the text with a backslash inserted before each `before` match and
 *          after each `after` match.
 */
function escapeMarkdown(content: string) {
    return content.replace(escapePattern, (_match, before, after) => {
        return before ? `\\${before}` : `${after}\\`;
    });
}
function escapeLinkDestination(destination: string) {
@ -102,10 +101,10 @@ function buildImageFilter() {
}
/**
 * Escapes a link/image title so it can be placed between double quotes.
 *
 * @param title the raw title text.
 * @returns the title with every `"` preceded by a backslash.
 */
function escapeLinkTitle(title: string) {
    const segments = title.split('"');
    return segments.join('\\"');
}
const imageFilter: TurndownService.Rule = {
const imageFilter: Turnish.Rule = {
filter: "img",
replacement(content, _node) {
const node = _node as HTMLElement;
@ -117,12 +116,12 @@ function buildImageFilter() {
// TODO: Deduplicate with upstream.
const untypedNode = (node as any);
const alt = escapeMarkdown(cleanAttribute(untypedNode.getAttribute('alt')))
const src = escapeLinkDestination(untypedNode.getAttribute('src') || '')
const title = cleanAttribute(untypedNode.getAttribute('title'))
const titlePart = title ? ' "' + escapeLinkTitle(title) + '"' : ''
const alt = escapeMarkdown(cleanAttribute(untypedNode.getAttribute('alt')));
const src = escapeLinkDestination(untypedNode.getAttribute('src') || '');
const title = cleanAttribute(untypedNode.getAttribute('title'));
const titlePart = title ? ` "${ escapeLinkTitle(title) }"` : '';
return src ? '![' + alt + ']' + '(' + src + titlePart + ')' : ''
return src ? `![${ alt }]` + `(${ src }${titlePart })` : '';
}
};
return imageFilter;
@ -151,7 +150,7 @@ function buildAdmonitionFilter() {
return DEFAULT_ADMONITION_TYPE;
}
const admonitionFilter: TurndownService.Rule = {
const admonitionFilter: Turnish.Rule = {
filter(node, options) {
return node.nodeName === "ASIDE" && node.classList.contains("admonition");
},
@ -161,11 +160,11 @@ function buildAdmonitionFilter() {
content = content.replace(/^\n+|\n+$/g, '');
content = content.replace(/^/gm, '> ');
content = `> [!${admonitionType}]\n` + content;
content = `> [!${admonitionType}]\n${ content}`;
return "\n\n" + content + "\n\n";
return `\n\n${ content }\n\n`;
}
}
};
return admonitionFilter;
}
@ -178,15 +177,15 @@ function buildAdmonitionFilter() {
*/
function buildInlineLinkFilter(): Rule {
return {
filter: function (node, options) {
filter (node, options) {
return (
options.linkStyle === 'inlined' &&
node.nodeName === 'A' &&
!!node.getAttribute('href')
)
);
},
replacement: function (content, _node) {
replacement (content, _node) {
const node = _node as HTMLElement;
// Return reference links verbatim.
@ -196,13 +195,13 @@ function buildInlineLinkFilter(): Rule {
// Otherwise treat as normal.
// TODO: Call super() somehow instead of duplicating the implementation.
let href = node.getAttribute('href')
if (href) href = href.replace(/([()])/g, '\\$1')
let title = cleanAttribute(node.getAttribute('title'))
if (title) title = ' "' + title.replace(/"/g, '\\"') + '"'
return '[' + content + '](' + href + title + ')'
let href = node.getAttribute('href');
if (href) href = href.replace(/([()])/g, '\\$1');
let title = cleanAttribute(node.getAttribute('title'));
if (title) title = ` "${ title.replace(/"/g, '\\"') }"`;
return `[${ content }](${ href }${title })`;
}
}
};
}
function buildFigureFilter(): Rule {
@ -214,7 +213,7 @@ function buildFigureFilter(): Rule {
replacement(content, node) {
return (node as HTMLElement).outerHTML;
}
}
};
}
// Keep in line with https://github.com/mixmark-io/turndown/blob/master/src/commonmark-rules.js.
@ -224,13 +223,13 @@ function buildListItemFilter(): Rule {
replacement(content, node, options) {
content = content
.trim()
.replace(/\n/gm, '\n ') // indent
let prefix = options.bulletListMarker + ' '
.replace(/\n/gm, '\n '); // indent
let prefix = `${options.bulletListMarker } `;
const parent = node.parentNode as HTMLElement;
if (parent.nodeName === 'OL') {
var start = parent.getAttribute('start')
var index = Array.prototype.indexOf.call(parent.children, node)
prefix = (start ? Number(start) + index : index + 1) + '. '
const start = parent.getAttribute('start');
const index = Array.prototype.indexOf.call(parent.children, node);
prefix = `${start ? Number(start) + index : index + 1 }. `;
} else if (parent.classList.contains("todo-list")) {
const isChecked = node.querySelector("input[type=checkbox]:checked");
prefix = (isChecked ? "- [x] " : "- [ ] ");
@ -239,7 +238,7 @@ function buildListItemFilter(): Rule {
const result = prefix + content + (node.nextSibling && !/\n$/.test(content) ? '\n' : '');
return result;
}
}
};
}
function buildMathFilter(): Rule {
@ -270,13 +269,13 @@ function buildMathFilter(): Rule {
// Unknown.
return content;
}
}
};
}
// Taken from upstream since it's not exposed.
// https://github.com/mixmark-io/turndown/blob/master/src/commonmark-rules.js
/**
 * Normalizes an attribute value before it is written into Markdown.
 *
 * @param attribute the raw attribute value, possibly missing.
 * @returns an empty string for a missing or empty attribute; otherwise the
 *          value with each run of newlines (and the whitespace following
 *          them) collapsed into a single newline.
 */
function cleanAttribute(attribute: string | null | undefined) {
    if (!attribute) {
        return '';
    }

    const newlineRuns = /(\n+\s*)+/g;
    return attribute.replace(newlineRuns, '\n');
}
export default {

View File

@ -314,4 +314,9 @@ $$`;
expect(markdownService.renderToHtml(input, "Title")).toStrictEqual(expected);
});
it("doesn't unescape HTML in list", () => {
    // Entities inside a list item have to stay encoded in the rendered HTML.
    const markdown = `* &lt;note&gt; is note.`;
    const html = markdownService.renderToHtml(markdown, "Title");
    expect(html).toStrictEqual(/*html*/`<ul><li>&lt;note&gt; is note.</li></ul>`);
});
});

75
pnpm-lock.yaml generated
View File

@ -792,9 +792,9 @@ importers:
tmp:
specifier: 0.2.5
version: 0.2.5
turndown:
specifier: 7.2.2
version: 7.2.2
turnish:
specifier: 1.7.1
version: 1.7.1
unescape:
specifier: 1.0.1
version: 1.0.1
@ -1461,6 +1461,9 @@ importers:
packages:
'@adobe/css-tools@4.4.4':
resolution: {integrity: sha512-Elp+iwUx5rN5+Y8xLt5/GRoG20WGoDCQ/1Fb+1LiGtvwbDavuSk0jhD/eZdckHAuzcDzccnkv+rEjyWfRx18gg==}
'@ampproject/remapping@2.3.0':
resolution: {integrity: sha512-30iZtAPgz+LTIYoeivqYo853f02jBYSd5uGnGpkFV0M3xOt9aN73erkgYAmZU43x4VfqcnLxW9Kpg3R5LC4YYw==}
engines: {node: '>=6.0.0'}
@ -13466,6 +13469,9 @@ packages:
turndown@7.2.2:
resolution: {integrity: sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==}
turnish@1.7.1:
resolution: {integrity: sha512-NgyY7pIDABjKyg2isRgZyFPav6tOyvmqpTx3HROsKrOaE3JccP4C1P2IhAtkAZ8DkQb/O1R7HOFAkxY8uaJmcQ==}
type-check@0.4.0:
resolution: {integrity: sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==}
engines: {node: '>= 0.8.0'}
@ -14407,6 +14413,8 @@ packages:
snapshots:
'@adobe/css-tools@4.4.4': {}
'@ampproject/remapping@2.3.0':
dependencies:
'@jridgewell/gen-mapping': 0.3.13
@ -15078,6 +15086,8 @@ snapshots:
'@ckeditor/ckeditor5-core': 47.4.0
'@ckeditor/ckeditor5-upload': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-ai@47.4.0(bufferutil@4.0.9)(utf-8-validate@6.0.5)':
dependencies:
@ -15218,12 +15228,16 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
'@ckeditor/ckeditor5-widget': 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-cloud-services@47.4.0':
dependencies:
'@ckeditor/ckeditor5-core': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-code-block@47.4.0(patch_hash=2361d8caad7d6b5bddacc3a3b4aa37dbfba260b1c1b22a450413a79c1bb1ce95)':
dependencies:
@ -15416,6 +15430,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-editor-classic@47.4.0':
dependencies:
@ -15425,6 +15441,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-editor-decoupled@47.4.0':
dependencies:
@ -15434,6 +15452,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-editor-inline@47.4.0':
dependencies:
@ -15467,8 +15487,6 @@ snapshots:
'@ckeditor/ckeditor5-table': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-emoji@47.4.0':
dependencies:
@ -15525,8 +15543,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-export-word@47.4.0':
dependencies:
@ -15551,6 +15567,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-font@47.4.0':
dependencies:
@ -15625,6 +15643,8 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
'@ckeditor/ckeditor5-widget': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-html-embed@47.4.0':
dependencies:
@ -15670,8 +15690,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-import-word@47.4.0':
dependencies:
@ -15684,8 +15702,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-indent@47.4.0':
dependencies:
@ -15697,8 +15713,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-inspector@5.0.0': {}
@ -15708,8 +15722,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-line-height@47.4.0':
dependencies:
@ -15734,8 +15746,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-list-multi-level@47.4.0':
dependencies:
@ -15759,8 +15769,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-markdown-gfm@47.4.0':
dependencies:
@ -15798,8 +15806,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
'@ckeditor/ckeditor5-widget': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-mention@47.4.0(patch_hash=5981fb59ba35829e4dff1d39cf771000f8a8fdfa7a34b51d8af9549541f2d62d)':
dependencies:
@ -15809,8 +15815,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-merge-fields@47.4.0':
dependencies:
@ -15823,8 +15827,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-minimap@47.4.0':
dependencies:
@ -15833,8 +15835,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-operations-compressor@47.4.0':
dependencies:
@ -15889,8 +15889,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
'@ckeditor/ckeditor5-widget': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-pagination@47.4.0':
dependencies:
@ -15998,8 +15996,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-slash-command@47.4.0':
dependencies:
@ -16012,8 +16008,6 @@ snapshots:
'@ckeditor/ckeditor5-ui': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-source-editing-enhanced@47.4.0':
dependencies:
@ -16061,8 +16055,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-table@47.4.0':
dependencies:
@ -16075,8 +16067,6 @@ snapshots:
'@ckeditor/ckeditor5-widget': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-template@47.4.0':
dependencies:
@ -16187,8 +16177,6 @@ snapshots:
'@ckeditor/ckeditor5-engine': 47.4.0
'@ckeditor/ckeditor5-utils': 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@ckeditor/ckeditor5-widget@47.4.0':
dependencies:
@ -16208,8 +16196,6 @@ snapshots:
'@ckeditor/ckeditor5-utils': 47.4.0
ckeditor5: 47.4.0
es-toolkit: 1.39.5
transitivePeerDependencies:
- supports-color
'@codemirror/autocomplete@6.18.6':
dependencies:
@ -21676,8 +21662,6 @@ snapshots:
ckeditor5-collaboration@47.4.0:
dependencies:
'@ckeditor/ckeditor5-collaboration-core': 47.4.0
transitivePeerDependencies:
- supports-color
ckeditor5-premium-features@47.4.0(bufferutil@4.0.9)(ckeditor5@47.4.0)(utf-8-validate@6.0.5):
dependencies:
@ -30073,6 +30057,11 @@ snapshots:
dependencies:
'@mixmark-io/domino': 2.2.0
turnish@1.7.1:
dependencies:
'@adobe/css-tools': 4.4.4
'@mixmark-io/domino': 2.2.0
type-check@0.4.0:
dependencies:
prelude-ls: 1.2.1