From f8c61ecde912821b2e7090c14f08e04f2f6c8020 Mon Sep 17 00:00:00 2001 From: Elian Doran Date: Wed, 29 Oct 2025 08:45:47 +0200 Subject: [PATCH] feat(export/share): use better content parsing for search --- apps/server/package.json | 3 +- .../src/services/export/zip/share_theme.ts | 5 +- pnpm-lock.yaml | 62 ++++++++++++++++--- 3 files changed, 60 insertions(+), 10 deletions(-) diff --git a/apps/server/package.json b/apps/server/package.json index e2f0ef82e..ed536368d 100644 --- a/apps/server/package.json +++ b/apps/server/package.json @@ -26,6 +26,7 @@ }, "dependencies": { "better-sqlite3": "12.4.1", + "html-to-text": "9.0.5", "node-html-parser": "7.0.1" }, "devDependencies": { @@ -35,8 +36,8 @@ "@preact/preset-vite": "2.10.2", "@triliumnext/commons": "workspace:*", "@triliumnext/express-partial-content": "workspace:*", - "@triliumnext/turndown-plugin-gfm": "workspace:*", "@triliumnext/highlightjs": "workspace:*", + "@triliumnext/turndown-plugin-gfm": "workspace:*", "@types/archiver": "7.0.0", "@types/better-sqlite3": "7.6.13", "@types/cls-hooked": "4.3.9", diff --git a/apps/server/src/services/export/zip/share_theme.ts b/apps/server/src/services/export/zip/share_theme.ts index 11ea3d2cc..bc02dff12 100644 --- a/apps/server/src/services/export/zip/share_theme.ts +++ b/apps/server/src/services/export/zip/share_theme.ts @@ -8,6 +8,7 @@ import { renderNoteForExport } from "../../../share/content_renderer"; import type BNote from "../../../becca/entities/bnote.js"; import type BBranch from "../../../becca/entities/bbranch.js"; import { getShareThemeAssetDir } from "../../../routes/assets"; +import { convert as convertToText } from "html-to-text"; const shareThemeAssetDir = getShareThemeAssetDir(); @@ -57,7 +58,9 @@ export default class ShareThemeExportProvider extends ZipExportProvider { if (note) { // Prepare search index. - searchContent = typeof content === "string" ? utils.stripTags(content) : ""; + searchContent = typeof content === "string" ? convertToText(content, { + whitespaceCharacters: "\t\r\n\f\u200b\u00a0\u2002" + }) : ""; content = renderNoteForExport(note, branch, basePath, noteMeta.notePath.slice(0, -1)); if (typeof content === "string") { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 9f5480959..7ca3f39de 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -458,6 +458,9 @@ importers: better-sqlite3: specifier: 12.4.1 version: 12.4.1 + html-to-text: + specifier: 9.0.5 + version: 9.0.5 node-html-parser: specifier: 7.0.1 version: 7.0.1 @@ -4354,6 +4357,9 @@ packages: '@scarf/scarf@1.4.0': resolution: {integrity: sha512-xxeapPiUXdZAE3che6f3xogoJPeZgig6omHEy1rIY5WVsB3H2BHNnZH+gHG6x91SCWyQCzWGsuL2Hh3ClO5/qQ==} + '@selderee/plugin-htmlparser2@0.11.0': + resolution: {integrity: sha512-P33hHGdldxGabLFjPPpaTxVolMrzrcegejx+0GxjrIb9Zv48D8yAIA/QTDR2dFl7Uz7urX8aX6+5bCZslr+gWQ==} + '@sideway/address@4.1.5': resolution: {integrity: sha512-IqO/DUQHUkPeixNQ8n0JA6102hT9CmaljNTPmQ1u8MEhBo/R4Q8eKLN/vGZxuebwOroDB4cbpjheD4+/sKFK4Q==} @@ -8491,6 +8497,10 @@ packages: resolution: {integrity: sha512-ztqyC3kLto0e9WbNp0aeP+M3kTt+nbaIveGmUxAtZa+8iFgKLUOD4YKM5j+f3QD89bra7UeumolZHKuOXnTmeQ==} engines: {node: '>=8'} + html-to-text@9.0.5: + resolution: {integrity: sha512-qY60FjREgVZL03vJU6IfMV4GDjGBIoOyvuFdpBDIX9yTlDw0TjxVBQp+P8NvpdIXNJvfWBTNul7fsAQJq2FNpg==} + engines: {node: '>=14'} + html-void-elements@3.0.0: resolution: {integrity: sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg==} @@ -9324,6 +9334,9 @@ packages: resolution: {integrity: sha512-b94GiNHQNy6JNTrt5w6zNyffMrNkXZb3KTkCZJb2V1xaEGCk093vkZ2jk3tpaeP33/OiXC+WvK9AxUebnf5nbw==} engines: {node: '>= 0.6.3'} + leac@0.6.0: + resolution: {integrity: sha512-y+SqErxb8h7nE/fiEX07jsbuhrpO9lL8eca7/Y1nuWV2moNlXhyd59iDGcRf6moVyDMbmTNzL40SUyrFU/yDpg==} + leaflet-gpx@2.2.0: resolution: {integrity: sha512-iVUx6o0ydLn2ikYSVLuWnr0k/CDAOIUtmvQ91AI24/PXuIFIb+iEIJMHTQfvGhMCKcFrwd9ZaFYH7P/46tgGhw==} @@ -10592,6 +10605,9 @@ packages: parse5@7.3.0: resolution: {integrity: sha512-IInvU7fabl34qmi9gY8XOVxhYyMyuH2xUNpb2q8/Y+7552KlejkRvqvD19nMoUW/uQGGbqNpA6Tufu5FL5BZgw==} + parseley@0.12.1: + resolution: {integrity: sha512-e6qHKe3a9HWr0oMRVDTRhKce+bRO8VGQR3NyVwcjwrbhMmFCX9KszEV35+rn4AdilFAq9VPxP/Fe1wC9Qjd2lw==} + parseurl@1.3.3: resolution: {integrity: sha512-CiyeOxFT/JZyN5m0z9PfXw4SCBJ6Sygz1Dpl0wqjlhDEGGBP1GnsUVEL0p63hoG1fcj3fHynXi9NYO4nWOL+qQ==} engines: {node: '>= 0.8'} @@ -10673,6 +10689,9 @@ packages: resolution: {integrity: sha512-nh39Mo1eGWmZS7y+mK/dQIqg7S1lp38DpRxkyoHf0ZcUs/HDc+yyTjuOtTvSMZHmfSLuSQaX945u05Y2Q6UWZg==} engines: {node: '>=14', npm: '>=7'} + peberminta@0.9.0: + resolution: {integrity: sha512-XIxfHpEuSJbITd1H3EeQwpcZbTLHc+VVr8ANI9t5sit565tsI4/xK3KWTUFE2e6QiangUkh3B0jihzmGnNrRsQ==} + peek-readable@4.1.0: resolution: {integrity: sha512-ZI3LnwUv5nOGbQzD9c2iDG6toheuXSZP5esSHBjopsXH4dg19soufvpUGA3uohi5anFtGb2lhAVdHzH6R/Evvg==} engines: {node: '>=8'} @@ -12321,6 +12340,9 @@ packages: secure-compare@3.0.1: resolution: {integrity: sha512-AckIIV90rPDcBcglUwXPF3kg0P0qmPsPXAj6BBEENQE1p5yA1xfmDJzfi1Tappj37Pv2mVbKpL3Z1T+Nn7k1Qw==} + selderee@0.11.0: + resolution: {integrity: sha512-5TF+l7p4+OsnP8BCCvSyZiSPc4x4//p5uPwK8TCnVPJYRmU2aYKMpOXvw8zM5a5JvuuCGN1jmsMwuU2W02ukfA==} + select-hose@2.0.0: resolution: {integrity: sha512-mEugaLK+YfkijB4fx0e6kImuJdCIt2LxCRcbEYPqRGCs4F2ogyfZU5IAZRdjCP8JPq2AtdNoC/Dux63d9Kiryg==} @@ -15081,6 +15103,8 @@ snapshots: '@ckeditor/ckeditor5-utils': 47.1.0 '@ckeditor/ckeditor5-watchdog': 47.1.0 es-toolkit: 1.39.5 + transitivePeerDependencies: + - supports-color '@ckeditor/ckeditor5-dev-build-tools@43.1.0(@swc/helpers@0.5.17)(tslib@2.8.1)(typescript@5.9.3)': dependencies: @@ -15245,6 +15269,8 @@ snapshots: '@ckeditor/ckeditor5-utils': 47.1.0 ckeditor5: 47.1.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41) es-toolkit: 1.39.5 + transitivePeerDependencies: + - supports-color '@ckeditor/ckeditor5-editor-classic@47.1.0': dependencies: @@ -15272,6 +15298,8 @@ snapshots: '@ckeditor/ckeditor5-utils': 47.1.0 ckeditor5: 47.1.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41) es-toolkit: 1.39.5 + transitivePeerDependencies: + - supports-color '@ckeditor/ckeditor5-editor-multi-root@47.1.0': dependencies: @@ -15524,8 +15552,6 @@ snapshots: '@ckeditor/ckeditor5-ui': 47.1.0 '@ckeditor/ckeditor5-utils': 47.1.0 ckeditor5: 47.1.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41) - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-line-height@47.1.0': dependencies: @@ -15768,8 +15794,6 @@ snapshots: '@ckeditor/ckeditor5-ui': 47.1.0 '@ckeditor/ckeditor5-utils': 47.1.0 ckeditor5: 47.1.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41) - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-restricted-editing@47.1.0': dependencies: @@ -15814,8 +15838,6 @@ snapshots: '@ckeditor/ckeditor5-ui': 47.1.0 '@ckeditor/ckeditor5-utils': 47.1.0 ckeditor5: 47.1.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41) - transitivePeerDependencies: - - supports-color '@ckeditor/ckeditor5-slash-command@47.1.0': dependencies: @@ -16024,8 +16046,6 @@ snapshots: '@ckeditor/ckeditor5-utils': 47.1.0 ckeditor5: 47.1.0(patch_hash=8331a09d41443b39ea1c784daaccfeb0da4f9065ed556e7de92e9c77edd9eb41) es-toolkit: 1.39.5 - transitivePeerDependencies: - - supports-color '@codemirror/autocomplete@6.18.6': dependencies: @@ -18703,6 +18723,11 @@ snapshots: '@scarf/scarf@1.4.0': {} + '@selderee/plugin-htmlparser2@0.11.0': + dependencies: + domhandler: 5.0.3 + selderee: 0.11.0 + '@sideway/address@4.1.5': dependencies: '@hapi/hoek': 9.3.0 @@ -24154,6 +24179,14 @@ snapshots: html-tags@3.3.1: {} + html-to-text@9.0.5: + dependencies: + '@selderee/plugin-htmlparser2': 0.11.0 + deepmerge: 4.3.1 + dom-serializer: 2.0.0 + htmlparser2: 8.0.2 + selderee: 0.11.0 + html-void-elements@3.0.0: {} html2plaintext@2.1.4: @@ -25061,6 +25094,8 @@ snapshots: dependencies: readable-stream: 2.3.8 + leac@0.6.0: {} + leaflet-gpx@2.2.0: {} leaflet@1.9.4: {} @@ -26731,6 +26766,11 @@ snapshots: dependencies: entities: 6.0.0 + parseley@0.12.1: + dependencies: + leac: 0.6.0 + peberminta: 0.9.0 + parseurl@1.3.3: {} path-browserify@1.0.1: {} @@ -26789,6 +26829,8 @@ snapshots: pe-library@1.0.1: {} + peberminta@0.9.0: {} + peek-readable@4.1.0: {} peek-readable@7.0.0: {} @@ -28549,6 +28591,10 @@ snapshots: secure-compare@3.0.1: {} + selderee@0.11.0: + dependencies: + parseley: 0.12.1 + select-hose@2.0.0: {} selfsigned@2.4.1: