mirror of
https://github.com/zadam/trilium.git
synced 2025-10-20 15:19:01 +02:00
feat(import/single): support UTF-16 LE with BOM for HTML
This commit is contained in:
parent
39f00bd568
commit
fd4f35e879
30
package-lock.json
generated
30
package-lock.json
generated
@ -31,6 +31,7 @@
|
||||
"better-sqlite3": "11.8.1",
|
||||
"bootstrap": "5.3.3",
|
||||
"boxicons": "2.1.4",
|
||||
"chardet": "2.0.0",
|
||||
"cheerio": "1.0.0",
|
||||
"chokidar": "4.0.3",
|
||||
"cls-hooked": "4.2.2",
|
||||
@ -97,6 +98,7 @@
|
||||
"source-map-support": "0.5.21",
|
||||
"split.js": "1.6.5",
|
||||
"stream-throttle": "0.1.3",
|
||||
"strip-bom": "5.0.0",
|
||||
"striptags": "3.2.0",
|
||||
"swagger-ui-express": "5.0.1",
|
||||
"tmp": "0.2.3",
|
||||
@ -6175,6 +6177,12 @@
|
||||
"url": "https://github.com/chalk/chalk?sponsor=1"
|
||||
}
|
||||
},
|
||||
"node_modules/chardet": {
|
||||
"version": "2.0.0",
|
||||
"resolved": "https://registry.npmjs.org/chardet/-/chardet-2.0.0.tgz",
|
||||
"integrity": "sha512-xVgPpulCooDjY6zH4m9YW3jbkaBe3FKIAvF5sj5t7aBNsVl2ljIE+xwJ4iNgiDZHFQvNIpjdKdVOQvvk5ZfxbQ==",
|
||||
"license": "MIT"
|
||||
},
|
||||
"node_modules/check-error": {
|
||||
"version": "2.1.1",
|
||||
"resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.1.tgz",
|
||||
@ -11889,6 +11897,16 @@
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/load-json-file/node_modules/strip-bom": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz",
|
||||
"integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==",
|
||||
"dev": true,
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/loader-runner": {
|
||||
"version": "4.3.0",
|
||||
"resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz",
|
||||
@ -15917,13 +15935,15 @@
|
||||
}
|
||||
},
|
||||
"node_modules/strip-bom": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz",
|
||||
"integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==",
|
||||
"dev": true,
|
||||
"version": "5.0.0",
|
||||
"resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-5.0.0.tgz",
|
||||
"integrity": "sha512-p+byADHF7SzEcVnLvc/r3uognM1hUhObuHXxJcgLCfD194XAkaLbjq3Wzb0N5G2tgIjH0dgT708Z51QxMeu60A==",
|
||||
"license": "MIT",
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
"node": ">=12"
|
||||
},
|
||||
"funding": {
|
||||
"url": "https://github.com/sponsors/sindresorhus"
|
||||
}
|
||||
},
|
||||
"node_modules/strip-eof": {
|
||||
|
10
package.json
10
package.json
@ -26,7 +26,6 @@
|
||||
"server:start-test": "npm run server:switch && rimraf ./data-test && cross-env TRILIUM_DATA_DIR=./data-test TRILIUM_SYNC_SERVER_HOST=http://tsyncserver:4000 TRILIUM_ENV=dev TRILIUM_PORT=9999 nodemon src/main.ts",
|
||||
"server:qstart": "npm run server:switch && npm run server:start",
|
||||
"server:switch": "rimraf ./node_modules/better-sqlite3 && npm install",
|
||||
|
||||
"electron:start": "cross-env NODE_OPTIONS=\"--import tsx\" TRILIUM_DATA_DIR=./data TRILIUM_SYNC_SERVER_HOST=http://tsyncserver:4000 TRILIUM_ENV=dev electron ./electron-main.ts --inspect=5858 .",
|
||||
"electron:start-no-dir": "cross-env NODE_OPTIONS=\"--import tsx\" TRILIUM_ENV=dev electron --inspect=5858 .",
|
||||
"electron:start-nix": "electron-rebuild --version 33.3.1 && cross-env NODE_OPTIONS=\"--import tsx\" TRILIUM_DATA_DIR=./data TRILIUM_SYNC_SERVER_HOST=http://tsyncserver:4000 TRILIUM_ENV=dev nix-shell -p electron_33 --run \"electron ./electron-main.ts --inspect=5858 .\"",
|
||||
@ -37,30 +36,23 @@
|
||||
"electron:start-prod-nix-no-dir": "electron-rebuild --version 33.3.1 && npm run build:prepare-dist && cross-env TRILIUM_ENV=dev nix-shell -p electron_33 --run \"electron ./dist/electron-main.js --inspect=5858 .\"",
|
||||
"electron:qstart": "npm run electron:switch && npm run electron:start",
|
||||
"electron:switch": "electron-rebuild",
|
||||
|
||||
"electron-forge:start": "npm run build:prepare-dist && electron-forge start",
|
||||
"electron-forge:make": "npm run build:prepare-dist && electron-forge make",
|
||||
"electron-forge:package": "npm run build:prepare-dist && electron-forge package",
|
||||
|
||||
"docs:build-backend": "rimraf ./docs/backend_api && typedoc ./docs/backend_api src/becca/entities/*.ts src/services/backend_script_api.ts src/services/sql.ts",
|
||||
"docs:build-frontend": "rimraf ./docs/frontend_api && jsdoc -c jsdoc-conf.json -d ./docs/frontend_api src/public/app/entities/*.js src/public/app/services/frontend_script_api.js src/public/app/widgets/basic_widget.js src/public/app/widgets/note_context_aware_widget.js src/public/app/widgets/right_panel_widget.js",
|
||||
"docs:build": "npm run docs:build-backend && npm run docs:build-frontend",
|
||||
|
||||
"build:webpack": "tsx node_modules/webpack/bin/webpack.js -c webpack.config.ts",
|
||||
"build:prepare-dist": "npm run build:webpack && rimraf ./dist && tsc && tsx ./bin/copy-dist.ts",
|
||||
|
||||
"test": "cross-env TRILIUM_DATA_DIR=./integration-tests/db TRILIUM_INTEGRATION_TEST=memory vitest",
|
||||
"test:coverage": "cross-env TRILIUM_DATA_DIR=./integration-tests/db vitest --coverage",
|
||||
"test:playwright": "playwright test",
|
||||
|
||||
"test:integration-edit-db": "cross-env TRILIUM_INTEGRATION_TEST=edit TRILIUM_PORT=8081 TRILIUM_ENV=dev TRILIUM_DATA_DIR=./integration-tests/db nodemon src/main.ts",
|
||||
"test:integration-mem-db": "cross-env TRILIUM_INTEGRATION_TEST=memory TRILIUM_PORT=8082 TRILIUM_DATA_DIR=./integration-tests/db nodemon src/main.ts",
|
||||
"test:integration-mem-db-dev": "cross-env TRILIUM_INTEGRATION_TEST=memory TRILIUM_PORT=8082 TRILIUM_ENV=dev TRILIUM_DATA_DIR=./integration-tests/db nodemon src/main.ts",
|
||||
|
||||
"dev:watch-dist": "tsx ./bin/watch-dist.ts",
|
||||
"dev:prettier-check": "prettier . --check",
|
||||
"dev:prettier-fix": "prettier . --write",
|
||||
|
||||
"chore:update-build-info": "tsx bin/update-build-info.ts",
|
||||
"chore:ci-update-nightly-version": "tsx ./bin/update-nightly-version.ts",
|
||||
"chore:generate-document": "cross-env nodemon ./bin/generate_document.ts 1000",
|
||||
@ -89,6 +81,7 @@
|
||||
"better-sqlite3": "11.8.1",
|
||||
"bootstrap": "5.3.3",
|
||||
"boxicons": "2.1.4",
|
||||
"chardet": "2.0.0",
|
||||
"cheerio": "1.0.0",
|
||||
"chokidar": "4.0.3",
|
||||
"cls-hooked": "4.2.2",
|
||||
@ -155,6 +148,7 @@
|
||||
"source-map-support": "0.5.21",
|
||||
"split.js": "1.6.5",
|
||||
"stream-throttle": "0.1.3",
|
||||
"strip-bom": "5.0.0",
|
||||
"striptags": "3.2.0",
|
||||
"swagger-ui-express": "5.0.1",
|
||||
"tmp": "0.2.3",
|
||||
|
BIN
src/services/import/samples/IREN Reports Q2 FY25 Results.htm
Normal file
BIN
src/services/import/samples/IREN Reports Q2 FY25 Results.htm
Normal file
Binary file not shown.
@ -1,4 +1,4 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { beforeAll, describe, expect, it } from "vitest";
|
||||
import fs from "fs";
|
||||
import path from "path";
|
||||
import { fileURLToPath } from "url";
|
||||
@ -12,38 +12,47 @@ import { initializeTranslations } from "../i18n.js";
|
||||
import single from "./single.js";
|
||||
const scriptDir = dirname(fileURLToPath(import.meta.url));
|
||||
|
||||
describe("processNoteContent", () => {
|
||||
it("treats single MDX as Markdown", async () => {
|
||||
const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", "Text Note.mdx"));
|
||||
async function testImport(fileName: string, mimetype: string): Promise<BNote> {
|
||||
const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", fileName));
|
||||
const taskContext = TaskContext.getInstance("import-mdx", "import", {
|
||||
textImportedAsText: true
|
||||
});
|
||||
|
||||
await new Promise<void>((resolve, reject) => {
|
||||
return new Promise<BNote>((resolve, reject) => {
|
||||
cls.init(async () => {
|
||||
initializeTranslations();
|
||||
sql_init.initializeDb();
|
||||
await sql_init.dbReady;
|
||||
|
||||
const rootNote = becca.getNote("root");
|
||||
if (!rootNote) {
|
||||
reject("Missing root note.");
|
||||
}
|
||||
|
||||
const importedNote = single.importSingleFile(taskContext, {
|
||||
originalname: "Text Note.mdx",
|
||||
mimetype: "text/mdx",
|
||||
originalname: fileName,
|
||||
mimetype,
|
||||
buffer: mdxSample
|
||||
}, rootNote as BNote);
|
||||
try {
|
||||
resolve(importedNote);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
describe("processNoteContent", () => {
|
||||
beforeAll(async () => {
|
||||
initializeTranslations();
|
||||
sql_init.initializeDb();
|
||||
await sql_init.dbReady;
|
||||
});
|
||||
|
||||
it("treats single MDX as Markdown", async () => {
|
||||
const importedNote = await testImport("Text Note.mdx", "text/mdx");
|
||||
expect(importedNote.mime).toBe("text/html");
|
||||
expect(importedNote.type).toBe("text");
|
||||
expect(importedNote.title).toBe("Text Note");
|
||||
} catch (e) {
|
||||
reject(e);
|
||||
}
|
||||
resolve();
|
||||
});
|
||||
});
|
||||
|
||||
it("supports HTML note with UTF-16 (w/ BOM) from Microsoft Outlook", async () => {
|
||||
const importedNote = await testImport("IREN Reports Q2 FY25 Results.htm", "text/html");
|
||||
expect(importedNote.mime).toBe("text/html");
|
||||
expect(importedNote.title).toBe("IREN Reports Q2 FY25 Results");
|
||||
expect(importedNote.getContent().toString().substring(0, 5)).toEqual("<html");
|
||||
});
|
||||
})
|
||||
|
@ -3,6 +3,8 @@
|
||||
import type BNote from "../../becca/entities/bnote.js";
|
||||
import type TaskContext from "../task_context.js";
|
||||
|
||||
import chardet from "chardet";
|
||||
import stripBom from "strip-bom";
|
||||
import noteService from "../../services/notes.js";
|
||||
import imageService from "../../services/image.js";
|
||||
import protectedSessionService from "../protected_session.js";
|
||||
@ -146,8 +148,23 @@ function importMarkdown(taskContext: TaskContext, file: File, parentNote: BNote)
|
||||
return note;
|
||||
}
|
||||
|
||||
function processStringOrBuffer(data: string | Buffer) {
|
||||
if (!Buffer.isBuffer(data)) {
|
||||
return data;
|
||||
}
|
||||
|
||||
const detectedEncoding = chardet.detect(data);
|
||||
switch (detectedEncoding) {
|
||||
case "UTF-16LE":
|
||||
return stripBom(data.toString("utf-16le"));
|
||||
case "UTF-8":
|
||||
default:
|
||||
return data.toString("utf-8");
|
||||
}
|
||||
}
|
||||
|
||||
function importHtml(taskContext: TaskContext, file: File, parentNote: BNote) {
|
||||
let content = file.buffer.toString("utf-8");
|
||||
let content = processStringOrBuffer(file.buffer);
|
||||
|
||||
// Try to get title from HTML first, fall back to filename
|
||||
// We do this before sanitization since that turns all <h1>s into <h2>
|
||||
|
Loading…
x
Reference in New Issue
Block a user