diff --git a/package-lock.json b/package-lock.json index 894c82f64..8eb6db485 100644 --- a/package-lock.json +++ b/package-lock.json @@ -31,6 +31,7 @@ "better-sqlite3": "11.8.1", "bootstrap": "5.3.3", "boxicons": "2.1.4", + "chardet": "2.0.0", "cheerio": "1.0.0", "chokidar": "4.0.3", "cls-hooked": "4.2.2", @@ -97,6 +98,7 @@ "source-map-support": "0.5.21", "split.js": "1.6.5", "stream-throttle": "0.1.3", + "strip-bom": "5.0.0", "striptags": "3.2.0", "swagger-ui-express": "5.0.1", "tmp": "0.2.3", @@ -6175,6 +6177,12 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/chardet": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/chardet/-/chardet-2.0.0.tgz", + "integrity": "sha512-xVgPpulCooDjY6zH4m9YW3jbkaBe3FKIAvF5sj5t7aBNsVl2ljIE+xwJ4iNgiDZHFQvNIpjdKdVOQvvk5ZfxbQ==", + "license": "MIT" + }, "node_modules/check-error": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/check-error/-/check-error-2.1.1.tgz", @@ -11889,6 +11897,16 @@ "node": ">=4" } }, + "node_modules/load-json-file/node_modules/strip-bom": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", + "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=4" + } + }, "node_modules/loader-runner": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz", @@ -15917,13 +15935,15 @@ } }, "node_modules/strip-bom": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-3.0.0.tgz", - "integrity": "sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==", - "dev": true, + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/strip-bom/-/strip-bom-5.0.0.tgz", + "integrity": "sha512-p+byADHF7SzEcVnLvc/r3uognM1hUhObuHXxJcgLCfD194XAkaLbjq3Wzb0N5G2tgIjH0dgT708Z51QxMeu60A==", "license": "MIT", "engines": { - "node": ">=4" + "node": ">=12" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" } }, "node_modules/strip-eof": { diff --git a/package.json b/package.json index e14d45979..401f3d5ad 100644 --- a/package.json +++ b/package.json @@ -26,7 +26,6 @@ "server:start-test": "npm run server:switch && rimraf ./data-test && cross-env TRILIUM_DATA_DIR=./data-test TRILIUM_SYNC_SERVER_HOST=http://tsyncserver:4000 TRILIUM_ENV=dev TRILIUM_PORT=9999 nodemon src/main.ts", "server:qstart": "npm run server:switch && npm run server:start", "server:switch": "rimraf ./node_modules/better-sqlite3 && npm install", - "electron:start": "cross-env NODE_OPTIONS=\"--import tsx\" TRILIUM_DATA_DIR=./data TRILIUM_SYNC_SERVER_HOST=http://tsyncserver:4000 TRILIUM_ENV=dev electron ./electron-main.ts --inspect=5858 .", "electron:start-no-dir": "cross-env NODE_OPTIONS=\"--import tsx\" TRILIUM_ENV=dev electron --inspect=5858 .", "electron:start-nix": "electron-rebuild --version 33.3.1 && cross-env NODE_OPTIONS=\"--import tsx\" TRILIUM_DATA_DIR=./data TRILIUM_SYNC_SERVER_HOST=http://tsyncserver:4000 TRILIUM_ENV=dev nix-shell -p electron_33 --run \"electron ./electron-main.ts --inspect=5858 .\"", @@ -37,30 +36,23 @@ "electron:start-prod-nix-no-dir": "electron-rebuild --version 33.3.1 && npm run build:prepare-dist && cross-env TRILIUM_ENV=dev nix-shell -p electron_33 --run \"electron ./dist/electron-main.js --inspect=5858 .\"", "electron:qstart": "npm run electron:switch && npm run electron:start", "electron:switch": "electron-rebuild", - "electron-forge:start": "npm run build:prepare-dist && electron-forge start", "electron-forge:make": "npm run build:prepare-dist && electron-forge make", "electron-forge:package": "npm run build:prepare-dist && electron-forge package", - "docs:build-backend": "rimraf ./docs/backend_api && typedoc ./docs/backend_api src/becca/entities/*.ts src/services/backend_script_api.ts src/services/sql.ts", "docs:build-frontend": "rimraf ./docs/frontend_api && jsdoc -c jsdoc-conf.json -d ./docs/frontend_api src/public/app/entities/*.js src/public/app/services/frontend_script_api.js src/public/app/widgets/basic_widget.js src/public/app/widgets/note_context_aware_widget.js src/public/app/widgets/right_panel_widget.js", "docs:build": "npm run docs:build-backend && npm run docs:build-frontend", - "build:webpack": "tsx node_modules/webpack/bin/webpack.js -c webpack.config.ts", "build:prepare-dist": "npm run build:webpack && rimraf ./dist && tsc && tsx ./bin/copy-dist.ts", - "test": "cross-env TRILIUM_DATA_DIR=./integration-tests/db TRILIUM_INTEGRATION_TEST=memory vitest", "test:coverage": "cross-env TRILIUM_DATA_DIR=./integration-tests/db vitest --coverage", "test:playwright": "playwright test", - "test:integration-edit-db": "cross-env TRILIUM_INTEGRATION_TEST=edit TRILIUM_PORT=8081 TRILIUM_ENV=dev TRILIUM_DATA_DIR=./integration-tests/db nodemon src/main.ts", "test:integration-mem-db": "cross-env TRILIUM_INTEGRATION_TEST=memory TRILIUM_PORT=8082 TRILIUM_DATA_DIR=./integration-tests/db nodemon src/main.ts", "test:integration-mem-db-dev": "cross-env TRILIUM_INTEGRATION_TEST=memory TRILIUM_PORT=8082 TRILIUM_ENV=dev TRILIUM_DATA_DIR=./integration-tests/db nodemon src/main.ts", - "dev:watch-dist": "tsx ./bin/watch-dist.ts", "dev:prettier-check": "prettier . --check", "dev:prettier-fix": "prettier . --write", - "chore:update-build-info": "tsx bin/update-build-info.ts", "chore:ci-update-nightly-version": "tsx ./bin/update-nightly-version.ts", "chore:generate-document": "cross-env nodemon ./bin/generate_document.ts 1000", @@ -89,6 +81,7 @@ "better-sqlite3": "11.8.1", "bootstrap": "5.3.3", "boxicons": "2.1.4", + "chardet": "2.0.0", "cheerio": "1.0.0", "chokidar": "4.0.3", "cls-hooked": "4.2.2", @@ -155,6 +148,7 @@ "source-map-support": "0.5.21", "split.js": "1.6.5", "stream-throttle": "0.1.3", + "strip-bom": "5.0.0", "striptags": "3.2.0", "swagger-ui-express": "5.0.1", "tmp": "0.2.3", diff --git a/src/services/import/samples/IREN Reports Q2 FY25 Results.htm b/src/services/import/samples/IREN Reports Q2 FY25 Results.htm new file mode 100644 index 000000000..361ceb340 Binary files /dev/null and b/src/services/import/samples/IREN Reports Q2 FY25 Results.htm differ diff --git a/src/services/import/single.spec.ts b/src/services/import/single.spec.ts index 716eb7b91..e650c8e98 100644 --- a/src/services/import/single.spec.ts +++ b/src/services/import/single.spec.ts @@ -1,4 +1,4 @@ -import { describe, expect, it } from "vitest"; +import { beforeAll, describe, expect, it } from "vitest"; import fs from "fs"; import path from "path"; import { fileURLToPath } from "url"; @@ -12,38 +12,47 @@ import { initializeTranslations } from "../i18n.js"; import single from "./single.js"; const scriptDir = dirname(fileURLToPath(import.meta.url)); -describe("processNoteContent", () => { - it("treats single MDX as Markdown", async () => { - const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", "Text Note.mdx")); - const taskContext = TaskContext.getInstance("import-mdx", "import", { - textImportedAsText: true - }); +async function testImport(fileName: string, mimetype: string): Promise { + const mdxSample = fs.readFileSync(path.join(scriptDir, "samples", fileName)); + const taskContext = TaskContext.getInstance("import-mdx", "import", { + textImportedAsText: true + }); - await new Promise((resolve, reject) => { - cls.init(async () => { - initializeTranslations(); - sql_init.initializeDb(); - await sql_init.dbReady; + return new Promise((resolve, reject) => { + cls.init(async () => { + const rootNote = becca.getNote("root"); + if (!rootNote) { + reject("Missing root note."); + } - const rootNote = becca.getNote("root"); - if (!rootNote) { - reject("Missing root note."); - } - - const importedNote = single.importSingleFile(taskContext, { - originalname: "Text Note.mdx", - mimetype: "text/mdx", - buffer: mdxSample - }, rootNote as BNote); - try { - expect(importedNote.mime).toBe("text/html"); - expect(importedNote.type).toBe("text"); - expect(importedNote.title).toBe("Text Note"); - } catch (e) { - reject(e); - } - resolve(); - }); + const importedNote = single.importSingleFile(taskContext, { + originalname: fileName, + mimetype, + buffer: mdxSample + }, rootNote as BNote); + resolve(importedNote); }); }); +} + +describe("processNoteContent", () => { + beforeAll(async () => { + initializeTranslations(); + sql_init.initializeDb(); + await sql_init.dbReady; + }); + + it("treats single MDX as Markdown", async () => { + const importedNote = await testImport("Text Note.mdx", "text/mdx"); + expect(importedNote.mime).toBe("text/html"); + expect(importedNote.type).toBe("text"); + expect(importedNote.title).toBe("Text Note"); + }); + + it("supports HTML note with UTF-16 (w/ BOM) from Microsoft Outlook", async () => { + const importedNote = await testImport("IREN Reports Q2 FY25 Results.htm", "text/html"); + expect(importedNote.mime).toBe("text/html"); + expect(importedNote.title).toBe("IREN Reports Q2 FY25 Results"); + expect(importedNote.getContent().toString().substring(0, 5)).toEqual("s into