From ba9108211569db2573959055a01818939957427f Mon Sep 17 00:00:00 2001 From: Panagiotis Papadopoulos Date: Sat, 18 Jan 2025 18:16:15 +0100 Subject: [PATCH 1/5] test(import/utils): add test for extractHtmlTitle --- src/services/import/utils.spec.ts | 53 +++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 src/services/import/utils.spec.ts diff --git a/src/services/import/utils.spec.ts b/src/services/import/utils.spec.ts new file mode 100644 index 000000000..0da7dd6ca --- /dev/null +++ b/src/services/import/utils.spec.ts @@ -0,0 +1,53 @@ +import { describe, it, expect } from "vitest"; +import importUtils from "./utils.js"; + + +describe("#extractHtmlTitle", () => { + + const htmlWithNotTitle = ` + + +
abc
+ + `; + + const htmlWithTitle = ` + + Test Title + + +
abc
+ + `; + + + const htmlWithTitleWOpeningBracket = ` + + Test < Title + + +
abc
+ + `; + + type TestCaseExtractHtmlTitle = [htmlContent: string, expected: string | null, description: string]; + + const testCases: TestCaseExtractHtmlTitle[] = [ + [htmlWithTitle, "Test Title", "with existing tag"], + [htmlWithTitleWOpeningBracket, null, "with existing <title> tag, that includes '<'"], + [htmlWithNotTitle, null, "without existing <title> tag"], + ["", null, "with empty content"] + ]; + + testCases.forEach((testCase) => { + return it(`${(testCase[2])}, it should return '${testCase[1]}'`, () => { + const [value, expected] = testCase; + const actual = importUtils.extractHtmlTitle(value); + expect(actual).toStrictEqual(expected); + }); + }); +}) + +describe.todo("#handleH1", () => { + //TODO +}) From e1c949aa10c912fdb7b2c9c1f0beeecba8db5d94 Mon Sep 17 00:00:00 2001 From: Panagiotis Papadopoulos <pano_90@gmx.net> Date: Sat, 18 Jan 2025 23:46:54 +0100 Subject: [PATCH 2/5] test(import/utils): add test for #handleH1/rework previous tests --- src/services/import/utils.spec.ts | 90 ++++++++++++++++++++++++------- 1 file changed, 71 insertions(+), 19 deletions(-) diff --git a/src/services/import/utils.spec.ts b/src/services/import/utils.spec.ts index 0da7dd6ca..ae6fe45b2 100644 --- a/src/services/import/utils.spec.ts +++ b/src/services/import/utils.spec.ts @@ -1,17 +1,17 @@ import { describe, it, expect } from "vitest"; import importUtils from "./utils.js"; +type TestCase<T extends (...args: any) => any> = [desc: string, fnParams: Parameters<T>, expected: ReturnType<T>]; describe("#extractHtmlTitle", () => { - - const htmlWithNotTitle = ` + const htmlWithNoTitle = ` <html> <body> <div>abc</div> </body> </html>`; - const htmlWithTitle = ` + const htmlWithTitle = ` <html><head> <title>Test Title @@ -20,8 +20,7 @@ describe("#extractHtmlTitle", () => { `; - - const htmlWithTitleWOpeningBracket = ` + const htmlWithTitleWOpeningBracket = ` Test < Title @@ -30,24 +29,77 @@ describe("#extractHtmlTitle", () => { `; - type TestCaseExtractHtmlTitle = [htmlContent: string, expected: string | null, description: string]; + type TestCaseExtractHtmlTitle = [htmlContent: string, expected: string | null, description: string]; - const testCases: TestCaseExtractHtmlTitle[] = [ - [htmlWithTitle, "Test Title", "with existing tag"], - [htmlWithTitleWOpeningBracket, null, "with existing <title> tag, that includes '<'"], - [htmlWithNotTitle, null, "without existing <title> tag"], - ["", null, "with empty content"] - ]; + // prettier-ignore + const testCases: TestCase<typeof importUtils.extractHtmlTitle>[] = [ + [ + "w/ existing <title> tag, it should return the content of the title tag", + [htmlWithTitle], + "Test Title" + ], + [ + // @TriliumNextTODO: this seems more like an unwanted behaviour to me – check if this needs rather fixing + "with existing <title> tag, that includes an opening HTML tag '<', it should return null", + [htmlWithTitleWOpeningBracket], + null + ], + [ + "w/o an existing <title> tag, it should reutrn null", + [htmlWithNoTitle], + null + ], + [ + "w/ empty string content, it should return null", + [""], + null + ] + ]; testCases.forEach((testCase) => { - return it(`${(testCase[2])}, it should return '${testCase[1]}'`, () => { - const [value, expected] = testCase; - const actual = importUtils.extractHtmlTitle(value); + const [desc, fnParams, expected] = testCase; + return it(desc, () => { + const actual = importUtils.extractHtmlTitle(...fnParams); expect(actual).toStrictEqual(expected); }); }); -}) +}); -describe.todo("#handleH1", () => { - //TODO -}) +describe("#handleH1", () => { + // prettier-ignore + const testCases: TestCase<typeof importUtils.handleH1>[] = [ + [ + "w/ single <h1> tag w/ identical text content as the title tag: the <h1> tag should be stripped", + ["<h1>Title</h1>", "Title"], + "" + ], + [ + "w/ multiple <h1> tags, with the fist matching the title tag: the first <h1> tag should be stripped and subsequent tags converted to <h2>", + ["<h1>Title</h1><h1>Header 1</h1><h1>Header 2</h1>", "Title"], + "<h2>Header 1</h2><h2>Header 2</h2>" + ], + [ + "w/ no <h1> tag and only <h2> tags, it should not cause any changes and return the same content", + ["<h2>Heading 1</h2><h2>Heading 2</h2>", "Title"], + "<h2>Heading 1</h2><h2>Heading 2</h2>" + ], + [ + "w/ multiple <h1> tags, and the 1st matching the title tag, it should strip ONLY the very first occurence of the <h1> tags in the returned content", + ["<h1>Topic ABC</h1><h1>Heading 1</h1><h1>Topic ABC</h1>", "Topic ABC"], + "<h2>Heading 1</h2><h2>Topic ABC</h2>" + ], + [ + "w/ multiple <h1> tags, and the 1st matching NOT the title tag, it should NOT strip any other <h1> tags", + ["<h1>Introduction</h1><h1>Topic ABC</h1><h1>Summary</h1>", "Topic ABC"], + "<h2>Introduction</h2><h2>Topic ABC</h2><h2>Summary</h2>" + ] + ]; + + testCases.forEach((testCase) => { + const [desc, fnParams, expected] = testCase; + return it(desc, () => { + const actual = importUtils.handleH1(...fnParams); + expect(actual).toStrictEqual(expected); + }); + }); +}); From 1de9bc7c6f955772c23f00647127bd6d1c097bbd Mon Sep 17 00:00:00 2001 From: Panagiotis Papadopoulos <pano_90@gmx.net> Date: Tue, 21 Jan 2025 00:04:05 +0100 Subject: [PATCH 3/5] fix(import/utils.handleH1): fix stripping of all <h1> tags that match title now it will only strip the very first tag that if it matches the title, otherwise it gets turned into a h2 tag fixes #1016 --- src/services/import/utils.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/services/import/utils.ts b/src/services/import/utils.ts index ec4bbf35a..2f4227f0a 100644 --- a/src/services/import/utils.ts +++ b/src/services/import/utils.ts @@ -1,10 +1,14 @@ "use strict"; function handleH1(content: string, title: string) { + let isFirstH1Handled = false; + content = content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (match, text) => { - if (title.trim() === text.trim()) { + if (title.trim() === text.trim() && !isFirstH1Handled) { + isFirstH1Handled = true; return ""; // remove whole H1 tag } else { + isFirstH1Handled = true; return `<h2>${text}</h2>`; } }); From 2296d1a6ba351c861c8552ff6f3fdf3b453c4cac Mon Sep 17 00:00:00 2001 From: Panagiotis Papadopoulos <pano_90@gmx.net> Date: Tue, 21 Jan 2025 00:25:46 +0100 Subject: [PATCH 4/5] refactor(import/utils.handleH1): simplify handleH1 --- src/services/import/utils.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/services/import/utils.ts b/src/services/import/utils.ts index 2f4227f0a..41c42ae22 100644 --- a/src/services/import/utils.ts +++ b/src/services/import/utils.ts @@ -3,16 +3,17 @@ function handleH1(content: string, title: string) { let isFirstH1Handled = false; - content = content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (match, text) => { - if (title.trim() === text.trim() && !isFirstH1Handled) { + return content.replace(/<h1[^>]*>([^<]*)<\/h1>/gi, (match, text) => { + const convertedContent = `<h2>${text}</h2>`; + + // strip away very first found h1 tag, if it matches the title + if (!isFirstH1Handled) { isFirstH1Handled = true; - return ""; // remove whole H1 tag - } else { - isFirstH1Handled = true; - return `<h2>${text}</h2>`; + return title.trim() === text.trim() ? "" : convertedContent; } + + return convertedContent; }); - return content; } function extractHtmlTitle(content: string): string | null { From 05b433d44eb2c086b576c47a235c5805b065e8e1 Mon Sep 17 00:00:00 2001 From: Panagiotis Papadopoulos <pano_90@gmx.net> Date: Tue, 21 Jan 2025 00:33:45 +0100 Subject: [PATCH 5/5] test(import/utils): remove leftover unused type --- src/services/import/utils.spec.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/services/import/utils.spec.ts b/src/services/import/utils.spec.ts index ae6fe45b2..abd33e025 100644 --- a/src/services/import/utils.spec.ts +++ b/src/services/import/utils.spec.ts @@ -29,8 +29,6 @@ describe("#extractHtmlTitle", () => { </body> </html>`; - type TestCaseExtractHtmlTitle = [htmlContent: string, expected: string | null, description: string]; - // prettier-ignore const testCases: TestCase<typeof importUtils.extractHtmlTitle>[] = [ [