From 45b94ecaebc00c9b110aed6368d8ca9326f222c2 Mon Sep 17 00:00:00 2001 From: zadam Date: Mon, 26 Dec 2022 10:38:31 +0100 Subject: [PATCH] zip import refactoring --- src/services/help_import.js | 6 +- src/services/import/zip.js | 171 +++++++++++++++++------------------- 2 files changed, 84 insertions(+), 93 deletions(-) diff --git a/src/services/help_import.js b/src/services/help_import.js index 99a700db4..e32e183e6 100644 --- a/src/services/help_import.js +++ b/src/services/help_import.js @@ -1,20 +1,18 @@ -const sqlInit = require("./sql_init"); const cls = require("./cls"); const zipImport = require("../services/import/zip"); const TaskContext = require("./task_context"); const becca = require("../becca/becca"); +const beccaLoader = require("../becca/becca_loader"); const fs = require("fs").promises; const HELP_FILE_PATH = '/home/adam/Downloads/Help1.zip'; -sqlInit.dbReady.then(() => { +beccaLoader.beccaLoaded.then(() => { cls.init(async () => { const helpRoot = becca.getNote("_help"); const taskContext = new TaskContext('no-progress-reporting', null, {}); const data = await fs.readFile(HELP_FILE_PATH, "binary"); - console.log("BUGGER LENGTH", data.length); - await zipImport.importZip(taskContext, Buffer.from(data, 'binary'), helpRoot); }); }); diff --git a/src/services/import/zip.js b/src/services/import/zip.js index c8913b67c..393fad484 100644 --- a/src/services/import/zip.js +++ b/src/services/import/zip.js @@ -235,6 +235,84 @@ async function importZip(taskContext, fileBuffer, importRootNote) { return targetNoteId; } + function processNoteContent(content, noteTitle, filePath, noteMeta) { + function isUrlAbsolute(url) { + return /^(?:[a-z]+:)?\/\//i.test(url); + } + + content = content.replace(/

<h1>([^<]*)<\/h1>/gi, (match, text) => { + if (noteTitle.trim() === text.trim()) { + return ""; // remove whole H1 tag + } else { + return `<h2>${text}</h2>

`; + } + }); + + content = htmlSanitizer.sanitize(content); + + content = content.replace(/<html.*<body[^>]*>/gis, ""); + content = content.replace(/<\/body>.*<\/html>/gis, ""); + + content = content.replace(/src="([^"]*)"/g, (match, url) => { + try { + url = decodeURIComponent(url); + } catch (e) { + log.error(`Cannot parse image URL '${url}', keeping original (${e}).`); + return `src="${url}"`; + } + + if (isUrlAbsolute(url) || url.startsWith("/")) { + return match; + } + + const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); + + return `src="api/images/${targetNoteId}/${path.basename(url)}"`; + }); + + content = content.replace(/href="([^"]*)"/g, (match, url) => { + try { + url = decodeURIComponent(url); + } catch (e) { + log.error(`Cannot parse link URL '${url}', keeping original (${e}).`); + return `href="${url}"`; + } + + if (url.startsWith('#') || isUrlAbsolute(url)) { + return match; + } + + const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); + + return `href="#root/${targetNoteId}"`; + }); + + content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => { + const noteId = notePath.split("/").pop(); + + let targetNoteId; + + if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances + targetNoteId = noteId; + } else { + targetNoteId = noteIdMap[noteId]; + } + + return `data-note-path="root/${targetNoteId}"`; + }); + + if (noteMeta) { + const includeNoteLinks = (noteMeta.attributes || []) + .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink'); + + for (const link of includeNoteLinks) { + // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters + content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value)); + } + } + return content; + } + function saveNote(filePath, content) { const {parentNoteMeta, noteMeta} = getMeta(filePath); @@ -280,81 +358,7 @@ async function importZip(taskContext, fileBuffer, 
importRootNote) { const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta); if (type === 'text') { - function isUrlAbsolute(url) { - return /^(?:[a-z]+:)?\/\//i.test(url); - } - - content = content.replace(/

<h1>([^<]*)<\/h1>/gi, (match, text) => { - if (noteTitle.trim() === text.trim()) { - return ""; // remove whole H1 tag - } - else { - return `<h2>${text}</h2>

`; - } - }); - - content = htmlSanitizer.sanitize(content); - - content = content.replace(/<html.*<body[^>]*>/gis, ""); - content = content.replace(/<\/body>.*<\/html>/gis, ""); - - content = content.replace(/src="([^"]*)"/g, (match, url) => { - try { - url = decodeURIComponent(url); - } catch (e) { - log.error(`Cannot parse image URL '${url}', keeping original (${e}).`); - return `src="${url}"`; - } - - if (isUrlAbsolute(url) || url.startsWith("/")) { - return match; - } - - const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); - - return `src="api/images/${targetNoteId}/${path.basename(url)}"`; - }); - - content = content.replace(/href="([^"]*)"/g, (match, url) => { - try { - url = decodeURIComponent(url); - } catch (e) { - log.error(`Cannot parse link URL '${url}', keeping original (${e}).`); - return `href="${url}"`; - } - - if (url.startsWith('#') || isUrlAbsolute(url)) { - return match; - } - - const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); - - return `href="#root/${targetNoteId}"`; - }); - - content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => { - const noteId = notePath.split("/").pop(); - - let targetNoteId; - - if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances - targetNoteId = noteId; - } else { - targetNoteId = noteIdMap[noteId]; - } - - return `data-note-path="root/${targetNoteId}"`; - }); - - if (noteMeta) { - const includeNoteLinks = (noteMeta.attributes || []) - .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink'); - - for (const link of includeNoteLinks) { - // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters - content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value)); - } - } + content = processNoteContent(content, noteTitle, filePath, noteMeta); } if (type === 'relationMap' && noteMeta) { @@ -368,17 +372,6 @@ async function importZip(taskContext, fileBuffer, 
importRootNote) { } } - if (type === 'text' && noteMeta) { - const includeNoteLinks = (noteMeta.attributes || []) - .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink'); - - // this will replace relation map links - for (const link of includeNoteLinks) { - // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters - content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value)); - } - } - let note = becca.getNote(noteId); const isProtected = importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(); @@ -523,7 +516,7 @@ async function importZip(taskContext, fileBuffer, importRootNote) { if (!metaFile) { // if there's no meta file then the notes are created based on the order in that zip file but that - // is usually quite random so we sort the notes in the way they would appear in the file manager + // is usually quite random, so we sort the notes in the way they would appear in the file manager treeService.sortNotes(noteId, 'title', false, true); } @@ -533,11 +526,11 @@ async function importZip(taskContext, fileBuffer, importRootNote) { // we're saving attributes and links only now so that all relation and link target notes // are already in the database (we don't want to have "broken" relations, not even transitionally) for (const attr of attributes) { - if (attr.type !== 'relation' || attr.value in createdNoteIds) { + if (attr.type !== 'relation' || attr.value in becca.notes) { new Attribute(attr).save(); } else { - log.info(`Relation not imported since target note doesn't exist: ${JSON.stringify(attr)}`); + log.info(`Relation not imported since the target note doesn't exist: ${JSON.stringify(attr)}`); } }