From 45b94ecaebc00c9b110aed6368d8ca9326f222c2 Mon Sep 17 00:00:00 2001
From: zadam <zadam.apps@gmail.com>
Date: Mon, 26 Dec 2022 10:38:31 +0100
Subject: [PATCH] zip import refactoring

---
 src/services/help_import.js |   6 +-
 src/services/import/zip.js  | 171 +++++++++++++++++-------------------
 2 files changed, 84 insertions(+), 93 deletions(-)
diff --git a/src/services/help_import.js b/src/services/help_import.js
index 99a700db4..e32e183e6 100644
--- a/src/services/help_import.js
+++ b/src/services/help_import.js
@@ -1,20 +1,18 @@
-const sqlInit = require("./sql_init");
 const cls = require("./cls");
 const zipImport = require("../services/import/zip");
 const TaskContext = require("./task_context");
 const becca = require("../becca/becca");
+const beccaLoader = require("../becca/becca_loader");
 const fs = require("fs").promises;
 
 const HELP_FILE_PATH = '/home/adam/Downloads/Help1.zip';
 
-sqlInit.dbReady.then(() => {
+beccaLoader.beccaLoaded.then(() => {
     cls.init(async () => {
         const helpRoot = becca.getNote("_help");
         const taskContext = new TaskContext('no-progress-reporting', null, {});
         const data = await fs.readFile(HELP_FILE_PATH, "binary");
 
-        console.log("BUGGER LENGTH", data.length);
-
         await zipImport.importZip(taskContext, Buffer.from(data, 'binary'), helpRoot);
     });
 });
diff --git a/src/services/import/zip.js b/src/services/import/zip.js
index c8913b67c..393fad484 100644
--- a/src/services/import/zip.js
+++ b/src/services/import/zip.js
@@ -235,6 +235,84 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
         return targetNoteId;
     }
 
+    function processNoteContent(content, noteTitle, filePath, noteMeta) {
+        function isUrlAbsolute(url) {
+            return /^(?:[a-z]+:)?\/\//i.test(url);
+        }
+
+        content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
+            if (noteTitle.trim() === text.trim()) {
+                return ""; // remove whole H1 tag
+            } else {
+                return `<h2>${text}</h2>`;
+            }
+        });
+
+        content = htmlSanitizer.sanitize(content);
+
+        content = content.replace(/<html.*<body[^>]*>/gis, "");
+        content = content.replace(/<\/body>.*<\/html>/gis, "");
+
+        content = content.replace(/src="([^"]*)"/g, (match, url) => {
+            try {
+                url = decodeURIComponent(url);
+            } catch (e) {
+                log.error(`Cannot parse image URL '${url}', keeping original (${e}).`);
+                return `src="${url}"`;
+            }
+
+            if (isUrlAbsolute(url) || url.startsWith("/")) {
+                return match;
+            }
+
+            const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
+
+            return `src="api/images/${targetNoteId}/${path.basename(url)}"`;
+        });
+
+        content = content.replace(/href="([^"]*)"/g, (match, url) => {
+            try {
+                url = decodeURIComponent(url);
+            } catch (e) {
+                log.error(`Cannot parse link URL '${url}', keeping original (${e}).`);
+                return `href="${url}"`;
+            }
+
+            if (url.startsWith('#') || isUrlAbsolute(url)) {
+                return match;
+            }
+
+            const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
+
+            return `href="#root/${targetNoteId}"`;
+        });
+
+        content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => {
+            const noteId = notePath.split("/").pop();
+
+            let targetNoteId;
+
+            if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances
+                targetNoteId = noteId;
+            } else {
+                targetNoteId = noteIdMap[noteId];
+            }
+
+            return `data-note-path="root/${targetNoteId}"`;
+        });
+
+        if (noteMeta) {
+            const includeNoteLinks = (noteMeta.attributes || [])
+                .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');
+
+            for (const link of includeNoteLinks) {
+                // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
+                content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
+            }
+        }
+        return content;
+    }
+
     function saveNote(filePath, content) {
         const {parentNoteMeta, noteMeta} = getMeta(filePath);
 
@@ -280,81 +358,7 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
         const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);
 
         if (type === 'text') {
-            function isUrlAbsolute(url) {
-                return /^(?:[a-z]+:)?\/\//i.test(url);
-            }
-
-            content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
-                if (noteTitle.trim() === text.trim()) {
-                    return ""; // remove whole H1 tag
-                }
-                else {
-                    return `<h2>${text}</h2>`;
-                }
-            });
-
-            content = htmlSanitizer.sanitize(content);
-
-            content = content.replace(/<html.*<body[^>]*>/gis, "");
-            content = content.replace(/<\/body>.*<\/html>/gis, "");
-
-            content = content.replace(/src="([^"]*)"/g, (match, url) => {
-                try {
-                    url = decodeURIComponent(url);
-                } catch (e) {
-                    log.error(`Cannot parse image URL '${url}', keeping original (${e}).`);
-                    return `src="${url}"`;
-                }
-
-                if (isUrlAbsolute(url) || url.startsWith("/")) {
-                    return match;
-                }
-
-                const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
-
-                return `src="api/images/${targetNoteId}/${path.basename(url)}"`;
-            });
-
-            content = content.replace(/href="([^"]*)"/g, (match, url) => {
-                try {
-                    url = decodeURIComponent(url);
-                } catch (e) {
-                    log.error(`Cannot parse link URL '${url}', keeping original (${e}).`);
-                    return `href="${url}"`;
-                }
-
-                if (url.startsWith('#') || isUrlAbsolute(url)) {
-                    return match;
-                }
-
-                const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
-
-                return `href="#root/${targetNoteId}"`;
-            });
-
-            content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => {
-                const noteId = notePath.split("/").pop();
-
-                let targetNoteId;
-
-                if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances
-                    targetNoteId = noteId;
-                } else {
-                    targetNoteId = noteIdMap[noteId];
-                }
-
-                return `data-note-path="root/${targetNoteId}"`;
-            });
-
-            if (noteMeta) {
-                const includeNoteLinks = (noteMeta.attributes || [])
-                    .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');
-
-                for (const link of includeNoteLinks) {
-                    // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
-                    content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
-                }
-            }
+            content = processNoteContent(content, noteTitle, filePath, noteMeta);
         }
 
         if (type === 'relationMap' && noteMeta) {
@@ -368,17 +372,6 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
             }
         }
 
-        if (type === 'text' && noteMeta) {
-            const includeNoteLinks = (noteMeta.attributes || [])
-                .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');
-
-            // this will replace relation map links
-            for (const link of includeNoteLinks) {
-                // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
-                content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
-            }
-        }
-
         let note = becca.getNote(noteId);
 
         const isProtected = importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable();
@@ -523,7 +516,7 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
 
         if (!metaFile) {
             // if there's no meta file then the notes are created based on the order in that zip file but that
-            // is usually quite random so we sort the notes in the way they would appear in the file manager
+            // is usually quite random, so we sort the notes in the way they would appear in the file manager
             treeService.sortNotes(noteId, 'title', false, true);
         }
 
@@ -533,11 +526,11 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
     // we're saving attributes and links only now so that all relation and link target notes
     // are already in the database (we don't want to have "broken" relations, not even transitionally)
     for (const attr of attributes) {
-        if (attr.type !== 'relation' || attr.value in createdNoteIds) {
+        if (attr.type !== 'relation' || attr.value in becca.notes) {
             new Attribute(attr).save();
         }
         else {
-            log.info(`Relation not imported since target note doesn't exist: ${JSON.stringify(attr)}`);
+            log.info(`Relation not imported since the target note doesn't exist: ${JSON.stringify(attr)}`);
         }
     }