diff --git a/src/services/help_import.js b/src/services/help_import.js index e32e183e6..d17b62992 100644 --- a/src/services/help_import.js +++ b/src/services/help_import.js @@ -1,18 +1,449 @@ +"use strict" + const cls = require("./cls"); -const zipImport = require("../services/import/zip"); -const TaskContext = require("./task_context"); const becca = require("../becca/becca"); const beccaLoader = require("../becca/becca_loader"); const fs = require("fs").promises; +const Attribute = require('../becca/entities/attribute'); +const utils = require('./utils'); +const log = require('./log'); +const noteService = require('./notes'); +const attributeService = require('./attributes'); +const Branch = require('../becca/entities/branch'); +const path = require('path'); +const yauzl = require("yauzl"); +const htmlSanitizer = require('./html_sanitizer'); +const sql = require('./sql'); const HELP_FILE_PATH = '/home/adam/Downloads/Help1.zip'; beccaLoader.beccaLoaded.then(() => { cls.init(async () => { const helpRoot = becca.getNote("_help"); - const taskContext = new TaskContext('no-progress-reporting', null, {}); const data = await fs.readFile(HELP_FILE_PATH, "binary"); - await zipImport.importZip(taskContext, Buffer.from(data, 'binary'), helpRoot); + await importZip(Buffer.from(data, 'binary'), helpRoot); }); }); + +async function importZip(fileBuffer, importRootNote) { + // maps from original noteId (in ZIP file) to newly generated noteId + const noteIdMap = {}; + const attributes = []; + let metaFile = null; + + function getNewNoteId(origNoteId) { + if (origNoteId === 'root' || origNoteId.startsWith("_")) { + // these "named" noteIds don't differ between Trilium instances + return origNoteId; + } + + if (!noteIdMap[origNoteId]) { + noteIdMap[origNoteId] = utils.newEntityId(); + } + + return noteIdMap[origNoteId]; + } + + function getMeta(filePath) { + const pathSegments = filePath.split(/[\/\\]/g); + + let cursor = { + isImportRoot: true, + children: metaFile.files + }; + + let parent; + + for (const segment of pathSegments) { + if (!cursor || !cursor.children || cursor.children.length === 0) { + return {}; + } + + parent = cursor; + cursor = cursor.children.find(file => file.dataFileName === segment || file.dirFileName === segment); + } + + return { + parentNoteMeta: parent, + noteMeta: cursor + }; + } + + function getParentNoteId(filePath, parentNoteMeta) { + return parentNoteMeta.isImportRoot ? importRootNote.noteId : getNewNoteId(parentNoteMeta.noteId); + } + + function getNoteId(noteMeta) { + const helpNoteId = noteMeta.attributes?.find(attr => attr.type === 'label' && attr.name === 'helpNoteId')?.value; + + const noteId = helpNoteId || noteMeta.noteId; + noteIdMap[noteMeta.noteId] = noteId; + + return noteId; + } + + function saveAttributes(note, noteMeta) { + if (!noteMeta) { + return; + } + + for (const attr of noteMeta.attributes) { + attr.noteId = note.noteId; + + if (attr.type === 'label-definition') { + attr.type = 'label'; + attr.name = `label:${attr.name}`; + } + else if (attr.type === 'relation-definition') { + attr.type = 'label'; + attr.name = `relation:${attr.name}`; + } + + if (!attributeService.isAttributeType(attr.type)) { + log.error(`Unrecognized attribute type ${attr.type}`); + continue; + } + + if (attr.type === 'relation' && ['internalLink', 'imageLink', 'relationMapLink', 'includeNoteLink'].includes(attr.name)) { + // these relations are created automatically and as such don't need to be duplicated in the import + continue; + } + + if (attr.type === 'relation') { + attr.value = getNewNoteId(attr.value); + } + + attributes.push(attr); + } + } + + function saveDirectory(filePath) { + const { parentNoteMeta, noteMeta } = getMeta(filePath); + + const noteId = getNoteId(noteMeta); + const parentNoteId = getParentNoteId(filePath, parentNoteMeta); + + let note = becca.getNote(noteId); + + if (note) { + return; + } + + ({note} = noteService.createNewNote({ + parentNoteId: parentNoteId, + title: noteMeta.title, + content: '', + noteId: noteId, + type: noteMeta.type, + mime: noteMeta.mime, + prefix: noteMeta.prefix, + isExpanded: noteMeta.isExpanded, + notePosition: noteMeta.notePosition, + isProtected: false, + })); + + saveAttributes(note, noteMeta); + + return noteId; + } + + function getNoteIdFromRelativeUrl(url, filePath) { + while (url.startsWith("./")) { + url = url.substr(2); + } + + let absUrl = path.dirname(filePath); + + while (url.startsWith("../")) { + absUrl = path.dirname(absUrl); + + url = url.substr(3); + } + + if (absUrl === '.') { + absUrl = ''; + } + + absUrl += `${absUrl.length > 0 ? '/' : ''}${url}`; + + const {noteMeta} = getMeta(absUrl); + const targetNoteId = getNoteId(noteMeta); + return targetNoteId; + } + + function processTextNoteContent(content, filePath, noteMeta) { + function isUrlAbsolute(url) { + return /^(?:[a-z]+:)?\/\//i.test(url); + } + + content = content.replace(/

([^<]*)<\/h1>/gi, (match, text) => { + if (noteMeta.title.trim() === text.trim()) { + return ""; // remove whole H1 tag + } else { + return `

${text}

`; + } + }); + + content = htmlSanitizer.sanitize(content); + + content = content.replace(/]*>/gis, ""); + content = content.replace(/<\/body>.*<\/html>/gis, ""); + + content = content.replace(/src="([^"]*)"/g, (match, url) => { + try { + url = decodeURIComponent(url); + } catch (e) { + log.error(`Cannot parse image URL '${url}', keeping original (${e}).`); + return `src="${url}"`; + } + + if (isUrlAbsolute(url) || url.startsWith("/")) { + return match; + } + + const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); + + return `src="api/images/${targetNoteId}/${path.basename(url)}"`; + }); + + content = content.replace(/href="([^"]*)"/g, (match, url) => { + try { + url = decodeURIComponent(url); + } catch (e) { + log.error(`Cannot parse link URL '${url}', keeping original (${e}).`); + return `href="${url}"`; + } + + if (url.startsWith('#') // already a note path (probably) + || isUrlAbsolute(url)) { + return match; + } + + const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); + + return `href="#root/${targetNoteId}"`; + }); + + content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => { + const noteId = notePath.split("/").pop(); + + let targetNoteId; + + if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances + targetNoteId = noteId; + } else { + targetNoteId = noteIdMap[noteId]; + } + + return `data-note-path="root/${targetNoteId}"`; + }); + + if (noteMeta) { + const includeNoteLinks = (noteMeta.attributes || []) + .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink'); + + for (const link of includeNoteLinks) { + // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters + content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value)); + } + } + return content; + } + + function processNoteContent(noteMeta, type, mime, content, filePath) { + if (type === 'text') { + content = processTextNoteContent(content, filePath, noteMeta); + } + + if (type === 'relationMap') { + const relationMapLinks = (noteMeta.attributes || []) + .filter(attr => attr.type === 'relation' && attr.name === 'relationMapLink'); + + // this will replace relation map links + for (const link of relationMapLinks) { + // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters + content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value)); + } + } + + return content; + } + + function saveNote(filePath, content) { + const {parentNoteMeta, noteMeta} = getMeta(filePath); + + if (noteMeta?.noImport) { + return; + } + + const noteId = getNoteId(noteMeta); + const parentNoteId = getParentNoteId(filePath, parentNoteMeta); + + if (!parentNoteId) { + throw new Error(`Cannot find parentNoteId for ${filePath}`); + } + + if (noteMeta?.isClone) { + if (!becca.getBranchFromChildAndParent(noteId, parentNoteId)) { + new Branch({ + noteId, + parentNoteId, + isExpanded: noteMeta.isExpanded, + prefix: noteMeta.prefix, + notePosition: noteMeta.notePosition + }).save(); + } + + return; + } + + let {type, mime} = noteMeta; + + if (type !== 'file' && type !== 'image') { + content = content.toString("UTF-8"); + } + + content = processNoteContent(noteMeta, type, mime, content, filePath); + + let note = becca.getNote(noteId); + + if (note) { + // only skeleton was created because of altered order of cloned notes in ZIP, we need to update + // https://github.com/zadam/trilium/issues/2440 + if (note.type === undefined) { + note.type = type; + note.mime = mime; + note.title = noteMeta.title; + note.isProtected = false; + note.save(); + } + + note.setContent(content); + + if (!becca.getBranchFromChildAndParent(noteId, parentNoteId)) { + new Branch({ + noteId, + parentNoteId, + isExpanded: noteMeta.isExpanded, + prefix: noteMeta.prefix, + notePosition: noteMeta.notePosition + }).save(); + } + } + else { + ({note} = noteService.createNewNote({ + parentNoteId: parentNoteId, + title: noteMeta.title, + content: content, + noteId, + type, + mime, + prefix: noteMeta.prefix, + isExpanded: noteMeta.isExpanded, + notePosition: noteMeta.notePosition, + isProtected: false, + })); + + saveAttributes(note, noteMeta); + } + } + + const entries = []; + + await readZipFile(fileBuffer, async (zipfile, entry) => { + const filePath = normalizeFilePath(entry.fileName); + + if (/\/$/.test(entry.fileName)) { + entries.push({ + type: 'directory', + filePath + }); + } + else { + entries.push({ + type: 'file', + filePath, + content: await readContent(zipfile, entry) + }); + } + + zipfile.readEntry(); + }); + + metaFile = JSON.parse(entries.find(entry => entry.type === 'file' && entry.filePath === '!!!meta.json').content); + + sql.transactional(() => { + for (const {type, filePath, content} of entries) { + + console.log(filePath); + + + if (type === 'directory') { + saveDirectory(filePath); + } else if (type === 'file') { + if (filePath === '!!!meta.json') { + continue; + } + + saveNote(filePath, content); + } else { + throw new Error(`Unknown type ${type}`) + } + } + }); + + // we're saving attributes and links only now so that all relation and link target notes + // are already in the database (we don't want to have "broken" relations, not even transitionally) + for (const attr of attributes) { + if (attr.type !== 'relation' || attr.value in becca.notes) { + new Attribute(attr).save(); + } + else { + log.info(`Relation not imported since the target note doesn't exist: ${JSON.stringify(attr)}`); + } + } +} + +/** @returns {string} path without leading or trailing slash and backslashes converted to forward ones */ +function normalizeFilePath(filePath) { + filePath = filePath.replace(/\\/g, "/"); + + if (filePath.startsWith("/")) { + filePath = filePath.substr(1); + } + + if (filePath.endsWith("/")) { + filePath = filePath.substr(0, filePath.length - 1); + } + + return filePath; +} + +function streamToBuffer(stream) { + const chunks = []; + stream.on('data', chunk => chunks.push(chunk)); + + return new Promise((res, rej) => stream.on('end', () => res(Buffer.concat(chunks)))); +} + +function readContent(zipfile, entry) { + return new Promise((res, rej) => { + zipfile.openReadStream(entry, function(err, readStream) { + if (err) rej(err); + + streamToBuffer(readStream).then(res); + }); + }); +} + +function readZipFile(buffer, processEntryCallback) { + return new Promise((res, rej) => { + yauzl.fromBuffer(buffer, {lazyEntries: true, validateEntrySizes: false}, function(err, zipfile) { + if (err) throw err; + zipfile.readEntry(); + zipfile.on("entry", entry => processEntryCallback(zipfile, entry)); + zipfile.on("end", res); + }); + }); +} diff --git a/src/services/import/zip.js b/src/services/import/zip.js index de25d5194..d1997f31f 100644 --- a/src/services/import/zip.js +++ b/src/services/import/zip.js @@ -22,7 +22,7 @@ const becca = require("../../becca/becca"); * @return {Promise<*>} */ async function importZip(taskContext, fileBuffer, importRootNote) { - // maps from original noteId (in tar file) to newly generated noteId + // maps from original noteId (in ZIP file) to newly generated noteId const noteIdMap = {}; const attributes = []; // path => noteId, used only when meta file is not available @@ -96,8 +96,7 @@ async function importZip(taskContext, fileBuffer, importRootNote) { parentNoteId = createdPaths[parentPath]; } else { - // tar allows creating out of order records - i.e. file in a directory can appear in the tar stream before actual directory - // (out-of-order-directory-records.tar in test set) + // ZIP allows creating out of order records - i.e. file in a directory can appear in the ZIP stream before actual directory parentNoteId = saveDirectory(parentPath); } }