diff --git a/src/services/help_import.js b/src/services/help_import.js
index e32e183e6..d17b62992 100644
--- a/src/services/help_import.js
+++ b/src/services/help_import.js
@@ -1,18 +1,449 @@
+"use strict"
+
const cls = require("./cls");
-const zipImport = require("../services/import/zip");
-const TaskContext = require("./task_context");
const becca = require("../becca/becca");
const beccaLoader = require("../becca/becca_loader");
const fs = require("fs").promises;
+const Attribute = require('../becca/entities/attribute');
+const utils = require('./utils');
+const log = require('./log');
+const noteService = require('./notes');
+const attributeService = require('./attributes');
+const Branch = require('../becca/entities/branch');
+const path = require('path');
+const yauzl = require("yauzl");
+const htmlSanitizer = require('./html_sanitizer');
+const sql = require('./sql');
+// NOTE(review): absolute path into a developer's home directory - presumably a temporary
+// development shortcut; confirm how the help ZIP should be located before merging.
const HELP_FILE_PATH = '/home/adam/Downloads/Help1.zip';
+// After becca has finished loading, read the help ZIP and import it below the "_help" note.
+// The import runs inside the callback passed to cls.init().
+// NOTE(review): no rejection handler is visible here - a failing read/import would surface
+// as an unhandled rejection unless cls.init() deals with it; confirm.
beccaLoader.beccaLoaded.then(() => {
cls.init(async () => {
const helpRoot = becca.getNote("_help");
- const taskContext = new TaskContext('no-progress-reporting', null, {});
+ // the file is read as a "binary" encoded string and converted back to a Buffer for the import
const data = await fs.readFile(HELP_FILE_PATH, "binary");
- await zipImport.importZip(taskContext, Buffer.from(data, 'binary'), helpRoot);
+ await importZip(Buffer.from(data, 'binary'), helpRoot);
});
});
+
+async function importZip(fileBuffer, importRootNote) {
+ // maps from original noteId (in ZIP file) to newly generated noteId
+ const noteIdMap = {};
+ const attributes = [];
+ let metaFile = null;
+
+    /**
+     * Translates a noteId found in the ZIP into the noteId to be used by the import.
+     * "root" and IDs starting with "_" are returned unchanged; any other ID gets a freshly
+     * generated replacement on first use, which is remembered in noteIdMap and reused afterwards.
+     *
+     * @param {string} origNoteId - noteId as written in the ZIP file
+     * @returns {string} noteId to use
+     */
+    function getNewNoteId(origNoteId) {
+        const isNamedNoteId = origNoteId === 'root' || origNoteId.startsWith("_");
+
+        if (isNamedNoteId) {
+            // these "named" noteIds don't differ between Trilium instances
+            return origNoteId;
+        }
+
+        const knownNoteId = noteIdMap[origNoteId];
+
+        if (knownNoteId) {
+            return knownNoteId;
+        }
+
+        const generatedNoteId = utils.newEntityId();
+        noteIdMap[origNoteId] = generatedNoteId;
+
+        return generatedNoteId;
+    }
+
+    /**
+     * Finds the meta record belonging to a path inside the ZIP by walking the meta file tree
+     * one path segment at a time (a segment matches a record's dataFileName or dirFileName).
+     *
+     * @param {string} filePath - path inside the ZIP, both "/" and "\" are accepted as separators
+     * @returns {{parentNoteMeta: object, noteMeta: object}|{}} empty object when the walk hits
+     *          a record without children while there still are segments left to resolve
+     */
+    function getMeta(filePath) {
+        const segments = filePath.split(/[\/\\]/g);
+
+        // synthetic record standing for the import root, its children are the top-level records
+        let current = {
+            isImportRoot: true,
+            children: metaFile.files
+        };
+        let previous;
+
+        for (const segment of segments) {
+            const isDeadEnd = !current || !current.children || current.children.length === 0;
+
+            if (isDeadEnd) {
+                return {};
+            }
+
+            previous = current;
+            current = previous.children.find(({dataFileName, dirFileName}) => dataFileName === segment || dirFileName === segment);
+        }
+
+        return {
+            parentNoteMeta: previous,
+            noteMeta: current
+        };
+    }
+
+    /**
+     * Resolves the noteId of the note under which an imported note is placed.
+     *
+     * @param {string} filePath - not used by this implementation
+     * @param {object} parentNoteMeta - meta record of the parent; the synthetic import root
+     *        record (isImportRoot) maps to importRootNote
+     * @returns {string} noteId of the parent
+     */
+    function getParentNoteId(filePath, parentNoteMeta) {
+        if (parentNoteMeta.isImportRoot) {
+            return importRootNote.noteId;
+        }
+
+        return getNewNoteId(parentNoteMeta.noteId);
+    }
+
+    /**
+     * Determines the noteId an imported note will get: the value of its "helpNoteId" label
+     * when the meta record carries one, the noteId from the meta record otherwise.
+     * The result is also recorded in noteIdMap under the original noteId.
+     *
+     * @param {object} noteMeta - meta record of the note
+     * @returns {string} noteId to use
+     */
+    function getNoteId(noteMeta) {
+        const helpNoteIdLabel = noteMeta.attributes?.find(({type, name}) => type === 'label' && name === 'helpNoteId');
+        const resolvedNoteId = helpNoteIdLabel?.value || noteMeta.noteId;
+
+        noteIdMap[noteMeta.noteId] = resolvedNoteId;
+
+        return resolvedNoteId;
+    }
+
+    /**
+     * Queues the attributes listed in a note's meta record; they are only pushed to the
+     * `attributes` list here and saved by the caller later.
+     *
+     * Definition attributes are converted to labels ("label:<name>" / "relation:<name>"),
+     * unrecognized types are logged and skipped, automatically maintained link relations are
+     * skipped and relation targets are translated with getNewNoteId().
+     *
+     * @param {object} note - note the attributes belong to (only note.noteId is read)
+     * @param {object} [noteMeta] - meta record of the note, nothing happens when it is missing
+     */
+    function saveAttributes(note, noteMeta) {
+        if (!noteMeta) {
+            return;
+        }
+
+        // a meta record may come without an attribute list (other readers of
+        // noteMeta.attributes in this file tolerate that as well)
+        for (const metaAttr of noteMeta.attributes || []) {
+            // work on a copy so that the parsed meta file is not modified as a side effect
+            const attr = {...metaAttr, noteId: note.noteId};
+
+            if (attr.type === 'label-definition') {
+                attr.type = 'label';
+                attr.name = `label:${attr.name}`;
+            }
+            else if (attr.type === 'relation-definition') {
+                attr.type = 'label';
+                attr.name = `relation:${attr.name}`;
+            }
+
+            if (!attributeService.isAttributeType(attr.type)) {
+                log.error(`Unrecognized attribute type ${attr.type}`);
+                continue;
+            }
+
+            if (attr.type === 'relation' && ['internalLink', 'imageLink', 'relationMapLink', 'includeNoteLink'].includes(attr.name)) {
+                // these relations are created automatically and as such don't need to be duplicated in the import
+                continue;
+            }
+
+            if (attr.type === 'relation') {
+                attr.value = getNewNoteId(attr.value);
+            }
+
+            attributes.push(attr);
+        }
+    }
+
+    /**
+     * Creates the content-less note for a directory entry of the ZIP, unless a note with the
+     * resolved noteId already exists.
+     *
+     * @param {string} filePath - normalized path of the directory inside the ZIP
+     * @returns {string} noteId of the note representing the directory (existing or new)
+     * @throws {Error} when the meta file has no record for the directory
+     */
+    function saveDirectory(filePath) {
+        const { parentNoteMeta, noteMeta } = getMeta(filePath);
+
+        if (!noteMeta) {
+            // fail with a readable message instead of a TypeError further down
+            throw new Error(`Cannot find meta for directory ${filePath}`);
+        }
+
+        const noteId = getNoteId(noteMeta);
+        const parentNoteId = getParentNoteId(filePath, parentNoteMeta);
+
+        if (becca.getNote(noteId)) {
+            // nothing to create, but report the ID so that both paths return the same thing
+            return noteId;
+        }
+
+        const {note} = noteService.createNewNote({
+            parentNoteId: parentNoteId,
+            title: noteMeta.title,
+            content: '',
+            noteId: noteId,
+            type: noteMeta.type,
+            mime: noteMeta.mime,
+            prefix: noteMeta.prefix,
+            isExpanded: noteMeta.isExpanded,
+            notePosition: noteMeta.notePosition,
+            isProtected: false,
+        });
+
+        saveAttributes(note, noteMeta);
+
+        return noteId;
+    }
+
+    /**
+     * Resolves a relative URL found in a note's content to the noteId of the note it points to.
+     * Leading "./" segments are dropped, every leading "../" moves one directory up from the
+     * directory of `filePath`, and the resulting path is looked up in the meta file.
+     *
+     * @param {string} url - relative URL as found in the content
+     * @param {string} filePath - path (inside the ZIP) of the file containing the URL
+     * @returns {string} noteId of the target note
+     */
+    function getNoteIdFromRelativeUrl(url, filePath) {
+        let relativeUrl = url;
+
+        while (relativeUrl.startsWith("./")) {
+            relativeUrl = relativeUrl.slice(2);
+        }
+
+        let baseDir = path.dirname(filePath);
+
+        while (relativeUrl.startsWith("../")) {
+            baseDir = path.dirname(baseDir);
+            relativeUrl = relativeUrl.slice(3);
+        }
+
+        // path.dirname() reports "." for a file without any directory part
+        if (baseDir === '.') {
+            baseDir = '';
+        }
+
+        const absUrl = baseDir.length > 0 ? `${baseDir}/${relativeUrl}` : `${relativeUrl}`;
+
+        const {noteMeta} = getMeta(absUrl);
+
+        return getNoteId(noteMeta);
+    }
+
+    /**
+     * Adapts the HTML of an imported text note:
+     *  - an H1 repeating the note title is removed, any other H1 is turned into an H2
+     *  - the HTML is passed through the sanitizer and the html/body wrapper is stripped
+     *  - relative src/href URLs are replaced with references to the target notes
+     *  - data-note-path values and included-note IDs are remapped
+     *
+     * @param {string} content - HTML of the note
+     * @param {string} filePath - path of the note's file inside the ZIP, base for relative URLs
+     * @param {object} noteMeta - meta record of the note
+     * @returns {string} processed HTML
+     */
+    function processTextNoteContent(content, filePath, noteMeta) {
+        function isUrlAbsolute(url) {
+            return /^(?:[a-z]+:)?\/\//i.test(url);
+        }
+
+        content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
+            if (noteMeta.title.trim() === text.trim()) {
+                return ""; // remove whole H1 tag
+            } else {
+                return `<h2>${text}</h2>`;
+            }
+        });
+
+        content = htmlSanitizer.sanitize(content);
+
+        content = content.replace(/<html.*<body[^>]*>/gis, "");
+        content = content.replace(/<\/body>.*<\/html>/gis, "");
+
+        content = content.replace(/src="([^"]*)"/g, (match, url) => {
+            try {
+                url = decodeURIComponent(url);
+            } catch (e) {
+                log.error(`Cannot parse image URL '${url}', keeping original (${e}).`);
+                return `src="${url}"`;
+            }
+
+            if (isUrlAbsolute(url) || url.startsWith("/")) {
+                return match;
+            }
+
+            const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
+
+            return `src="api/images/${targetNoteId}/${path.basename(url)}"`;
+        });
+
+        content = content.replace(/href="([^"]*)"/g, (match, url) => {
+            try {
+                url = decodeURIComponent(url);
+            } catch (e) {
+                log.error(`Cannot parse link URL '${url}', keeping original (${e}).`);
+                return `href="${url}"`;
+            }
+
+            if (url.startsWith('#') // already a note path (probably)
+                || isUrlAbsolute(url)) {
+                return match;
+            }
+
+            const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
+
+            return `href="#root/${targetNoteId}"`;
+        });
+
+        content = content.replace(/data-note-path="([^"]*)"/g, (match, notePath) => {
+            const noteId = notePath.split("/").pop();
+
+            let targetNoteId;
+
+            if (noteId === 'root' || noteId.startsWith("_")) { // named noteIds stay identical across instances
+                targetNoteId = noteId;
+            } else {
+                // NOTE(review): noteIdMap has no entry for a noteId which was not mapped yet,
+                // in which case this produces "root/undefined" - confirm the intended fallback
+                targetNoteId = noteIdMap[noteId];
+            }
+
+            return `data-note-path="root/${targetNoteId}"`;
+        });
+
+        if (noteMeta) {
+            const includeNoteLinks = (noteMeta.attributes || [])
+                .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink');
+
+            for (const link of includeNoteLinks) {
+                // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
+                content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
+            }
+        }
+
+        return content;
+    }
+
+    /**
+     * Applies the type specific content fix-ups: text notes go through processTextNoteContent(),
+     * relation maps get the noteIds of their "relationMapLink" relations replaced with the
+     * noteIds returned by getNewNoteId(). Content of other types is returned as is.
+     *
+     * @param {object} noteMeta - meta record of the note
+     * @param {string} type - note type
+     * @param {string} mime - not used by this implementation
+     * @param {string|Buffer} content - note content
+     * @param {string} filePath - path of the note's file inside the ZIP
+     * @returns {string|Buffer} processed content
+     */
+    function processNoteContent(noteMeta, type, mime, content, filePath) {
+        let processed = content;
+
+        if (type === 'text') {
+            processed = processTextNoteContent(processed, filePath, noteMeta);
+        }
+
+        if (type === 'relationMap') {
+            for (const attr of noteMeta.attributes || []) {
+                const isRelationMapLink = attr.type === 'relation' && attr.name === 'relationMapLink';
+
+                if (!isRelationMapLink) {
+                    continue;
+                }
+
+                // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
+                processed = processed.replace(new RegExp(attr.value, "g"), getNewNoteId(attr.value));
+            }
+        }
+
+        return processed;
+    }
+
+    /**
+     * Imports one file entry of the ZIP as a note.
+     *
+     * Records flagged noImport are skipped. A record flagged isClone only gets a branch to its
+     * parent. Otherwise the content is processed and either written into the already existing
+     * note (which is then also linked to the parent) or used to create a new note, whose
+     * attributes are queued.
+     *
+     * @param {string} filePath - normalized path of the file inside the ZIP
+     * @param {Buffer} content - raw content of the file
+     * @throws {Error} when the meta file has no record for the file or no parent can be determined
+     */
+    function saveNote(filePath, content) {
+        const {parentNoteMeta, noteMeta} = getMeta(filePath);
+
+        if (!noteMeta) {
+            // everything below reads the meta record, so fail with a readable message
+            // instead of a TypeError
+            throw new Error(`Cannot find meta for ${filePath}`);
+        }
+
+        if (noteMeta.noImport) {
+            return;
+        }
+
+        const noteId = getNoteId(noteMeta);
+        const parentNoteId = getParentNoteId(filePath, parentNoteMeta);
+
+        if (!parentNoteId) {
+            throw new Error(`Cannot find parentNoteId for ${filePath}`);
+        }
+
+        // links the note to its parent unless such a branch is already there
+        const ensureBranch = () => {
+            if (becca.getBranchFromChildAndParent(noteId, parentNoteId)) {
+                return;
+            }
+
+            new Branch({
+                noteId,
+                parentNoteId,
+                isExpanded: noteMeta.isExpanded,
+                prefix: noteMeta.prefix,
+                notePosition: noteMeta.notePosition
+            }).save();
+        };
+
+        if (noteMeta.isClone) {
+            ensureBranch();
+
+            return;
+        }
+
+        const {type, mime} = noteMeta;
+
+        if (type !== 'file' && type !== 'image') {
+            content = content.toString("UTF-8");
+        }
+
+        content = processNoteContent(noteMeta, type, mime, content, filePath);
+
+        const existingNote = becca.getNote(noteId);
+
+        if (existingNote) {
+            // only skeleton was created because of altered order of cloned notes in ZIP, we need to update
+            // https://github.com/zadam/trilium/issues/2440
+            if (existingNote.type === undefined) {
+                existingNote.type = type;
+                existingNote.mime = mime;
+                existingNote.title = noteMeta.title;
+                existingNote.isProtected = false;
+                existingNote.save();
+            }
+
+            existingNote.setContent(content);
+
+            ensureBranch();
+
+            return;
+        }
+
+        const {note} = noteService.createNewNote({
+            parentNoteId: parentNoteId,
+            title: noteMeta.title,
+            content: content,
+            noteId,
+            type,
+            mime,
+            prefix: noteMeta.prefix,
+            isExpanded: noteMeta.isExpanded,
+            notePosition: noteMeta.notePosition,
+            isProtected: false,
+        });
+
+        saveAttributes(note, noteMeta);
+    }
+
+    // Main flow: 1) read all ZIP entries into memory, 2) parse the meta file,
+    // 3) create the notes in one transaction, 4) save the queued attributes.
+    const entries = [];
+
+    await readZipFile(fileBuffer, async (zipfile, entry) => {
+        const filePath = normalizeFilePath(entry.fileName);
+
+        // entries whose name ends with a slash are directories
+        if (/\/$/.test(entry.fileName)) {
+            entries.push({
+                type: 'directory',
+                filePath
+            });
+        }
+        else {
+            entries.push({
+                type: 'file',
+                filePath,
+                content: await readContent(zipfile, entry)
+            });
+        }
+
+        // entries are read lazily, ask for the next one
+        zipfile.readEntry();
+    });
+
+    const metaEntry = entries.find(entry => entry.type === 'file' && entry.filePath === '!!!meta.json');
+
+    if (!metaEntry) {
+        // every lookup below depends on the meta file, so fail with a readable message
+        throw new Error("Cannot find '!!!meta.json' in the imported ZIP file");
+    }
+
+    metaFile = JSON.parse(metaEntry.content);
+
+    sql.transactional(() => {
+        for (const {type, filePath, content} of entries) {
+            if (type === 'directory') {
+                saveDirectory(filePath);
+            } else if (type === 'file') {
+                if (filePath === '!!!meta.json') {
+                    continue;
+                }
+
+                saveNote(filePath, content);
+            } else {
+                throw new Error(`Unknown type ${type}`);
+            }
+        }
+    });
+
+    // we're saving attributes and links only now so that all relation and link target notes
+    // are already in the database (we don't want to have "broken" relations, not even transitionally)
+    for (const attr of attributes) {
+        if (attr.type !== 'relation' || attr.value in becca.notes) {
+            new Attribute(attr).save();
+        }
+        else {
+            log.info(`Relation not imported since the target note doesn't exist: ${JSON.stringify(attr)}`);
+        }
+    }
+}
+
+/**
+ * @param {string} filePath - path as stored in the ZIP entry
+ * @returns {string} path without leading or trailing slash and backslashes converted to forward ones
+ */
+function normalizeFilePath(filePath) {
+    let normalized = filePath.replace(/\\/g, "/");
+
+    if (normalized.startsWith("/")) {
+        normalized = normalized.slice(1);
+    }
+
+    if (normalized.endsWith("/")) {
+        normalized = normalized.slice(0, -1);
+    }
+
+    return normalized;
+}
+
+/**
+ * Collects everything a readable stream emits into a single Buffer.
+ *
+ * @param {import('stream').Readable} stream - stream emitting Buffer chunks
+ * @returns {Promise<Buffer>} resolves with the concatenated chunks on 'end', rejects on 'error'
+ */
+function streamToBuffer(stream) {
+    return new Promise((res, rej) => {
+        const chunks = [];
+
+        stream.on('data', chunk => chunks.push(chunk));
+        stream.on('end', () => res(Buffer.concat(chunks)));
+        // without this listener a failing stream would leave the promise pending forever
+        stream.on('error', rej);
+    });
+}
+
+/**
+ * Reads the whole content of one ZIP entry.
+ *
+ * @param zipfile - opened ZIP file providing openReadStream(entry, callback)
+ * @param entry - entry to read
+ * @returns {Promise<Buffer>} content of the entry; rejects when the entry cannot be opened
+ *          or reading its stream fails
+ */
+function readContent(zipfile, entry) {
+    return new Promise((res, rej) => {
+        zipfile.openReadStream(entry, (err, readStream) => {
+            if (err) {
+                // stop here - there is no stream to read when opening failed
+                rej(err);
+                return;
+            }
+
+            streamToBuffer(readStream).then(res, rej);
+        });
+    });
+}
+
+/**
+ * Opens a ZIP held in memory and hands its entries one by one to the callback.
+ * Entries are read lazily: the callback is expected to call zipfile.readEntry() when it is
+ * ready for the next entry.
+ *
+ * @param {Buffer} buffer - content of the ZIP file
+ * @param {function} processEntryCallback - called as (zipfile, entry), may be async
+ * @returns {Promise<void>} resolves when the ZIP reports its end; rejects when the ZIP cannot
+ *          be opened, when reading it fails or when the callback throws / rejects
+ */
+function readZipFile(buffer, processEntryCallback) {
+    return new Promise((res, rej) => {
+        yauzl.fromBuffer(buffer, {lazyEntries: true, validateEntrySizes: false}, (err, zipfile) => {
+            if (err) {
+                // throwing inside this callback would bypass the promise, reject it instead
+                rej(err);
+                return;
+            }
+
+            zipfile.on("entry", entry => {
+                // the callback may be async - route both synchronous throws and rejections
+                // into the returned promise
+                new Promise(done => done(processEntryCallback(zipfile, entry))).catch(rej);
+            });
+            zipfile.on("end", res);
+            zipfile.on("error", rej);
+
+            // listeners are in place, start reading
+            zipfile.readEntry();
+        });
+    });
+}
diff --git a/src/services/import/zip.js b/src/services/import/zip.js
index de25d5194..d1997f31f 100644
--- a/src/services/import/zip.js
+++ b/src/services/import/zip.js
@@ -22,7 +22,7 @@ const becca = require("../../becca/becca");
* @return {Promise<*>}
*/
async function importZip(taskContext, fileBuffer, importRootNote) {
- // maps from original noteId (in tar file) to newly generated noteId
+ // maps from original noteId (in ZIP file) to newly generated noteId
const noteIdMap = {};
const attributes = [];
// path => noteId, used only when meta file is not available
@@ -96,8 +96,7 @@ async function importZip(taskContext, fileBuffer, importRootNote) {
parentNoteId = createdPaths[parentPath];
}
else {
- // tar allows creating out of order records - i.e. file in a directory can appear in the tar stream before actual directory
- // (out-of-order-directory-records.tar in test set)
+ // ZIP allows creating out of order records - i.e. file in a directory can appear in the ZIP stream before actual directory
parentNoteId = saveDirectory(parentPath);
}
}