"use strict"; const Attribute = require('../../entities/attribute'); const utils = require('../../services/utils'); const log = require('../../services/log'); const repository = require('../../services/repository'); const noteService = require('../../services/notes'); const attributeService = require('../../services/attributes'); const Branch = require('../../entities/branch'); const path = require('path'); const commonmark = require('commonmark'); const TaskContext = require('../task_context.js'); const protectedSessionService = require('../protected_session'); const mimeService = require("./mime"); const treeService = require("../tree"); const yauzl = require("yauzl"); /** * @param {TaskContext} taskContext * @param {Buffer} fileBuffer * @param {Note} importRootNote * @return {Promise<*>} */ async function importZip(taskContext, fileBuffer, importRootNote) { // maps from original noteId (in tar file) to newly generated noteId const noteIdMap = {}; const attributes = []; // path => noteId, used only when meta file is not available const createdPaths = { '/': importRootNote.noteId, '\\': importRootNote.noteId }; const mdReader = new commonmark.Parser(); const mdWriter = new commonmark.HtmlRenderer(); let metaFile = null; let firstNote = null; const createdNoteIds = {}; function getNewNoteId(origNoteId) { // in case the original noteId is empty. This probably shouldn't happen, but still good to have this precaution if (!origNoteId.trim()) { return ""; } if (!noteIdMap[origNoteId]) { noteIdMap[origNoteId] = utils.newEntityId(); } return noteIdMap[origNoteId]; } function getMeta(filePath) { if (!metaFile) { return {}; } const pathSegments = filePath.split(/[\/\\]/g); let cursor = { isImportRoot: true, children: metaFile.files }; let parent; for (const segment of pathSegments) { if (!cursor || !cursor.children || cursor.children.length === 0) { return {}; } parent = cursor; cursor = cursor.children.find(file => file.dataFileName === segment || file.dirFileName === segment); } return { parentNoteMeta: parent, noteMeta: cursor }; } async function getParentNoteId(filePath, parentNoteMeta) { let parentNoteId; if (parentNoteMeta) { parentNoteId = parentNoteMeta.isImportRoot ? importRootNote.noteId : getNewNoteId(parentNoteMeta.noteId); } else { const parentPath = path.dirname(filePath); if (parentPath === '.') { parentNoteId = importRootNote.noteId; } else if (parentPath in createdPaths) { parentNoteId = createdPaths[parentPath]; } else { // tar allows creating out of order records - i.e. file in a directory can appear in the tar stream before actual directory // (out-of-order-directory-records.tar in test set) parentNoteId = await saveDirectory(parentPath); } } return parentNoteId; } function getNoteTitle(filePath, noteMeta) { if (noteMeta) { return noteMeta.title; } else { const basename = path.basename(filePath); return getTextFileWithoutExtension(basename); } } function getNoteId(noteMeta, filePath) { if (noteMeta) { return getNewNoteId(noteMeta.noteId); } const filePathNoExt = getTextFileWithoutExtension(filePath); if (filePathNoExt in createdPaths) { return createdPaths[filePathNoExt]; } const noteId = utils.newEntityId(); createdPaths[filePathNoExt] = noteId; return noteId; } function detectFileTypeAndMime(taskContext, filePath) { const mime = mimeService.getMime(filePath) || "application/octet-stream"; const type = mimeService.getType(taskContext.data, mime); return { mime, type }; } async function saveAttributes(note, noteMeta) { if (!noteMeta) { return; } for (const attr of noteMeta.attributes) { attr.noteId = note.noteId; if (!attributeService.isAttributeType(attr.type)) { log.error("Unrecognized attribute type " + attr.type); continue; } if (attr.type === 'relation' && ['internalLink', 'imageLink', 'relationMapLink', 'includeNoteLink'].includes(attr.name)) { // these relations are created automatically and as such don't need to be duplicated in the import continue; } if (attr.type === 'relation') { attr.value = getNewNoteId(attr.value); } if (taskContext.data.safeImport && attributeService.isAttributeDangerous(attr.type, attr.name)) { attr.name = 'disabled-' + attr.name; } attributes.push(attr); } } async function saveDirectory(filePath) { const { parentNoteMeta, noteMeta } = getMeta(filePath); const noteId = getNoteId(noteMeta, filePath); const noteTitle = getNoteTitle(filePath, noteMeta); const parentNoteId = await getParentNoteId(filePath, parentNoteMeta); let note = await repository.getNote(noteId); if (note) { return; } ({note} = await noteService.createNewNote({ parentNoteId: parentNoteId, title: noteTitle, content: '', noteId: noteId, type: noteMeta ? noteMeta.type : 'text', mime: noteMeta ? noteMeta.mime : 'text/html', prefix: noteMeta ? noteMeta.prefix : '', isExpanded: noteMeta ? noteMeta.isExpanded : false, isProtected: importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(), })); createdNoteIds[note.noteId] = true; await saveAttributes(note, noteMeta); if (!firstNote) { firstNote = note; } return noteId; } function getTextFileWithoutExtension(filePath) { const extension = path.extname(filePath).toLowerCase(); if (extension === '.md' || extension === '.html') { return filePath.substr(0, filePath.length - extension.length); } else { return filePath; } } function getNoteIdFromRelativeUrl(url, filePath) { while (url.startsWith("./")) { url = url.substr(2); } let absUrl = path.dirname(filePath); while (url.startsWith("../")) { absUrl = path.dirname(absUrl); url = url.substr(3); } if (absUrl === '.') { absUrl = ''; } absUrl += (absUrl.length > 0 ? '/' : '') + url; const {noteMeta} = getMeta(absUrl); const targetNoteId = getNoteId(noteMeta, absUrl); return targetNoteId; } async function saveNote(filePath, content) { const {parentNoteMeta, noteMeta} = getMeta(filePath); if (noteMeta && noteMeta.noImport) { return; } const noteId = getNoteId(noteMeta, filePath); const parentNoteId = await getParentNoteId(filePath, parentNoteMeta); if (!parentNoteId) { throw new Error(`Cannot find parentNoteId for ${filePath}`); } if (noteMeta && noteMeta.isClone) { await new Branch({ noteId, parentNoteId, isExpanded: noteMeta.isExpanded, prefix: noteMeta.prefix, notePosition: noteMeta.notePosition }).save(); return; } const {type, mime} = noteMeta ? noteMeta : detectFileTypeAndMime(taskContext, filePath); if (type !== 'file' && type !== 'image') { content = content.toString("UTF-8"); } if ((noteMeta && noteMeta.format === 'markdown') || (!noteMeta && taskContext.data.textImportedAsText && ['text/markdown', 'text/x-markdown'].includes(mime))) { const parsed = mdReader.parse(content); content = mdWriter.render(parsed); } const noteTitle = getNoteTitle(filePath, noteMeta); if (type === 'text') { function isUrlAbsolute(url) { return /^(?:[a-z]+:)?\/\//i.test(url); } content = content.replace(/]*>/gis, ""); content = content.replace(/<\/body>.*<\/html>/gis, ""); content = content.replace(/src="([^"]*)"/g, (match, url) => { url = decodeURIComponent(url); if (isUrlAbsolute(url) || url.startsWith("/")) { return match; } const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); return `src="api/images/${targetNoteId}/${path.basename(url)}"`; }); content = content.replace(/href="([^"]*)"/g, (match, url) => { url = decodeURIComponent(url); if (isUrlAbsolute(url)) { return match; } const targetNoteId = getNoteIdFromRelativeUrl(url, filePath); return `href="#root/${targetNoteId}"`; }); content = content.replace(/

([^<]*)<\/h1>/gi, (match, text) => { if (noteTitle.trim() === text.trim()) { return ""; // remove whole H1 tag } else { return match; } }); const includeNoteLinks = (noteMeta.attributes || []) .filter(attr => attr.type === 'relation' && attr.name === 'includeNoteLink'); for (const link of includeNoteLinks) { // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value)); } } if (type === 'relation-map' && noteMeta) { const relationMapLinks = (noteMeta.attributes || []) .filter(attr => attr.type === 'relation' && attr.name === 'relationMapLink'); // this will replace relation map links for (const link of relationMapLinks) { // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value)); } } let note = await repository.getNote(noteId); if (note) { await note.setContent(content); } else { ({note} = await noteService.createNewNote({ parentNoteId: parentNoteId, title: noteTitle, content: content, noteId, type, mime, prefix: noteMeta ? noteMeta.prefix : '', isExpanded: noteMeta ? noteMeta.isExpanded : false, notePosition: noteMeta ? noteMeta.notePosition : false, isProtected: importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(), })); createdNoteIds[note.noteId] = true; await saveAttributes(note, noteMeta); if (!firstNote) { firstNote = note; } if (type === 'text') { filePath = getTextFileWithoutExtension(filePath); } } if (!noteMeta && (type === 'file' || type === 'image')) { attributes.push({ noteId, type: 'label', name: 'originalFileName', value: path.basename(filePath) }); } } /** @return {string} path without leading or trailing slash and backslashes converted to forward ones*/ function normalizeFilePath(filePath) { filePath = filePath.replace(/\\/g, "/"); if (filePath.startsWith("/")) { filePath = filePath.substr(1); } if (filePath.endsWith("/")) { filePath = filePath.substr(0, filePath.length - 1); } return filePath; } function streamToBuffer(stream) { const chunks = []; stream.on('data', chunk => chunks.push(chunk)); return new Promise((res, rej) => stream.on('end', () => res(Buffer.concat(chunks)))); } function readContent(zipfile, entry) { return new Promise((res, rej) => { zipfile.openReadStream(entry, function(err, readStream) { if (err) rej(err); streamToBuffer(readStream).then(res); }); }); } function readZipFile(buffer, processEntryCallback) { return new Promise((res, rej) => { yauzl.fromBuffer(buffer, {lazyEntries: true, validateEntrySizes: false}, function(err, zipfile) { if (err) throw err; zipfile.readEntry(); zipfile.on("entry", entry => processEntryCallback(zipfile, entry)); zipfile.on("end", res); }); }); } // we're running two passes to make sure that the meta file is loaded before the rest of the files is processed. await readZipFile(fileBuffer, async (zipfile, entry) => { const filePath = normalizeFilePath(entry.fileName); if (filePath === '!!!meta.json') { const content = await readContent(zipfile, entry); metaFile = JSON.parse(content.toString("UTF-8")); } zipfile.readEntry(); }); await readZipFile(fileBuffer, async (zipfile, entry) => { const filePath = normalizeFilePath(entry.fileName); if (/\/$/.test(entry.fileName)) { await saveDirectory(filePath); } else if (filePath !== '!!!meta.json') { const content = await readContent(zipfile, entry); await saveNote(filePath, content); } taskContext.increaseProgressCount(); zipfile.readEntry(); }); for (const noteId in createdNoteIds) { // now the noteIds are unique await noteService.scanForLinks(await repository.getNotes(noteId)); if (!metaFile) { // if there's no meta file then the notes are created based on the order in that tar file but that // is usually quite random so we sort the notes in the way they would appear in the file manager await treeService.sortNotesAlphabetically(noteId, true); } taskContext.increaseProgressCount(); } // we're saving attributes and links only now so that all relation and link target notes // are already in the database (we don't want to have "broken" relations, not even transitionally) for (const attr of attributes) { if (attr.type !== 'relation' || attr.value in createdNoteIds) { await new Attribute(attr).save(); } else { log.info("Relation not imported since target note doesn't exist: " + JSON.stringify(attr)); } } return firstNote; } module.exports = { importZip };