From 055db325108bf2447f4f83744dfe8ce71b8c7e6b Mon Sep 17 00:00:00 2001
From: zadam
Date: Thu, 9 Jul 2020 21:46:33 +0200
Subject: [PATCH] removed tar import

---
 package.json               |   3 +-
 src/routes/api/import.js   |   5 +-
 src/services/import/tar.js | 446 -------------------------------------
 3 files changed, 2 insertions(+), 452 deletions(-)
 delete mode 100644 src/services/import/tar.js

diff --git a/package.json b/package.json
index 6d97d69fb..f63f95d70 100644
--- a/package.json
+++ b/package.json
@@ -23,7 +23,7 @@
     "test-es6": "node -r esm spec-es6/attribute_parser.spec.js "
   },
   "dependencies": {
-    "async-mutex": "0.2.3",
+    "async-mutex": "0.2.4",
     "axios": "0.19.2",
     "better-sqlite3": "^7.1.0",
     "body-parser": "1.19.0",
@@ -66,7 +66,6 @@
     "serve-favicon": "2.5.0",
     "session-file-store": "1.4.0",
     "string-similarity": "4.0.1",
-    "tar-stream": "2.1.2",
     "turndown": "6.0.0",
     "turndown-plugin-gfm": "1.0.2",
     "unescape": "1.0.1",
diff --git a/src/routes/api/import.js b/src/routes/api/import.js
index 668725515..abd24a864 100644
--- a/src/routes/api/import.js
+++ b/src/routes/api/import.js
@@ -3,7 +3,6 @@
 const repository = require('../../services/repository');
 const enexImportService = require('../../services/import/enex');
 const opmlImportService = require('../../services/import/opml');
-const tarImportService = require('../../services/import/tar');
 const zipImportService = require('../../services/import/zip');
 const singleImportService = require('../../services/import/single');
 const cls = require('../../services/cls');
@@ -48,9 +47,7 @@
     const taskContext = TaskContext.getInstance(taskId, 'import', options);
 
     try {
-        if (extension === '.tar' && options.explodeArchives) {
-            note = await tarImportService.importTar(taskContext, file.buffer, parentNote);
-        } else if (extension === '.zip' && options.explodeArchives) {
+        if (extension === '.zip' && options.explodeArchives) {
             note = await zipImportService.importZip(taskContext, file.buffer, parentNote);
         } else if (extension === '.opml' && options.explodeArchives) {
             note = await opmlImportService.importOpml(taskContext, file.buffer, parentNote);
diff --git a/src/services/import/tar.js b/src/services/import/tar.js
deleted file mode 100644
index a7c862c73..000000000
--- a/src/services/import/tar.js
+++ /dev/null
@@ -1,446 +0,0 @@
-"use strict";
-
-const Attribute = require('../../entities/attribute');
-const utils = require('../../services/utils');
-const log = require('../../services/log');
-const repository = require('../../services/repository');
-const noteService = require('../../services/notes');
-const attributeService = require('../../services/attributes');
-const Branch = require('../../entities/branch');
-const tar = require('tar-stream');
-const stream = require('stream');
-const path = require('path');
-const commonmark = require('commonmark');
-const TaskContext = require('../task_context.js');
-const protectedSessionService = require('../protected_session');
-const mimeService = require("./mime");
-const sql = require("../sql");
-const treeService = require("../tree");
-const htmlSanitizer = require("../html_sanitizer");
-
-/**
- * @param {TaskContext} taskContext
- * @param {Buffer} fileBuffer
- * @param {Note} importRootNote
- * @return {Promise<*>}
- */
-async function importTar(taskContext, fileBuffer, importRootNote) {
-    // maps from original noteId (in tar file) to newly generated noteId
-    const noteIdMap = {};
-    const attributes = [];
-    // path => noteId
-    const createdPaths = { '/': importRootNote.noteId, '\\': importRootNote.noteId };
-    const mdReader = new commonmark.Parser();
-    const mdWriter = new commonmark.HtmlRenderer();
-    let metaFile = null;
-    let firstNote = null;
-
-    const extract = tar.extract();
-
-    function getNewNoteId(origNoteId) {
-        // in case the original noteId is empty. This probably shouldn't happen, but still good to have this precaution
-        if (!origNoteId.trim()) {
-            return "";
-        }
-
-        if (!noteIdMap[origNoteId]) {
-            noteIdMap[origNoteId] = utils.newEntityId();
-        }
-
-        return noteIdMap[origNoteId];
-    }
-
-    function getMeta(filePath) {
-        if (!metaFile) {
-            return {};
-        }
-
-        const pathSegments = filePath.split(/[\/\\]/g);
-
-        let cursor = {
-            isImportRoot: true,
-            children: metaFile.files
-        };
-
-        let parent;
-
-        for (const segment of pathSegments) {
-            if (!cursor || !cursor.children || cursor.children.length === 0) {
-                return {};
-            }
-
-            parent = cursor;
-            cursor = cursor.children.find(file => file.dataFileName === segment || file.dirFileName === segment);
-        }
-
-        return {
-            parentNoteMeta: parent,
-            noteMeta: cursor
-        };
-    }
-
-    function getParentNoteId(filePath, parentNoteMeta) {
-        let parentNoteId;
-
-        if (parentNoteMeta) {
-            parentNoteId = parentNoteMeta.isImportRoot ? importRootNote.noteId : getNewNoteId(parentNoteMeta.noteId);
-        }
-        else {
-            const parentPath = path.dirname(filePath);
-
-            if (parentPath === '.') {
-                parentNoteId = importRootNote.noteId;
-            }
-            else if (parentPath in createdPaths) {
-                parentNoteId = createdPaths[parentPath];
-            }
-            else {
-                // tar allows creating out of order records - i.e. file in a directory can appear in the tar stream before actual directory
-                // (out-of-order-directory-records.tar in test set)
-                parentNoteId = saveDirectory(parentPath);
-            }
-        }
-
-        return parentNoteId;
-    }
-
-    function getNoteId(noteMeta, filePath) {
-        const filePathNoExt = utils.removeTextFileExtension(filePath);
-
-        if (filePathNoExt in createdPaths) {
-            return createdPaths[filePathNoExt];
-        }
-
-        const noteId = noteMeta ? getNewNoteId(noteMeta.noteId) : utils.newEntityId();
-
-        createdPaths[filePathNoExt] = noteId;
-
-        return noteId;
-    }
-
-    function detectFileTypeAndMime(taskContext, filePath) {
-        const mime = mimeService.getMime(filePath) || "application/octet-stream";
-        const type = mimeService.getType(taskContext.data, mime);
-
-        return { mime, type };
-    }
-
-    function saveAttributes(note, noteMeta) {
-        if (!noteMeta) {
-            return;
-        }
-
-        for (const attr of noteMeta.attributes) {
-            attr.noteId = note.noteId;
-
-            if (!attributeService.isAttributeType(attr.type)) {
-                log.error("Unrecognized attribute type " + attr.type);
-                continue;
-            }
-
-            if (attr.type === 'relation' && ['internalLink', 'imageLink', 'relationMapLink', 'includeNoteLink'].includes(attr.name)) {
-                // these relations are created automatically and as such don't need to be duplicated in the import
-                continue;
-            }
-
-            if (attr.type === 'relation') {
-                attr.value = getNewNoteId(attr.value);
-            }
-
-            if (taskContext.data.safeImport && attributeService.isAttributeDangerous(attr.type, attr.name)) {
-                attr.name = 'disabled-' + attr.name;
-            }
-
-            attributes.push(attr);
-        }
-    }
-
-    function saveDirectory(filePath) {
-        const { parentNoteMeta, noteMeta } = getMeta(filePath);
-
-        const noteId = getNoteId(noteMeta, filePath);
-        const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);
-        const parentNoteId = getParentNoteId(filePath, parentNoteMeta);
-
-        let note = repository.getNote(noteId);
-
-        if (note) {
-            return;
-        }
-
-        sql.transactional(() => {
-            ({note} = noteService.createNewNote({
-                parentNoteId: parentNoteId,
-                title: noteTitle,
-                content: '',
-                noteId: noteId,
-                type: noteMeta ? noteMeta.type : 'text',
-                mime: noteMeta ? noteMeta.mime : 'text/html',
-                prefix: noteMeta ? noteMeta.prefix : '',
-                isExpanded: noteMeta ? noteMeta.isExpanded : false,
-                isProtected: importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
-            }));
-
-            saveAttributes(note, noteMeta);
-        });
-
-        if (!firstNote) {
-            firstNote = note;
-        }
-
-        return noteId;
-    }
-
-    function getNoteIdFromRelativeUrl(url, filePath) {
-        while (url.startsWith("./")) {
-            url = url.substr(2);
-        }
-
-        let absUrl = path.dirname(filePath);
-
-        while (url.startsWith("../")) {
-            absUrl = path.dirname(absUrl);
-
-            url = url.substr(3);
-        }
-
-        if (absUrl === '.') {
-            absUrl = '';
-        }
-
-        absUrl += (absUrl.length > 0 ? '/' : '') + url;
-
-        const {noteMeta} = getMeta(absUrl);
-        const targetNoteId = getNoteId(noteMeta, absUrl);
-        return targetNoteId;
-    }
-
-    function saveNote(filePath, content) {
-        const {parentNoteMeta, noteMeta} = getMeta(filePath);
-
-        if (noteMeta && noteMeta.noImport) {
-            return;
-        }
-
-        const noteId = getNoteId(noteMeta, filePath);
-        const parentNoteId = getParentNoteId(filePath, parentNoteMeta);
-
-        if (noteMeta && noteMeta.isClone) {
-            new Branch({
-                noteId,
-                parentNoteId,
-                isExpanded: noteMeta.isExpanded,
-                prefix: noteMeta.prefix,
-                notePosition: noteMeta.notePosition
-            }).save();
-
-            return;
-        }
-
-        const {type, mime} = noteMeta ? noteMeta : detectFileTypeAndMime(taskContext, filePath);
-
-        if (type !== 'file' && type !== 'image') {
-            content = content.toString("UTF-8");
-        }
-
-        if ((noteMeta && noteMeta.format === 'markdown')
-            || (!noteMeta && taskContext.data.textImportedAsText && ['text/markdown', 'text/x-markdown'].includes(mime))) {
-            const parsed = mdReader.parse(content);
-            content = mdWriter.render(parsed);
-        }
-
-        const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);
-
-        if (type === 'text') {
-            function isUrlAbsolute(url) {
-                return /^(?:[a-z]+:)?\/\//i.test(url);
-            }
-
-            content = htmlSanitizer.sanitize(content);
-
-            content = content.replace(/<html.*<body[^>]*>/gis, "");
-            content = content.replace(/<\/body>.*<\/html>/gis, "");
-
-            content = content.replace(/src="([^"]*)"/g, (match, url) => {
-                url = decodeURIComponent(url);
-
-                if (isUrlAbsolute(url) || url.startsWith("/")) {
-                    return match;
-                }
-
-                const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
-
-                return `src="api/images/${targetNoteId}/${path.basename(url)}"`;
-            });
-
-            content = content.replace(/href="([^"]*)"/g, (match, url) => {
-                url = decodeURIComponent(url);
-
-                if (isUrlAbsolute(url)) {
-                    return match;
-                }
-
-                const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);
-
-                return `href="#root/${targetNoteId}"`;
-            });
-
-            content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
-                if (noteTitle.trim() === text.trim()) {
-                    return ""; // remove whole H1 tag
-                }
-                else {
-                    return match;
-                }
-            });
-        }
-
-        if (type === 'relation-map' && noteMeta) {
-            const relationMapLinks = (noteMeta.attributes || [])
-                .filter(attr => attr.type === 'relation' && attr.name === 'relationMapLink');
-
-            // this will replace relation map links
-            for (const link of relationMapLinks) {
-                // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
-                content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
-            }
-        }
-
-        let note = repository.getNote(noteId);
-
-        if (note) {
-            note.setContent(content);
-        }
-        else {
-            ({note} = noteService.createNewNote({
-                parentNoteId: parentNoteId,
-                title: noteTitle,
-                content: content,
-                noteId,
-                type,
-                mime,
-                prefix: noteMeta ? noteMeta.prefix : '',
-                isExpanded: noteMeta ? noteMeta.isExpanded : false,
-                notePosition: noteMeta ? noteMeta.notePosition : false,
-                isProtected: importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
-            }));
-
-            saveAttributes(note, noteMeta);
-
-            if (!firstNote) {
-                firstNote = note;
-            }
-
-            if (type === 'text') {
-                filePath = utils.removeTextFileExtension(filePath);
-            }
-        }
-
-        if (!noteMeta && (type === 'file' || type === 'image')) {
-            attributes.push({
-                noteId,
-                type: 'label',
-                name: 'originalFileName',
-                value: path.basename(filePath)
-            });
-        }
-    }
-
-    /** @return {string} path without leading or trailing slash and backslashes converted to forward ones*/
-    function normalizeFilePath(filePath) {
-        filePath = filePath.replace(/\\/g, "/");
-
-        if (filePath.startsWith("/")) {
-            filePath = filePath.substr(1);
-        }
-
-        if (filePath.endsWith("/")) {
-            filePath = filePath.substr(0, filePath.length - 1);
-        }
-
-        return filePath;
-    }
-
-    extract.on('entry', function(header, stream, next) {
-        const chunks = [];
-
-        stream.on("data", function (chunk) {
-            chunks.push(chunk);
-        });
-
-        // header is the tar header
-        // stream is the content body (might be an empty stream)
-        // call next when you are done with this entry
-
-        stream.on('end', function() {
-            const filePath = normalizeFilePath(header.name);
-
-            const content = Buffer.concat(chunks);
-
-            if (filePath === '!!!meta.json') {
-                metaFile = JSON.parse(content.toString("UTF-8"));
-            }
-            else if (header.type === 'directory') {
-                saveDirectory(filePath);
-            }
-            else if (header.type === 'file') {
-                saveNote(filePath, content);
-            }
-            else {
-                log.info("Ignoring tar import entry with type " + header.type);
-            }
-
-            taskContext.increaseProgressCount();
-
-            next(); // ready for next entry
-        });
-
-        stream.resume(); // just auto drain the stream
-    });
-
-    return new Promise(resolve => {
-        extract.on('finish', function() {
-            const createdNoteIds = {};
-
-            for (const path in createdPaths) {
-                const noteId = createdPaths[path];
-
-                createdNoteIds[noteId] = true;
-            }
-
-            for (const noteId in createdNoteIds) { // now the noteIds are unique
-                noteService.scanForLinks(repository.getNote(noteId));
-
-                if (!metaFile) {
-                    // if there's no meta file then the notes are created based on the order in the tar file, but that
-                    // is usually quite random, so we sort the notes the way they would appear in the file manager
-                    treeService.sortNotesAlphabetically(noteId, true);
-                }
-
-                taskContext.increaseProgressCount();
-            }
-
-            // we're saving attributes and links only now so that all relation and link target notes
-            // are already in the database (we don't want to have "broken" relations, not even transitionally)
-            for (const attr of attributes) {
-                if (attr.type !== 'relation' || attr.value in createdNoteIds) {
-                    new Attribute(attr).save();
-                }
-                else {
-                    log.info("Relation not imported since target note doesn't exist: " + JSON.stringify(attr));
-                }
-            }
-
-            resolve(firstNote);
-        });
-
-        const bufferStream = new stream.PassThrough();
-        bufferStream.end(fileBuffer);
-
-        bufferStream.pipe(extract);
-    });
-}
-
-module.exports = {
-    importTar
-};