mirror of https://github.com/zadam/trilium.git
synced 2025-06-06 18:08:33 +02:00

removed tar import

This commit is contained in:
parent 054420225f
commit 055db32510
package.json
@@ -23,7 +23,7 @@
     "test-es6": "node -r esm spec-es6/attribute_parser.spec.js "
   },
   "dependencies": {
-    "async-mutex": "0.2.3",
+    "async-mutex": "0.2.4",
     "axios": "0.19.2",
     "better-sqlite3": "^7.1.0",
     "body-parser": "1.19.0",
@@ -66,7 +66,6 @@
     "serve-favicon": "2.5.0",
     "session-file-store": "1.4.0",
     "string-similarity": "4.0.1",
-    "tar-stream": "2.1.2",
     "turndown": "6.0.0",
     "turndown-plugin-gfm": "1.0.2",
     "unescape": "1.0.1",
src/routes/api/import.js
@@ -3,7 +3,6 @@
 const repository = require('../../services/repository');
 const enexImportService = require('../../services/import/enex');
 const opmlImportService = require('../../services/import/opml');
-const tarImportService = require('../../services/import/tar');
 const zipImportService = require('../../services/import/zip');
 const singleImportService = require('../../services/import/single');
 const cls = require('../../services/cls');
@@ -48,9 +47,7 @@ async function importToBranch(req) {
     const taskContext = TaskContext.getInstance(taskId, 'import', options);

     try {
-        if (extension === '.tar' && options.explodeArchives) {
-            note = await tarImportService.importTar(taskContext, file.buffer, parentNote);
-        } else if (extension === '.zip' && options.explodeArchives) {
+        if (extension === '.zip' && options.explodeArchives) {
             note = await zipImportService.importZip(taskContext, file.buffer, parentNote);
         } else if (extension === '.opml' && options.explodeArchives) {
             note = await opmlImportService.importOpml(taskContext, file.buffer, parentNote);
src/services/import/tar.js (deleted)
@@ -1,446 +0,0 @@
"use strict";

const Attribute = require('../../entities/attribute');
const utils = require('../../services/utils');
const log = require('../../services/log');
const repository = require('../../services/repository');
const noteService = require('../../services/notes');
const attributeService = require('../../services/attributes');
const Branch = require('../../entities/branch');
const tar = require('tar-stream');
const stream = require('stream');
const path = require('path');
const commonmark = require('commonmark');
const TaskContext = require('../task_context.js');
const protectedSessionService = require('../protected_session');
const mimeService = require("./mime");
const sql = require("../sql");
const treeService = require("../tree");
const htmlSanitizer = require("../html_sanitizer");

/**
 * @param {TaskContext} taskContext
 * @param {Buffer} fileBuffer
 * @param {Note} importRootNote
 * @return {Promise<*>}
 */
async function importTar(taskContext, fileBuffer, importRootNote) {
    // maps from original noteId (in tar file) to newly generated noteId
    const noteIdMap = {};
    const attributes = [];
    // path => noteId
    const createdPaths = { '/': importRootNote.noteId, '\\': importRootNote.noteId };
    const mdReader = new commonmark.Parser();
    const mdWriter = new commonmark.HtmlRenderer();
    let metaFile = null;
    let firstNote = null;

    const extract = tar.extract();

    function getNewNoteId(origNoteId) {
        // in case the original noteId is empty; this probably shouldn't happen, but it's still good to have this precaution
        if (!origNoteId.trim()) {
            return "";
        }

        if (!noteIdMap[origNoteId]) {
            noteIdMap[origNoteId] = utils.newEntityId();
        }

        return noteIdMap[origNoteId];
    }
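
    // Looks up the !!!meta.json descriptor for a path inside the archive. Judging from the
    // walk below, the meta file holds a tree of descriptors roughly shaped like
    // { "files": [ { "noteId": "...", "dataFileName": "a.html", "dirFileName": "a", "children": [ ... ] } ] };
    // the last matched node describes the entry itself, the previously matched one its parent.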
    function getMeta(filePath) {
        if (!metaFile) {
            return {};
        }

        const pathSegments = filePath.split(/[\/\\]/g);

        let cursor = {
            isImportRoot: true,
            children: metaFile.files
        };

        let parent;

        for (const segment of pathSegments) {
            if (!cursor || !cursor.children || cursor.children.length === 0) {
                return {};
            }

            parent = cursor;
            cursor = cursor.children.find(file => file.dataFileName === segment || file.dirFileName === segment);
        }

        return {
            parentNoteMeta: parent,
            noteMeta: cursor
        };
    }

    function getParentNoteId(filePath, parentNoteMeta) {
        let parentNoteId;

        if (parentNoteMeta) {
            parentNoteId = parentNoteMeta.isImportRoot ? importRootNote.noteId : getNewNoteId(parentNoteMeta.noteId);
        }
        else {
            const parentPath = path.dirname(filePath);

            if (parentPath === '.') {
                parentNoteId = importRootNote.noteId;
            }
            else if (parentPath in createdPaths) {
                parentNoteId = createdPaths[parentPath];
            }
            else {
                // tar allows out-of-order records - i.e. a file in a directory can appear in the tar stream before the actual directory
                // (out-of-order-directory-records.tar in the test set)
                parentNoteId = saveDirectory(parentPath);
            }
        }

        return parentNoteId;
    }
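
    // Both a note's data file ("a/b.html") and its matching child directory ("a/b") normalize
    // to the same extension-less path here, so they resolve to the same noteId.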
    function getNoteId(noteMeta, filePath) {
        const filePathNoExt = utils.removeTextFileExtension(filePath);

        if (filePathNoExt in createdPaths) {
            return createdPaths[filePathNoExt];
        }

        const noteId = noteMeta ? getNewNoteId(noteMeta.noteId) : utils.newEntityId();

        createdPaths[filePathNoExt] = noteId;

        return noteId;
    }

    function detectFileTypeAndMime(taskContext, filePath) {
        const mime = mimeService.getMime(filePath) || "application/octet-stream";
        const type = mimeService.getType(taskContext.data, mime);

        return { mime, type };
    }
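
    // Attribute rows in the meta file still carry noteIds from the exporting instance:
    // relation values are remapped through getNewNoteId() below, and the rows are only
    // collected here - they get saved at the very end, once all target notes exist.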
    function saveAttributes(note, noteMeta) {
        if (!noteMeta) {
            return;
        }

        for (const attr of noteMeta.attributes) {
            attr.noteId = note.noteId;

            if (!attributeService.isAttributeType(attr.type)) {
                log.error("Unrecognized attribute type " + attr.type);
                continue;
            }

            if (attr.type === 'relation' && ['internalLink', 'imageLink', 'relationMapLink', 'includeNoteLink'].includes(attr.name)) {
                // these relations are created automatically and as such don't need to be duplicated in the import
                continue;
            }

            if (attr.type === 'relation') {
                attr.value = getNewNoteId(attr.value);
            }

            if (taskContext.data.safeImport && attributeService.isAttributeDangerous(attr.type, attr.name)) {
                attr.name = 'disabled-' + attr.name;
            }

            attributes.push(attr);
        }
    }

    function saveDirectory(filePath) {
        const { parentNoteMeta, noteMeta } = getMeta(filePath);

        const noteId = getNoteId(noteMeta, filePath);
        const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);
        const parentNoteId = getParentNoteId(filePath, parentNoteMeta);

        let note = repository.getNote(noteId);

        if (note) {
            return;
        }

        sql.transactional(() => {
            ({note} = noteService.createNewNote({
                parentNoteId: parentNoteId,
                title: noteTitle,
                content: '',
                noteId: noteId,
                type: noteMeta ? noteMeta.type : 'text',
                mime: noteMeta ? noteMeta.mime : 'text/html',
                prefix: noteMeta ? noteMeta.prefix : '',
                isExpanded: noteMeta ? noteMeta.isExpanded : false,
                isProtected: importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
            }));

            saveAttributes(note, noteMeta);
        });

        if (!firstNote) {
            firstNote = note;
        }

        return noteId;
    }
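
    // Resolves a document-relative URL to the noteId it should point at. For example, with
    // filePath "a/b/note.html" and url "../c/image.png", the walk below yields absUrl
    // "a/c/image.png", whose meta entry (or a freshly assigned noteId) is returned.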
    function getNoteIdFromRelativeUrl(url, filePath) {
        while (url.startsWith("./")) {
            url = url.substr(2);
        }

        let absUrl = path.dirname(filePath);

        while (url.startsWith("../")) {
            absUrl = path.dirname(absUrl);

            url = url.substr(3);
        }

        if (absUrl === '.') {
            absUrl = '';
        }

        absUrl += (absUrl.length > 0 ? '/' : '') + url;

        const {noteMeta} = getMeta(absUrl);
        const targetNoteId = getNoteId(noteMeta, absUrl);
        return targetNoteId;
    }

    function saveNote(filePath, content) {
        const {parentNoteMeta, noteMeta} = getMeta(filePath);

        if (noteMeta && noteMeta.noImport) {
            return;
        }

        const noteId = getNoteId(noteMeta, filePath);
        const parentNoteId = getParentNoteId(filePath, parentNoteMeta);

        if (noteMeta && noteMeta.isClone) {
            new Branch({
                noteId,
                parentNoteId,
                isExpanded: noteMeta.isExpanded,
                prefix: noteMeta.prefix,
                notePosition: noteMeta.notePosition
            }).save();

            return;
        }

        const {type, mime} = noteMeta ? noteMeta : detectFileTypeAndMime(taskContext, filePath);

        if (type !== 'file' && type !== 'image') {
            content = content.toString("UTF-8");
        }
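
        // markdown sources are round-tripped through commonmark (parse + render) since text notes are stored as HTML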
        if ((noteMeta && noteMeta.format === 'markdown')
            || (!noteMeta && taskContext.data.textImportedAsText && ['text/markdown', 'text/x-markdown'].includes(mime))) {
            const parsed = mdReader.parse(content);
            content = mdWriter.render(parsed);
        }

        const noteTitle = utils.getNoteTitle(filePath, taskContext.data.replaceUnderscoresWithSpaces, noteMeta);

        if (type === 'text') {
            function isUrlAbsolute(url) {
                return /^(?:[a-z]+:)?\/\//i.test(url);
            }

            content = htmlSanitizer.sanitize(content);

            content = content.replace(/<html.*<body[^>]*>/gis, "");
            content = content.replace(/<\/body>.*<\/html>/gis, "");

            content = content.replace(/src="([^"]*)"/g, (match, url) => {
                url = decodeURIComponent(url);

                if (isUrlAbsolute(url) || url.startsWith("/")) {
                    return match;
                }

                const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);

                return `src="api/images/${targetNoteId}/${path.basename(url)}"`;
            });

            content = content.replace(/href="([^"]*)"/g, (match, url) => {
                url = decodeURIComponent(url);

                if (isUrlAbsolute(url)) {
                    return match;
                }

                const targetNoteId = getNoteIdFromRelativeUrl(url, filePath);

                return `href="#root/${targetNoteId}"`;
            });

            content = content.replace(/<h1>([^<]*)<\/h1>/gi, (match, text) => {
                if (noteTitle.trim() === text.trim()) {
                    return ""; // remove whole H1 tag
                }
                else {
                    return match;
                }
            });
        }

        if (type === 'relation-map' && noteMeta) {
            const relationMapLinks = (noteMeta.attributes || [])
                .filter(attr => attr.type === 'relation' && attr.name === 'relationMapLink');

            // this will replace relation map links
            for (const link of relationMapLinks) {
                // no need to escape the regexp find string since it's a noteId which doesn't contain any special characters
                content = content.replace(new RegExp(link.value, "g"), getNewNoteId(link.value));
            }
        }

        let note = repository.getNote(noteId);

        if (note) {
            note.setContent(content);
        }
        else {
            ({note} = noteService.createNewNote({
                parentNoteId: parentNoteId,
                title: noteTitle,
                content: content,
                noteId,
                type,
                mime,
                prefix: noteMeta ? noteMeta.prefix : '',
                isExpanded: noteMeta ? noteMeta.isExpanded : false,
                notePosition: noteMeta ? noteMeta.notePosition : false,
                isProtected: importRootNote.isProtected && protectedSessionService.isProtectedSessionAvailable(),
            }));

            saveAttributes(note, noteMeta);

            if (!firstNote) {
                firstNote = note;
            }

            if (type === 'text') {
                filePath = utils.removeTextFileExtension(filePath);
            }
        }

        if (!noteMeta && (type === 'file' || type === 'image')) {
            attributes.push({
                noteId,
                type: 'label',
                name: 'originalFileName',
                value: path.basename(filePath)
            });
        }
    }

    /** @return {string} path without leading or trailing slashes, with backslashes converted to forward ones */
    function normalizeFilePath(filePath) {
        filePath = filePath.replace(/\\/g, "/");

        if (filePath.startsWith("/")) {
            filePath = filePath.substr(1);
        }

        if (filePath.endsWith("/")) {
            filePath = filePath.substr(0, filePath.length - 1);
        }

        return filePath;
    }
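
    // tar-stream emits one 'entry' event per record and the body of each entry is buffered
    // in full before processing. This appears to rely on !!!meta.json preceding the entries
    // it describes; entries seen before the meta file fall back to path-based handling.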
    extract.on('entry', function(header, stream, next) {
        const chunks = [];

        stream.on("data", function (chunk) {
            chunks.push(chunk);
        });

        // header is the tar header
        // stream is the content body (might be an empty stream)
        // call next when you are done with this entry

        stream.on('end', function() {
            const filePath = normalizeFilePath(header.name);

            const content = Buffer.concat(chunks);

            if (filePath === '!!!meta.json') {
                metaFile = JSON.parse(content.toString("UTF-8"));
            }
            else if (header.type === 'directory') {
                saveDirectory(filePath);
            }
            else if (header.type === 'file') {
                saveNote(filePath, content);
            }
            else {
                log.info("Ignoring tar import entry with type " + header.type);
            }

            taskContext.increaseProgressCount();

            next(); // ready for next entry
        });

        stream.resume(); // just auto-drain the stream
    });

    return new Promise(resolve => {
        extract.on('finish', function() {
            const createdNoteIds = {};

            for (const path in createdPaths) {
                const noteId = createdPaths[path];

                createdNoteIds[noteId] = true;
            }

            for (const noteId in createdNoteIds) { // now the noteIds are unique
                noteService.scanForLinks(repository.getNote(noteId));

                if (!metaFile) {
                    // if there's no meta file, then the notes are created based on their order in the tar file, which
                    // is usually quite random, so we sort the notes the way they would appear in a file manager
                    treeService.sortNotesAlphabetically(noteId, true);
                }

                taskContext.increaseProgressCount();
            }

            // we're saving attributes and links only now so that all relation and link target notes
            // are already in the database (we don't want to have "broken" relations, not even transitionally)
            for (const attr of attributes) {
                if (attr.type !== 'relation' || attr.value in createdNoteIds) {
                    new Attribute(attr).save();
                }
                else {
                    log.info("Relation not imported since target note doesn't exist: " + JSON.stringify(attr));
                }
            }

            resolve(firstNote);
        });

        const bufferStream = new stream.PassThrough();
        bufferStream.end(fileBuffer);

        bufferStream.pipe(extract);
    });
}

module.exports = {
    importTar
};