mirror of
				https://github.com/zadam/trilium.git
				synced 2025-11-04 13:39:01 +01:00 
			
		
		
		
	similar notes changes
This commit is contained in:
		
							parent
							
								
									f5216e4799
								
							
						
					
					
						commit
						eeacd8118f
					
				
							
								
								
									
										12
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										12
									
								
								package-lock.json
									
									
									
										generated
									
									
									
								
							@ -3143,9 +3143,9 @@
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    "electron": {
 | 
			
		||||
      "version": "9.3.0",
 | 
			
		||||
      "resolved": "https://registry.npmjs.org/electron/-/electron-9.3.0.tgz",
 | 
			
		||||
      "integrity": "sha512-7zPLEZ+kOjVJqfawMQ0vVuZZRqvZIeiID3tbjjbVybbxXIlFMpZ2jogoh7PV3rLrtm+dKRfu7Qc4E7ob1d0FqQ==",
 | 
			
		||||
      "version": "9.3.1",
 | 
			
		||||
      "resolved": "https://registry.npmjs.org/electron/-/electron-9.3.1.tgz",
 | 
			
		||||
      "integrity": "sha512-DScrhqBT4a54KfdF0EoipALpHmdQTn3m7SSCtbpTcEcG+UDUiXad2cOfW6DHeVH7N+CVDKDG12q2PhVJjXkFAA==",
 | 
			
		||||
      "dev": true,
 | 
			
		||||
      "requires": {
 | 
			
		||||
        "@electron/get": "^1.0.1",
 | 
			
		||||
@ -4184,9 +4184,9 @@
 | 
			
		||||
      }
 | 
			
		||||
    },
 | 
			
		||||
    "file-type": {
 | 
			
		||||
      "version": "15.0.0",
 | 
			
		||||
      "resolved": "https://registry.npmjs.org/file-type/-/file-type-15.0.0.tgz",
 | 
			
		||||
      "integrity": "sha512-l0JCuF5F7NIybCfa9G2H0lKhhGaf0z+HJyLOmB2feknY7/HBVNyD4PLesGKLGqznwyVXGNnfpIOr+Fvca6bOEg==",
 | 
			
		||||
      "version": "15.0.1",
 | 
			
		||||
      "resolved": "https://registry.npmjs.org/file-type/-/file-type-15.0.1.tgz",
 | 
			
		||||
      "integrity": "sha512-0LieQlSA3bWUdErNrxzxfI4rhsvNAVPBO06R8pTc1hp9SE6nhqlVyvhcaXoMmtXkBTPnQenbMPLW9X76hH76oQ==",
 | 
			
		||||
      "requires": {
 | 
			
		||||
        "readable-web-to-node-stream": "^2.0.0",
 | 
			
		||||
        "strtok3": "^6.0.3",
 | 
			
		||||
 | 
			
		||||
@ -41,7 +41,7 @@
 | 
			
		||||
    "electron-window-state": "5.0.3",
 | 
			
		||||
    "express": "4.17.1",
 | 
			
		||||
    "express-session": "1.17.1",
 | 
			
		||||
    "file-type": "15.0.0",
 | 
			
		||||
    "file-type": "15.0.1",
 | 
			
		||||
    "fs-extra": "9.0.1",
 | 
			
		||||
    "helmet": "4.1.1",
 | 
			
		||||
    "html": "1.0.0",
 | 
			
		||||
@ -66,7 +66,6 @@
 | 
			
		||||
    "semver": "7.3.2",
 | 
			
		||||
    "serve-favicon": "2.5.0",
 | 
			
		||||
    "session-file-store": "1.4.0",
 | 
			
		||||
    "string-similarity": "4.0.2",
 | 
			
		||||
    "striptags": "3.1.1",
 | 
			
		||||
    "turndown": "6.0.0",
 | 
			
		||||
    "turndown-plugin-gfm": "1.0.2",
 | 
			
		||||
@ -77,7 +76,7 @@
 | 
			
		||||
  },
 | 
			
		||||
  "devDependencies": {
 | 
			
		||||
    "cross-env": "7.0.2",
 | 
			
		||||
    "electron": "9.3.0",
 | 
			
		||||
    "electron": "9.3.1",
 | 
			
		||||
    "electron-builder": "22.8.0",
 | 
			
		||||
    "electron-packager": "15.1.0",
 | 
			
		||||
    "electron-rebuild": "2.0.3",
 | 
			
		||||
 | 
			
		||||
@ -141,7 +141,7 @@ export default class SimilarNotesWidget extends TabAwareWidget {
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            const $item = (await linkService.createNoteLink(similarNote.notePath.join("/")))
 | 
			
		||||
                .css("font-size", 24 * similarNote.coeff);
 | 
			
		||||
                .css("font-size", 24 * similarNote.score);
 | 
			
		||||
 | 
			
		||||
            $list.append($item);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
@ -3,9 +3,7 @@
 | 
			
		||||
const noteCache = require('./note_cache');
 | 
			
		||||
const hoistedNoteService = require('../hoisted_note');
 | 
			
		||||
const protectedSessionService = require('../protected_session');
 | 
			
		||||
const stringSimilarity = require('string-similarity');
 | 
			
		||||
const log = require('../log');
 | 
			
		||||
const dateUtils = require('../date_utils');
 | 
			
		||||
 | 
			
		||||
function isNotePathArchived(notePath) {
 | 
			
		||||
    const noteId = notePath[notePath.length - 1];
 | 
			
		||||
@ -175,87 +173,6 @@ function getNotePath(noteId) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 * Scores candidateNote against sourceNote and, when the score is high enough, appends
 * a {coeff, notePath, noteId} record to results.
 *
 * @param sourceNote - note for which similar notes are searched; its flatText is compared
 * @param candidateNote - note being evaluated; flatText, utcDateCreated and noteId are read
 * @param dates - object with minDate, minExcludedDate, maxExcludedDate and maxDate limits which are
 *                compared with candidateNote.utcDateCreated using plain comparison operators
 * @param {Array} results - output array, mutated in place
 */
function evaluateSimilarity(sourceNote, candidateNote, dates, results) {
    let coeff = stringSimilarity.compareTwoStrings(sourceNote.flatText, candidateNote.flatText);
    const {utcDateCreated} = candidateNote;

    /**
     * We want to improve standing of notes which have been created in similar time to each other since
     * there's a good chance they are related.
     *
     * But there's an exception - if they were created really close to each other (within a few seconds) then
     * they are probably part of the import and not created by hand - these OTOH should not benefit.
     */
    const isInRewardedWindow = utcDateCreated >= dates.minDate && utcDateCreated <= dates.maxDate;

    // A candidate is outside of the excluded window when it lies on EITHER side of it. Requiring both
    // sides at once (assuming minExcludedDate <= maxExcludedDate) can never be satisfied.
    const isOutsideExcludedWindow = utcDateCreated < dates.minExcludedDate
        || utcDateCreated > dates.maxExcludedDate;

    if (isInRewardedWindow && isOutsideExcludedWindow) {
        coeff += 0.3;
    }

    if (coeff > 0.5) {
        const notePath = getSomePath(candidateNote);

        // this takes care of note hoisting
        if (!notePath) {
            return;
        }

        if (isNotePathArchived(notePath)) {
            coeff -= 0.2; // archived penalization
        }

        results.push({coeff, notePath, noteId: candidateNote.noteId});
    }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Returns a promise which is resolved (with no value) from a zero-delay timer callback.
 *
 * Awaiting it in the middle of a long running synchronous loop breaks the work up
 * so that the loop does not block everything else, see
 * https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
 */
function setImmediatePromise() {
    return new Promise((done) => {
        setTimeout(done, 0);
    });
}
 | 
			
		||||
 | 
			
		||||
/**
 * Finds notes similar to the given one.
 *
 * Every other note in noteCache.notes is passed to evaluateSimilarity() which appends matching
 * records to the result list. The loop awaits setImmediatePromise() after every 200 candidates
 * so the scan is split into chunks.
 *
 * @param noteId - key into noteCache.notes
 * @returns {Promise<Array>} records sorted by coeff, highest first; an empty array when the note
 *                           is not in the note cache
 */
async function findSimilarNotes(noteId) {
    const results = [];
    let i = 0;

    const origNote = noteCache.notes[noteId];

    if (!origNote) {
        return [];
    }

    // NOTE(review): parseDateTime() is not visible from here. Wrapping its result into a Date and taking
    // getTime() yields a numeric millisecond timestamp whether it returns a Date or a number, so that
    // "+" below is an addition and never a string concatenation.
    const dateCreatedTs = new Date(dateUtils.parseDateTime(origNote.utcDateCreated)).getTime();

    // Date works with milliseconds while the windows are meant in seconds (the excluded window
    // is described as "few seconds" by the scoring code)
    const MS_PER_SECOND = 1000;

    const dates = {
        minDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 1800 * MS_PER_SECOND)),
        minExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 5 * MS_PER_SECOND)),
        maxExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 5 * MS_PER_SECOND)),
        maxDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 1800 * MS_PER_SECOND)),
    };

    for (const note of Object.values(noteCache.notes)) {
        if (note.noteId === origNote.noteId) {
            continue;
        }

        evaluateSimilarity(origNote, note, dates, results);

        i++;

        if (i % 200 === 0) {
            await setImmediatePromise();
        }
    }

    // numeric comparator returns 0 for equal coefficients, which keeps the ordering consistent
    results.sort((a, b) => b.coeff - a.coeff);

    // NOTE(review): the threshold (50) and the cut (200) differ - effectively at most 200 records are returned
    return results.length > 50 ? results.slice(0, 200) : results;
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * @param noteId
 | 
			
		||||
 * @returns {boolean} - true if note exists (is not deleted) and is available in current note hoisting
 | 
			
		||||
@ -274,5 +191,5 @@ module.exports = {
 | 
			
		||||
    isAvailable,
 | 
			
		||||
    isArchived,
 | 
			
		||||
    isInAncestor,
 | 
			
		||||
    findSimilarNotes
 | 
			
		||||
    isNotePathArchived
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										180
									
								
								src/services/note_cache/similarity.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										180
									
								
								src/services/note_cache/similarity.js
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,180 @@
 | 
			
		||||
const noteCache = require('./note_cache');
 | 
			
		||||
const noteCacheService = require('./note_cache_service.js');
 | 
			
		||||
const dateUtils = require('../date_utils');
 | 
			
		||||
 | 
			
		||||
/**
 * Computes similarity score of the candidate note. At the moment the only contribution
 * is a 0.3 bonus based on the creation date of the candidate.
 *
 * @param candidateNote - note being scored; only its utcDateCreated is read
 * @param dates - object with minDate, minExcludedDate, maxExcludedDate and maxDate limits which are
 *                compared with candidateNote.utcDateCreated using plain comparison operators
 * @returns {number} 0.3 when the candidate qualifies for the creation date bonus, 0 otherwise
 */
function computeScore(candidateNote, dates) {
    let score = 0;

    /**
     * We want to improve standing of notes which have been created in similar time to each other since
     * there's a good chance they are related.
     *
     * But there's an exception - if they were created really close to each other (within a few seconds) then
     * they are probably part of the import and not created by hand - these OTOH should not benefit.
     */
    const {utcDateCreated} = candidateNote;

    const isInRewardedWindow = utcDateCreated >= dates.minDate && utcDateCreated <= dates.maxDate;

    // A candidate is outside of the excluded window when it lies on EITHER side of it. Requiring both
    // sides at once (assuming minExcludedDate <= maxExcludedDate) can never be satisfied.
    const isOutsideExcludedWindow = utcDateCreated < dates.minExcludedDate
        || utcDateCreated > dates.maxExcludedDate;

    if (isInRewardedWindow && isOutsideExcludedWindow) {
        score += 0.3;
    }

    return score;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Scores the candidate note and, when the score is high enough, appends
 * a {score, notePath, noteId} record to results.
 *
 * @param sourceNote - note for which similar notes are searched (not used by the scoring yet)
 * @param candidateNote - note being evaluated
 * @param rewardMap - word -> reward map of the source note (not used by the scoring yet)
 * @param dates - creation date limits, handed over to computeScore()
 * @param {Array} results - output array, mutated in place
 */
function evaluateSimilarity(sourceNote, candidateNote, rewardMap, dates, results) {
    // computeScore() is declared as (candidateNote, dates) - passing rewardMap as the second
    // argument would make it read the date limits from the reward map
    let score = computeScore(candidateNote, dates);

    if (score > 0.5) {
        const notePath = noteCacheService.getSomePath(candidateNote);

        // this takes care of note hoisting
        if (!notePath) {
            return;
        }

        if (noteCacheService.isNotePathArchived(notePath)) {
            score -= 0.2; // archived penalization
        }

        results.push({score, notePath, noteId: candidateNote.noteId});
    }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Returns a promise which is resolved (with no value) from a zero-delay timer callback.
 *
 * Awaiting it in the middle of a long running synchronous loop breaks the work up
 * so that the loop does not block everything else, see
 * https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
 */
function setImmediatePromise() {
    return new Promise((done) => {
        setTimeout(done, 0);
    });
}
 | 
			
		||||
 | 
			
		||||
// Attribute names which buildRewardMap() does not add into the reward map
// (values of such attributes are still added).
const IGNORED_ATTR_NAMES = ["includenotelink", "internallink", "imagelink", "relationmaplink"];
 | 
			
		||||
 | 
			
		||||
/**
 * Builds a word -> reward map out of the texts attached to the note. Words are collected
 * (via updateMap()) from these sources, each with its own base reward:
 *  - titles of ancestors and prefixes of their parent branches (0.4)
 *  - note type (0.2) and processed mime (0.3)
 *  - note title and prefixes of its parent branches (1)
 *  - attribute names (unless listed in IGNORED_ATTR_NAMES) and attribute values -
 *    0.8 when attr.noteId equals note.noteId, 0.5 otherwise
 *
 * @param {Note} note
 * @returns {Object} plain object keyed by word
 */
function buildRewardMap(note) {
    const rewards = {};

    for (const ancestor of note.ancestors) {
        updateMap(rewards, ancestor.title, 0.4);

        for (const ancestorBranch of ancestor.parentBranches) {
            updateMap(rewards, ancestorBranch.prefix, 0.4);
        }
    }

    updateMap(rewards, note.type, 0.2);

    const mimeText = processMime(note.mime);
    updateMap(rewards, mimeText, 0.3);

    updateMap(rewards, note.title, 1);

    for (const ownBranch of note.parentBranches) {
        updateMap(rewards, ownBranch.prefix, 1);
    }

    for (const attribute of note.attributes) {
        // presumably distinguishes attributes defined directly on the note from the other ones
        const attrReward = note.noteId === attribute.noteId ? 0.8 : 0.5;
        const isIgnoredName = IGNORED_ATTR_NAMES.includes(attribute.name);

        if (!isIgnoredName) {
            updateMap(rewards, attribute.name, attrReward);
        }

        updateMap(rewards, attribute.value, attrReward);
    }

    return rewards;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Extracts the interesting part of a MIME type - the subtype without the "x-" prefix.
 *
 * @param {string} mime - MIME type such as "text/x-python" or "application/json"
 * @returns {string|undefined} the subtype ("python", "json"), or undefined when mime is empty
 *                             or does not contain "/"
 */
function processMime(mime) {
    if (!mime) {
        return;
    }

    const chunks = mime.split('/');

    if (chunks.length < 2) {
        return;
    }

    // we're not interested in 'text/' or 'application/' prefix
    let str = chunks[1];

    // non-standard subtypes are marked with the "x-" prefix (e.g. "x-python"), strip it as well
    if (str.startsWith('x-')) {
        str = str.slice(2);
    }

    return str;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Splits the text into words (on runs of non-word characters) and adds a reward
 * for each of them into the map.
 *
 * @param {Object} map - word -> accumulated reward, mutated in place
 * @param {string} text - text to split; nothing happens when it is empty/null/undefined
 * @param {number} baseReward - reward multiplier for every word of the text
 */
function updateMap(map, text, baseReward) {
    if (!text) {
        return;
    }

    for (const word of text.split(/\W+/)) {
        // split() produces empty strings when the text starts or ends with a separator
        if (word === "") {
            continue;
        }

        // only own entries count - otherwise words like "constructor" or "toString" would pick up
        // members inherited by a plain object and the reward would turn into a concatenated string
        const current = Object.prototype.hasOwnProperty.call(map, word) ? map[word] : 0;

        // reward grows with the length of matched string
        map[word] = current + baseReward * Math.sqrt(word.length);
    }
}
 | 
			
		||||
 | 
			
		||||
/**
 * Unfinished placeholder - the argument is ignored and undefined is always returned.
 */
function tokenize(str) {
    return undefined;
}
 | 
			
		||||
 | 
			
		||||
/**
 * Finds notes similar to the given one.
 *
 * Every other note in noteCache.notes is passed to evaluateSimilarity() which appends matching
 * records to the result list. The loop awaits setImmediatePromise() after every 200 candidates
 * so the scan is split into chunks.
 *
 * @param noteId - key into noteCache.notes
 * @returns {Promise<Array>} records sorted by score, highest first; an empty array when the note
 *                           is not in the note cache
 */
async function findSimilarNotes(noteId) {
    const results = [];
    let i = 0;

    const baseNote = noteCache.notes[noteId];

    if (!baseNote) {
        return [];
    }

    // NOTE(review): parseDateTime() is not visible from here. Wrapping its result into a Date and taking
    // getTime() yields a numeric millisecond timestamp whether it returns a Date or a number, so that
    // "+" below is an addition and never a string concatenation.
    const dateCreatedTs = new Date(dateUtils.parseDateTime(baseNote.utcDateCreated)).getTime();

    // Date works with milliseconds while the windows are meant in seconds (the excluded window
    // is described as "few seconds" by the scoring code)
    const MS_PER_SECOND = 1000;

    const dates = {
        minDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 1800 * MS_PER_SECOND)),
        minExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 5 * MS_PER_SECOND)),
        maxExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 5 * MS_PER_SECOND)),
        maxDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 1800 * MS_PER_SECOND)),
    };

    const rewardMap = buildRewardMap(baseNote);

    for (const candidateNote of Object.values(noteCache.notes)) {
        if (candidateNote.noteId === baseNote.noteId) {
            continue;
        }

        evaluateSimilarity(baseNote, candidateNote, rewardMap, dates, results);

        i++;

        if (i % 200 === 0) {
            await setImmediatePromise();
        }
    }

    // numeric comparator returns 0 for equal scores, which keeps the ordering consistent
    results.sort((a, b) => b.score - a.score);

    // NOTE(review): the threshold (50) and the cut (200) differ - effectively at most 200 records are returned
    return results.length > 50 ? results.slice(0, 200) : results;
}
 | 
			
		||||
 | 
			
		||||
module.exports = {
 | 
			
		||||
    findSimilarNotes
 | 
			
		||||
};
 | 
			
		||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user