mirror of
https://github.com/zadam/trilium.git
synced 2025-03-01 14:22:32 +01:00
251 lines
6.5 KiB
JavaScript
251 lines
6.5 KiB
JavaScript
const noteCache = require('./note_cache');
|
|
const noteCacheService = require('./note_cache_service.js');
|
|
const dateUtils = require('../date_utils');
|
|
|
|
/**
 * Attribute names which are not scored when looking for similar notes.
 * Only the attribute *name* is skipped for these - the attribute value is still scored.
 *
 * Frozen because the list is shared by several functions and must never be modified at runtime.
 */
const IGNORED_ATTR_NAMES = Object.freeze([
    "includenotelink",
    "internallink",
    "imagelink",
    "relationmaplink"
]);
|
|
|
|
/**
 * Builds a map of lower-cased words found in the note (and its surroundings) to a numeric reward.
 *
 * Words are collected from:
 *  - titles of decrypted ancestors and the branch prefixes of all ancestors (base reward 0.3)
 *  - the note type (0.2) and the trimmed MIME type (0.3)
 *  - the note title, only when the note is decrypted, and the note's own branch prefixes (1)
 *  - attribute names (unless listed in IGNORED_ATTR_NAMES) and attribute values
 *    (0.8 for attributes owned by the note itself, 0.5 otherwise)
 *
 * Every occurrence of a word adds `baseReward * sqrt(word.length)` to that word's entry.
 *
 * @param {Note} note
 * @returns {Object<string, number>} prototype-less object mapping word => accumulated reward
 */
function buildRewardMap(note) {
    // No prototype: words come from user content, so a word like "constructor" must not
    // resolve to an inherited Object.prototype member and corrupt the accumulated reward.
    const map = Object.create(null);

    function addToRewardMap(text, baseReward) {
        if (!text) {
            return;
        }

        for (const word of text.toLowerCase().split(/\W+/)) {
            // splitting can produce empty strings (e.g. text starting with punctuation)
            if (word) {
                // reward grows with the length of matched string
                map[word] = (map[word] || 0) + baseReward * Math.sqrt(word.length);
            }
        }
    }

    for (const ancestorNote of note.ancestors) {
        // titles are only used when the note is decrypted
        if (ancestorNote.isDecrypted) {
            addToRewardMap(ancestorNote.title, 0.3);
        }

        for (const branch of ancestorNote.parentBranches) {
            addToRewardMap(branch.prefix, 0.3);
        }
    }

    addToRewardMap(note.type, 0.2);
    addToRewardMap(trimMime(note.mime), 0.3);

    if (note.isDecrypted) {
        addToRewardMap(note.title, 1);
    }

    for (const branch of note.parentBranches) {
        addToRewardMap(branch.prefix, 1);
    }

    for (const attr of note.attributes) {
        // attributes owned directly by the note weigh more than the other ones
        const reward = note.noteId === attr.noteId ? 0.8 : 0.5;

        if (!IGNORED_ATTR_NAMES.includes(attr.name)) {
            addToRewardMap(attr.name, reward);
        }

        addToRewardMap(attr.value, reward);
    }

    return map;
}
|
|
|
|
/**
 * Cache of already trimmed MIME types, keyed by the full MIME string.
 * Created without a prototype so that inherited names (e.g. "constructor")
 * are never mistaken for cached entries.
 */
const mimeCache = Object.create(null);

/**
 * Reduces a MIME type to the part which is interesting for similarity scoring:
 * the subtype without the leading "x-" marker, e.g. "application/x-javascript" => "javascript".
 *
 * @param {string} [mime]
 * @returns {string|undefined} trimmed subtype; empty string when the value contains no "/";
 *                             undefined when no MIME type was given
 */
function trimMime(mime) {
    if (!mime) {
        return;
    }

    if (!(mime in mimeCache)) {
        const chunks = mime.split('/');

        let str = "";

        if (chunks.length >= 2) {
            // we're not interested in 'text/' or 'application/' prefix
            str = chunks[1];

            // non-standard subtypes are marked with the "x-" prefix which carries no meaning for us
            if (str.startsWith('x-')) {
                str = str.slice(2);
            }
        }

        mimeCache[mime] = str;
    }

    return mimeCache[mime];
}
|
|
|
|
/**
 * Computes the creation-date windows used to reward notes created around the same time as the base note.
 *
 * Candidates created within +-30 minutes of the base note are considered related, except for those
 * created within +-5 seconds, which are assumed to come from a bulk import.
 *
 * JavaScript timestamps are in milliseconds, so the second-based offsets have to be converted;
 * without the conversion the windows shrink to 1.8 seconds and 5 milliseconds respectively.
 *
 * NOTE(review): the offsets are interpreted as seconds based on the "within few seconds" remark
 * next to the date check in findSimilarNotes - confirm this is the intended unit.
 *
 * @param {Note} baseNote - note whose utcDateCreated is the center of the windows
 * @returns {{minDate: string, minExcludedDate: string, maxExcludedDate: string, maxDate: string}}
 *          limits formatted by dateUtils.utcDateStr
 */
function buildDateLimits(baseNote) {
    const MS_PER_SECOND = 1000;
    const RELATED_WINDOW_MS = 1800 * MS_PER_SECOND;
    const EXCLUDED_WINDOW_MS = 5 * MS_PER_SECOND;

    // Number() turns a Date into its epoch milliseconds and leaves a numeric timestamp untouched
    const dateCreatedTs = Number(dateUtils.parseDateTime(baseNote.utcDateCreated));

    return {
        minDate: dateUtils.utcDateStr(new Date(dateCreatedTs - RELATED_WINDOW_MS)),
        minExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs - EXCLUDED_WINDOW_MS)),
        maxExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs + EXCLUDED_WINDOW_MS)),
        maxDate: dateUtils.utcDateStr(new Date(dateCreatedTs + RELATED_WINDOW_MS)),
    };
}
|
|
|
|
/**
 * Cache of text => list of its lower-cased words, shared across searches.
 * A Map is used because the keys are arbitrary note content - with a plain object a text such as
 * "constructor" would resolve to an inherited function and break the word iteration.
 */
const wordCache = new Map();

/**
 * Finds notes similar to the given one.
 *
 * Every other note in the note cache is scored by how many words it shares with the base note
 * (type, MIME type, title, branch prefixes, attributes, ancestors outside of the base note's own
 * ancestry) using the rewards from buildRewardMap(). Notes created around the same time as the
 * base note get a bonus. Candidates scoring at least 4 are returned; archived ones are penalized by 1.
 *
 * @param {string} noteId - ID of the base note
 * @returns {Array<{score: number, notePath: *, noteId: string}>} at most 200 results ordered by
 *          descending score; empty array when the base note is not in the cache
 */
function findSimilarNotes(noteId) {
    const MIN_SCORE = 4;
    const MAX_RESULTS = 200;

    const baseNote = noteCache.notes[noteId];

    if (!baseNote) {
        return [];
    }

    const dateLimits = buildDateLimits(baseNote);
    const rewardMap = buildRewardMap(baseNote);
    // noteId => ancestor score, valid only for this search since it depends on the base note
    const ancestorRewardCache = new Map();
    const ancestorNoteIds = new Set(baseNote.ancestors.map(note => note.noteId));

    /** Sums the rewards of all words in the text, each multiplied by the factor. */
    function gatherRewards(text, factor = 1) {
        if (!text) {
            return 0;
        }

        let words = wordCache.get(text);

        if (!words) {
            words = text.toLowerCase().split(/\W+/);
            wordCache.set(text, words);
        }

        let counter = 0;

        for (const word of words) {
            // unknown words (and anything non-numeric) produce NaN which is turned into 0
            counter += rewardMap[word] * factor || 0;
        }

        return counter;
    }

    /**
     * Scores titles and branch prefixes of the note's parents (and recursively of their parents)
     * which are not ancestors of the base note. Results are memoized per note.
     */
    function gatherAncestorRewards(note) {
        if (!ancestorRewardCache.has(note.noteId)) {
            let score = 0;

            for (const parentNote of note.parents) {
                if (!ancestorNoteIds.has(parentNote.noteId)) {
                    if (parentNote.isDecrypted) {
                        score += gatherRewards(parentNote.title, 0.5);
                    }

                    for (const branch of parentNote.parentBranches) {
                        score += gatherRewards(branch.prefix, 0.5)
                               + gatherAncestorRewards(branch.parentNote);
                    }
                }
            }

            ancestorRewardCache.set(note.noteId, score);
        }

        return ancestorRewardCache.get(note.noteId);
    }

    /** Computes the similarity score of a single candidate against the base note. */
    function computeScore(candidateNote) {
        let score = gatherRewards(candidateNote.type)
                  + gatherRewards(trimMime(candidateNote.mime))
                  + gatherAncestorRewards(candidateNote);

        if (candidateNote.isDecrypted) {
            score += gatherRewards(candidateNote.title);
        }

        for (const branch of candidateNote.parentBranches) {
            score += gatherRewards(branch.prefix);
        }

        for (const attr of candidateNote.attributes) {
            if (!IGNORED_ATTR_NAMES.includes(attr.name)) {
                score += gatherRewards(attr.name);
            }

            score += gatherRewards(attr.value);
        }

        /**
         * We want to improve standing of notes which have been created in similar time to each other since
         * there's a good chance they are related.
         *
         * But there's an exception - if they were created really close to each other (within few seconds) then
         * they are probably part of the import and not created by hand - these OTOH should not benefit.
         */
        const {utcDateCreated} = candidateNote;

        const isInRelatedWindow = utcDateCreated >= dateLimits.minDate
            && utcDateCreated <= dateLimits.maxDate;

        // the candidate must lie on either side of the excluded window - it can never be on both
        const isOutsideExcludedWindow = utcDateCreated < dateLimits.minExcludedDate
            || utcDateCreated > dateLimits.maxExcludedDate;

        if (isInRelatedWindow && isOutsideExcludedWindow) {
            score += 3;
        }

        return score;
    }

    const results = [];

    for (const candidateNote of Object.values(noteCache.notes)) {
        if (candidateNote.noteId === baseNote.noteId) {
            continue;
        }

        let score = computeScore(candidateNote);

        if (score < MIN_SCORE) {
            continue;
        }

        const notePath = noteCacheService.getSomePath(candidateNote);

        // this takes care of note hoisting - a candidate without a usable path is skipped,
        // it must not abort the whole search
        if (!notePath) {
            continue;
        }

        if (noteCacheService.isNotePathArchived(notePath)) {
            score -= 1; // archived penalization
        }

        results.push({score, notePath, noteId: candidateNote.noteId});
    }

    // highest score first; returning 0 for equal scores keeps the comparator consistent
    results.sort((a, b) => b.score - a.score);

    return results.slice(0, MAX_RESULTS);
}
|
|
|
|
/**
 * Point of this is to break up long running sync process to avoid blocking
 * see https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
 *
 * Awaiting the returned promise yields to the event loop so that pending I/O callbacks can run
 * before the caller continues. setImmediate() is used (as the function name says) rather than
 * a zero-delay timer.
 *
 * @returns {Promise<void>} promise resolved by a setImmediate() callback
 */
function setImmediatePromise() {
    return new Promise((resolve) => {
        setImmediate(() => resolve());
    });
}
|
|
|
|
// Public API of this module - only the similarity search itself is exposed.
module.exports = { findSimilarNotes };
|