mirror of
https://github.com/zadam/trilium.git
synced 2025-03-01 14:22:32 +01:00
similar notes changes
This commit is contained in:
parent
f5216e4799
commit
eeacd8118f
12
package-lock.json
generated
12
package-lock.json
generated
@ -3143,9 +3143,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"electron": {
|
"electron": {
|
||||||
"version": "9.3.0",
|
"version": "9.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/electron/-/electron-9.3.0.tgz",
|
"resolved": "https://registry.npmjs.org/electron/-/electron-9.3.1.tgz",
|
||||||
"integrity": "sha512-7zPLEZ+kOjVJqfawMQ0vVuZZRqvZIeiID3tbjjbVybbxXIlFMpZ2jogoh7PV3rLrtm+dKRfu7Qc4E7ob1d0FqQ==",
|
"integrity": "sha512-DScrhqBT4a54KfdF0EoipALpHmdQTn3m7SSCtbpTcEcG+UDUiXad2cOfW6DHeVH7N+CVDKDG12q2PhVJjXkFAA==",
|
||||||
"dev": true,
|
"dev": true,
|
||||||
"requires": {
|
"requires": {
|
||||||
"@electron/get": "^1.0.1",
|
"@electron/get": "^1.0.1",
|
||||||
@ -4184,9 +4184,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"file-type": {
|
"file-type": {
|
||||||
"version": "15.0.0",
|
"version": "15.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/file-type/-/file-type-15.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/file-type/-/file-type-15.0.1.tgz",
|
||||||
"integrity": "sha512-l0JCuF5F7NIybCfa9G2H0lKhhGaf0z+HJyLOmB2feknY7/HBVNyD4PLesGKLGqznwyVXGNnfpIOr+Fvca6bOEg==",
|
"integrity": "sha512-0LieQlSA3bWUdErNrxzxfI4rhsvNAVPBO06R8pTc1hp9SE6nhqlVyvhcaXoMmtXkBTPnQenbMPLW9X76hH76oQ==",
|
||||||
"requires": {
|
"requires": {
|
||||||
"readable-web-to-node-stream": "^2.0.0",
|
"readable-web-to-node-stream": "^2.0.0",
|
||||||
"strtok3": "^6.0.3",
|
"strtok3": "^6.0.3",
|
||||||
|
@ -41,7 +41,7 @@
|
|||||||
"electron-window-state": "5.0.3",
|
"electron-window-state": "5.0.3",
|
||||||
"express": "4.17.1",
|
"express": "4.17.1",
|
||||||
"express-session": "1.17.1",
|
"express-session": "1.17.1",
|
||||||
"file-type": "15.0.0",
|
"file-type": "15.0.1",
|
||||||
"fs-extra": "9.0.1",
|
"fs-extra": "9.0.1",
|
||||||
"helmet": "4.1.1",
|
"helmet": "4.1.1",
|
||||||
"html": "1.0.0",
|
"html": "1.0.0",
|
||||||
@ -66,7 +66,6 @@
|
|||||||
"semver": "7.3.2",
|
"semver": "7.3.2",
|
||||||
"serve-favicon": "2.5.0",
|
"serve-favicon": "2.5.0",
|
||||||
"session-file-store": "1.4.0",
|
"session-file-store": "1.4.0",
|
||||||
"string-similarity": "4.0.2",
|
|
||||||
"striptags": "3.1.1",
|
"striptags": "3.1.1",
|
||||||
"turndown": "6.0.0",
|
"turndown": "6.0.0",
|
||||||
"turndown-plugin-gfm": "1.0.2",
|
"turndown-plugin-gfm": "1.0.2",
|
||||||
@ -77,7 +76,7 @@
|
|||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"cross-env": "7.0.2",
|
"cross-env": "7.0.2",
|
||||||
"electron": "9.3.0",
|
"electron": "9.3.1",
|
||||||
"electron-builder": "22.8.0",
|
"electron-builder": "22.8.0",
|
||||||
"electron-packager": "15.1.0",
|
"electron-packager": "15.1.0",
|
||||||
"electron-rebuild": "2.0.3",
|
"electron-rebuild": "2.0.3",
|
||||||
|
@ -141,7 +141,7 @@ export default class SimilarNotesWidget extends TabAwareWidget {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const $item = (await linkService.createNoteLink(similarNote.notePath.join("/")))
|
const $item = (await linkService.createNoteLink(similarNote.notePath.join("/")))
|
||||||
.css("font-size", 24 * similarNote.coeff);
|
.css("font-size", 24 * similarNote.score);
|
||||||
|
|
||||||
$list.append($item);
|
$list.append($item);
|
||||||
}
|
}
|
||||||
|
@ -3,9 +3,7 @@
|
|||||||
const noteCache = require('./note_cache');
|
const noteCache = require('./note_cache');
|
||||||
const hoistedNoteService = require('../hoisted_note');
|
const hoistedNoteService = require('../hoisted_note');
|
||||||
const protectedSessionService = require('../protected_session');
|
const protectedSessionService = require('../protected_session');
|
||||||
const stringSimilarity = require('string-similarity');
|
|
||||||
const log = require('../log');
|
const log = require('../log');
|
||||||
const dateUtils = require('../date_utils');
|
|
||||||
|
|
||||||
function isNotePathArchived(notePath) {
|
function isNotePathArchived(notePath) {
|
||||||
const noteId = notePath[notePath.length - 1];
|
const noteId = notePath[notePath.length - 1];
|
||||||
@ -175,87 +173,6 @@ function getNotePath(noteId) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function evaluateSimilarity(sourceNote, candidateNote, dates, results) {
|
|
||||||
let coeff = stringSimilarity.compareTwoStrings(sourceNote.flatText, candidateNote.flatText);
|
|
||||||
const {utcDateCreated} = candidateNote;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* We want to improve standing of notes which have been created in similar time to each other since
|
|
||||||
* there's a good chance they are related.
|
|
||||||
*
|
|
||||||
* But there's an exception - if they were created really close to each other (withing few seconds) then
|
|
||||||
* they are probably part of the import and not created by hand - these OTOH should not benefit.
|
|
||||||
*/
|
|
||||||
if (utcDateCreated >= dates.minDate && utcDateCreated <= dates.maxDate
|
|
||||||
&& utcDateCreated < dates.minExcludedDate && utcDateCreated > dates.maxExcludedDate) {
|
|
||||||
|
|
||||||
coeff += 0.3;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (coeff > 0.5) {
|
|
||||||
const notePath = getSomePath(candidateNote);
|
|
||||||
|
|
||||||
// this takes care of note hoisting
|
|
||||||
if (!notePath) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (isNotePathArchived(notePath)) {
|
|
||||||
coeff -= 0.2; // archived penalization
|
|
||||||
}
|
|
||||||
|
|
||||||
results.push({coeff, notePath, noteId: candidateNote.noteId});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Point of this is to break up long running sync process to avoid blocking
|
|
||||||
* see https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
|
|
||||||
*/
|
|
||||||
function setImmediatePromise() {
|
|
||||||
return new Promise((resolve) => {
|
|
||||||
setTimeout(() => resolve(), 0);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async function findSimilarNotes(noteId) {
|
|
||||||
const results = [];
|
|
||||||
let i = 0;
|
|
||||||
|
|
||||||
const origNote = noteCache.notes[noteId];
|
|
||||||
|
|
||||||
if (!origNote) {
|
|
||||||
return [];
|
|
||||||
}
|
|
||||||
|
|
||||||
const dateCreatedTs = dateUtils.parseDateTime(origNote.utcDateCreated);
|
|
||||||
|
|
||||||
const dates = {
|
|
||||||
minDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 1800)),
|
|
||||||
minExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs - 5)),
|
|
||||||
maxExcludedDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 5)),
|
|
||||||
maxDate: dateUtils.utcDateStr(new Date(dateCreatedTs + 1800)),
|
|
||||||
};
|
|
||||||
|
|
||||||
for (const note of Object.values(noteCache.notes)) {
|
|
||||||
if (note.noteId === origNote.noteId) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
evaluateSimilarity(origNote, note, dates, results);
|
|
||||||
|
|
||||||
i++;
|
|
||||||
|
|
||||||
if (i % 200 === 0) {
|
|
||||||
await setImmediatePromise();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
results.sort((a, b) => a.coeff > b.coeff ? -1 : 1);
|
|
||||||
|
|
||||||
return results.length > 50 ? results.slice(0, 200) : results;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @param noteId
|
* @param noteId
|
||||||
* @returns {boolean} - true if note exists (is not deleted) and is available in current note hoisting
|
* @returns {boolean} - true if note exists (is not deleted) and is available in current note hoisting
|
||||||
@ -274,5 +191,5 @@ module.exports = {
|
|||||||
isAvailable,
|
isAvailable,
|
||||||
isArchived,
|
isArchived,
|
||||||
isInAncestor,
|
isInAncestor,
|
||||||
findSimilarNotes
|
isNotePathArchived
|
||||||
};
|
};
|
||||||
|
180
src/services/note_cache/similarity.js
Normal file
180
src/services/note_cache/similarity.js
Normal file
@ -0,0 +1,180 @@
|
|||||||
|
const noteCache = require('./note_cache');
|
||||||
|
const noteCacheService = require('./note_cache_service.js');
|
||||||
|
const dateUtils = require('../date_utils');
|
||||||
|
|
||||||
|
/**
 * Computes the similarity score of a candidate note.
 *
 * At the moment the only signal taken into account is how close the candidate's creation
 * time is to the base note's creation time; the reward map is accepted but not used yet.
 *
 * @param {Note} candidateNote - note being scored, only its utcDateCreated is read
 * @param {Object} rewardMap - word => reward map (currently unused). For backward compatibility
 *                             the two-argument form computeScore(candidateNote, dates) is accepted
 *                             as well - the second argument is then treated as the date limits.
 * @param {{minDate, minExcludedDate, maxExcludedDate, maxDate}} [dates] - creation date limits,
 *                             compared against utcDateCreated with the relational operators
 * @returns {number} 0.3 when the creation date qualifies for the reward, 0 otherwise
 */
function computeScore(candidateNote, rewardMap, dates) {
    // the caller passes (candidateNote, rewardMap, dates), the legacy form is (candidateNote, dates)
    const dateLimits = dates === undefined ? rewardMap : dates;

    let score = 0;

    /**
     * We want to improve standing of notes which have been created in similar time to each other since
     * there's a good chance they are related.
     *
     * But there's an exception - if they were created really close to each other (within few seconds) then
     * they are probably part of the import and not created by hand - these OTOH should not benefit.
     */
    const {utcDateCreated} = candidateNote;

    const isInRewardedWindow = utcDateCreated >= dateLimits.minDate
        && utcDateCreated <= dateLimits.maxDate;

    // a date can't be below the lower AND above the upper bound at once, so this has to be "or"
    const isOutsideExcludedWindow = utcDateCreated < dateLimits.minExcludedDate
        || utcDateCreated > dateLimits.maxExcludedDate;

    if (isInRewardedWindow && isOutsideExcludedWindow) {
        score += 0.3;
    }

    return score;
}
|
||||||
|
|
||||||
|
/**
 * Scores a single candidate note and appends it to the results when it scores above 0.5.
 *
 * @param {Note} sourceNote - note the candidates are compared to (not read by this function)
 * @param {Note} candidateNote - note being evaluated
 * @param {Object} rewardMap - passed through to computeScore
 * @param {Object} dates - passed through to computeScore
 * @param {Array} results - output array, receives {score, notePath, noteId} objects
 */
function evaluateSimilarity(sourceNote, candidateNote, rewardMap, dates, results) {
    const baseScore = computeScore(candidateNote, rewardMap, dates);

    if (!(baseScore > 0.5)) {
        return;
    }

    const notePath = noteCacheService.getSomePath(candidateNote);

    // no path means the candidate is skipped - this takes care of note hoisting
    if (!notePath) {
        return;
    }

    const isArchived = noteCacheService.isNotePathArchived(notePath);

    // archived notes are penalized, but still reported
    const score = isArchived ? baseScore - 0.2 : baseScore;

    results.push({
        score,
        notePath,
        noteId: candidateNote.noteId
    });
}
|
||||||
|
|
||||||
|
/**
 * Point of this is to break up long running sync process to avoid blocking
 * see https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
 *
 * setImmediate() is used instead of setTimeout(..., 0) because Node.js clamps timer delays
 * to at least 1 ms, while an immediate runs right after the pending I/O callbacks were processed.
 *
 * @returns {Promise<undefined>} promise resolved in the check phase of the event loop
 */
function setImmediatePromise() {
    return new Promise(resolve => setImmediate(resolve));
}
|
||||||
|
|
||||||
|
// Attribute names which do not contribute to the reward map (their values still do).
const IGNORED_ATTR_NAMES = ["includenotelink", "internallink", "imagelink", "relationmaplink"];

/**
 * Builds a word => reward map out of the texts attached to the given note: titles and branch
 * prefixes of its ancestors, its own type, mime, title and branch prefixes and its attributes.
 *
 * @param {Note} note
 * @returns {Object} map filled by updateMap()
 */
function buildRewardMap(note) {
    const rewards = {};
    const reward = (text, baseReward) => updateMap(rewards, text, baseReward);

    for (const ancestor of note.ancestors) {
        reward(ancestor.title, 0.4);

        for (const ancestorBranch of ancestor.parentBranches) {
            reward(ancestorBranch.prefix, 0.4);
        }
    }

    reward(note.type, 0.2);
    reward(processMime(note.mime), 0.3);

    reward(note.title, 1);

    for (const ownBranch of note.parentBranches) {
        reward(ownBranch.prefix, 1);
    }

    for (const attribute of note.attributes) {
        // attributes with a different noteId than the note itself get a lower reward
        const attrReward = note.noteId === attribute.noteId ? 0.8 : 0.5;
        const isNameIgnored = IGNORED_ATTR_NAMES.includes(attribute.name);

        if (!isNameIgnored) {
            reward(attribute.name, attrReward);
        }

        reward(attribute.value, attrReward);
    }

    return rewards;
}
|
||||||
|
|
||||||
|
/**
 * Extracts the interesting part of a MIME type - the subtype without the experimental "x-" prefix,
 * e.g. "text/x-python" => "python", "application/json" => "json".
 *
 * @param {string} [mime]
 * @returns {string|undefined} stripped subtype, undefined for an empty mime or a mime without "/"
 */
function processMime(mime) {
    if (!mime) {
        return;
    }

    const chunks = mime.split('/');

    if (chunks.length < 2) {
        return;
    }

    // we're not interested in 'text/' or 'application/' prefix
    const subtype = chunks[1];

    // experimental subtypes are prefixed with "x-" (not "-x"), the prefix carries no meaning
    return subtype.startsWith('x-')
        ? subtype.slice(2)
        : subtype;
}
|
||||||
|
|
||||||
|
/**
 * Splits the text into words and increases the reward of each of them in the map.
 *
 * @param {Object} map - word => accumulated reward, modified in place
 * @param {string} [text] - text to split on non-word characters, nothing happens when it's empty
 * @param {number} baseReward - reward multiplier, the actual reward also grows with the word length
 */
function updateMap(map, text, baseReward) {
    if (!text) {
        return;
    }

    for (const word of text.split(/\W+/)) {
        // split() produces empty strings when the text starts or ends with a separator
        if (!word) {
            continue;
        }

        // only own properties count - a plain object also "contains" inherited members
        // like "constructor" or "toString" which would corrupt the sum
        const currentReward = Object.prototype.hasOwnProperty.call(map, word) ? map[word] : 0;

        // reward grows with the length of matched string
        map[word] = currentReward + baseReward * Math.sqrt(word.length);
    }
}
|
||||||
|
|
||||||
|
/**
 * Splits the string into words - runs of word characters ([A-Za-z0-9_]).
 *
 * @param {string} [str]
 * @returns {string[]} non-empty words in the order of appearance, empty array for an empty input
 */
function tokenize(str) {
    if (!str) {
        return [];
    }

    // separators at the beginning or end of the string produce empty chunks, drop them
    return str.split(/\W+/).filter(word => word.length > 0);
}
|
||||||
|
|
||||||
|
/**
 * Finds notes similar to the given one.
 *
 * All notes in the note cache (except for the base note itself) are evaluated, the evaluation
 * yields to the event loop after every 200 candidates so that it does not block it for too long.
 *
 * @param {string} noteId - ID of the base note
 * @returns {Promise<Array<{score: number, notePath, noteId}>>} at most 200 results sorted by score
 *          from the highest, empty array when the base note is not in the note cache
 */
async function findSimilarNotes(noteId) {
    const baseNote = noteCache.notes[noteId];

    if (!baseNote) {
        return [];
    }

    // normalized to epoch milliseconds so that the arithmetic below is numeric no matter
    // whether parseDateTime() gives back a Date or a timestamp - NOTE(review): confirm its return type
    const dateCreatedTs = new Date(dateUtils.parseDateTime(baseNote.utcDateCreated)).getTime();

    // offsets are in seconds (half an hour and 5 seconds), the timestamp is in milliseconds
    const shiftedDateStr = offsetSeconds => dateUtils.utcDateStr(new Date(dateCreatedTs + offsetSeconds * 1000));

    const dates = {
        minDate: shiftedDateStr(-1800),
        minExcludedDate: shiftedDateStr(-5),
        maxExcludedDate: shiftedDateStr(5),
        maxDate: shiftedDateStr(1800)
    };

    const rewardMap = buildRewardMap(baseNote);

    const results = [];
    let evaluatedCount = 0;

    for (const candidateNote of Object.values(noteCache.notes)) {
        if (candidateNote.noteId === baseNote.noteId) {
            continue;
        }

        evaluateSimilarity(baseNote, candidateNote, rewardMap, dates, results);

        evaluatedCount++;

        if (evaluatedCount % 200 === 0) {
            await setImmediatePromise();
        }
    }

    // numeric comparator - returns 0 for equal scores, which keeps the comparator consistent
    results.sort((a, b) => b.score - a.score);

    return results.slice(0, 200);
}
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
findSimilarNotes
|
||||||
|
};
|
Loading…
x
Reference in New Issue
Block a user