diff --git a/package-lock.json b/package-lock.json index 9a7f96b4c..02fcc815e 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8158,11 +8158,6 @@ "resolved": "https://registry.npmjs.org/streamsearch/-/streamsearch-0.1.2.tgz", "integrity": "sha1-gIudDlb8Jz2Am6VzOOkpkZoanxo=" }, - "string-similarity": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/string-similarity/-/string-similarity-4.0.2.tgz", - "integrity": "sha512-eCsPPyoQBgY4TMpVD6DVfO7pLrimUONriaO4Xjp3WPUW0YnNLqdHgRj23xotLlqrL90eJhBeq3zdAJf2mQgfBQ==" - }, "string-width": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-1.0.2.tgz", diff --git a/src/public/app/widgets/note_paths.js b/src/public/app/widgets/note_paths.js index f74679007..7c4ceb149 100644 --- a/src/public/app/widgets/note_paths.js +++ b/src/public/app/widgets/note_paths.js @@ -36,6 +36,11 @@ const TPL = ` overflow: hidden; text-overflow: ellipsis; } + + .note-path-list { + max-height: 600px; + overflow-y: auto; + }
diff --git a/src/public/app/widgets/similar_notes.js b/src/public/app/widgets/similar_notes.js index 1cb426efb..1637f403a 100644 --- a/src/public/app/widgets/similar_notes.js +++ b/src/public/app/widgets/similar_notes.js @@ -141,7 +141,7 @@ export default class SimilarNotesWidget extends TabAwareWidget { } const $item = (await linkService.createNoteLink(similarNote.notePath.join("/"))) - .css("font-size", 24 * similarNote.score); + .css("font-size", 24 * (1 - 1 / (similarNote.score - 1))); $list.append($item); } diff --git a/src/public/stylesheets/style.css b/src/public/stylesheets/style.css index 82cd6541e..739e297ab 100644 --- a/src/public/stylesheets/style.css +++ b/src/public/stylesheets/style.css @@ -649,7 +649,7 @@ a.external:not(.no-arrow):after, a[href^="http://"]:not(.no-arrow):after, a[href } .component { - contain: layout size; + contain: size; } .toast { diff --git a/src/routes/api/similar_notes.js b/src/routes/api/similar_notes.js index dbdfa944f..aa76a7f10 100644 --- a/src/routes/api/similar_notes.js +++ b/src/routes/api/similar_notes.js @@ -1,6 +1,6 @@ "use strict"; -const noteCacheService = require('../../services/note_cache/note_cache_service'); +const similarityService = require('../../services/note_cache/similarity.js'); const repository = require('../../services/repository'); async function getSimilarNotes(req) { @@ -12,10 +12,7 @@ async function getSimilarNotes(req) { return [404, `Note ${noteId} not found.`]; } - const results = await noteCacheService.findSimilarNotes(noteId); - - return results - .filter(note => note.noteId !== noteId); + return await similarityService.findSimilarNotes(noteId); } module.exports = { diff --git a/src/services/note_cache/similarity.js b/src/services/note_cache/similarity.js index 9d34d81b8..863d40cbe 100644 --- a/src/services/note_cache/similarity.js +++ b/src/services/note_cache/similarity.js @@ -2,10 +2,47 @@ const noteCache = require('./note_cache'); const noteCacheService = require('./note_cache_service.js'); const dateUtils = require('../date_utils'); -function computeScore(candidateNote, dates) { - let score = 0; +function gatherRewards(rewardMap, text) { + if (!text) { + return 0; + } + let counter = 0; + for (const word of text.toLowerCase().split(/\W+/)) { + counter += rewardMap[word] || 0; + } + + return counter; +} + +function computeScore(candidateNote, ancestorNoteIds, rewardMap, dates) { + let score = + gatherRewards(rewardMap, candidateNote.title) + + gatherRewards(rewardMap, candidateNote.type); + + gatherRewards(rewardMap, trimMime(candidateNote.mime)); + + for (const ancestorNote of candidateNote.ancestors) { + if (!ancestorNoteIds.includes(ancestorNote.noteId)) { + score += gatherRewards(rewardMap, ancestorNote.title); + + for (const branch of ancestorNote.parentBranches) { + score += gatherRewards(rewardMap, branch.prefix); + } + } + } + + for (const branch of candidateNote.parentBranches) { + score += gatherRewards(rewardMap, branch.prefix); + } + + for (const attr of candidateNote.attributes) { + if (!IGNORED_ATTR_NAMES.includes(attr.name)) { + score += gatherRewards(rewardMap, attr.name); + } + + score += gatherRewards(rewardMap, attr.value); + } /** * We want to improve standing of notes which have been created in similar time to each other since @@ -19,16 +56,16 @@ function computeScore(candidateNote, dates) { if (utcDateCreated >= dates.minDate && utcDateCreated <= dates.maxDate && utcDateCreated < dates.minExcludedDate && utcDateCreated > dates.maxExcludedDate) { - score += 0.3; + score += 3; } return score; } -function evaluateSimilarity(sourceNote, candidateNote, rewardMap, dates, results) { - let score = computeScore(candidateNote, rewardMap, dates); +function evaluateSimilarity(sourceNote, candidateNote, ancestorNoteIds, rewardMap, dates, results) { + let score = computeScore(candidateNote, ancestorNoteIds, rewardMap, dates); - if (score > 0.5) { + if (score >= 4) { const notePath = noteCacheService.getSomePath(candidateNote); // this takes care of note hoisting @@ -37,7 +74,7 @@ function evaluateSimilarity(sourceNote, candidateNote, rewardMap, dates, results } if (noteCacheService.isNotePathArchived(notePath)) { - score -= 0.2; // archived penalization + score -= 1; // archived penalization } results.push({score, notePath, noteId: candidateNote.noteId}); @@ -68,36 +105,36 @@ function buildRewardMap(note) { const map = {}; for (const ancestorNote of note.ancestors) { - updateMap(map, ancestorNote.title, 0.4); + addToRewardMap(map, ancestorNote.title, 0.4); for (const branch of ancestorNote.parentBranches) { - updateMap(map, branch.prefix, 0.4); + addToRewardMap(map, branch.prefix, 0.4); } } - updateMap(map, note.type, 0.2); - updateMap(map, processMime(note.mime), 0.3); + addToRewardMap(map, note.type, 0.2); + addToRewardMap(map, trimMime(note.mime), 0.3); - updateMap(map, note.title, 1); + addToRewardMap(map, note.title, 1); for (const branch of note.parentBranches) { - updateMap(map, branch.prefix, 1); + addToRewardMap(map, branch.prefix, 1); } for (const attr of note.attributes) { const reward = note.noteId === attr.noteId ? 0.8 : 0.5; if (!IGNORED_ATTR_NAMES.includes(attr.name)) { - updateMap(map, attr.name, reward); + addToRewardMap(map, attr.name, reward); } - updateMap(map, attr.value, reward); + addToRewardMap(map, attr.value, reward); } return map; } -function processMime(mime) { +function trimMime(mime) { if (!mime) { return; } @@ -118,23 +155,21 @@ function processMime(mime) { return str; } -function updateMap(map, text, baseReward) { +function addToRewardMap(map, text, baseReward) { if (!text) { return; } - for (const word of text.split(/\W+/)) { - map[word] = map[word] || 0; + for (const word of text.toLowerCase().split(/\W+/)) { + if (word) { + map[word] = map[word] || 0; - // reward grows with the length of matched string - map[word] += baseReward * Math.sqrt(word.length); + // reward grows with the length of matched string + map[word] += baseReward * Math.sqrt(word.length); + } } } -function tokenize(str) { - return ; -} - async function findSimilarNotes(noteId) { const results = []; let i = 0; @@ -155,26 +190,27 @@ async function findSimilarNotes(noteId) { }; const rewardMap = buildRewardMap(baseNote); + const ancestorNoteIds = baseNote.ancestors.map(note => note.noteId); for (const candidateNote of Object.values(noteCache.notes)) { if (candidateNote.noteId === baseNote.noteId) { continue; } - evaluateSimilarity(baseNote, candidateNote, rewardMap, dates, results); + evaluateSimilarity(baseNote, candidateNote, ancestorNoteIds, rewardMap, dates, results); i++; - if (i % 200 === 0) { + if (i % 1000 === 0) { await setImmediatePromise(); } } results.sort((a, b) => a.score > b.score ? -1 : 1); - return results.length > 50 ? results.slice(0, 200) : results; + return results.length > 200 ? results.slice(0, 200) : results; } module.exports = { findSimilarNotes -}; \ No newline at end of file +};