similarity tweaks

This commit is contained in:
zadam 2020-09-19 21:59:01 +02:00
parent ec7021b436
commit 07bd5129ca
2 changed files with 39 additions and 9 deletions

View File

@ -2,7 +2,7 @@ const noteCache = require('./note_cache');
const noteCacheService = require('./note_cache_service.js'); const noteCacheService = require('./note_cache_service.js');
const dateUtils = require('../date_utils'); const dateUtils = require('../date_utils');
const DEBUG = false; const DEBUG = true;
const IGNORED_ATTRS = [ const IGNORED_ATTRS = [
"datenote", "datenote",
@ -31,7 +31,7 @@ const IGNORED_ATTR_NAMES = [
"pageurl", "pageurl",
]; ];
function filterLabelValue(value) { function filterUrlValue(value) {
return value return value
.replace(/https?:\/\//ig, "") .replace(/https?:\/\//ig, "")
.replace(/www\./ig, "") .replace(/www\./ig, "")
@ -94,7 +94,7 @@ function buildRewardMap(note) {
} }
// inherited notes get small penalization // inherited notes get small penalization
const reward = note.noteId === attr.noteId ? 0.8 : 0.5; let reward = note.noteId === attr.noteId ? 0.8 : 0.5;
if (IGNORED_ATTRS.includes(attr.name)) { if (IGNORED_ATTRS.includes(attr.name)) {
continue; continue;
@ -104,7 +104,16 @@ function buildRewardMap(note) {
addToRewardMap(attr.name, reward); addToRewardMap(attr.name, reward);
} }
addToRewardMap(filterLabelValue(attr.value), reward); let value = attr.value;
if (value.startsWith('http')) {
value = filterUrlValue(value);
// words in URLs are not that valuable
reward = reward / 2;
}
addToRewardMap(value, reward);
} }
return map; return map;
@ -180,6 +189,16 @@ function splitToWords(text) {
return words; return words;
} }
/**
 * Tells whether sourceNote has, among its attributes, an includeNoteLink or imageLink
 * relation whose value is the noteId of targetNote.
 *
 * includeNoteLink and imageLink relations mean that the notes are clearly related, but so clearly
 * that the relationship doesn't actually need to be shown to the user.
 *
 * @param {{attributes: Array<{type: string, name: string, value: string}>}} sourceNote - note whose attributes are scanned
 * @param {{noteId: string}} targetNote - note the relation has to point to
 * @returns {boolean} true when at least one matching relation exists, false otherwise
 */
function hasConnectingRelation(sourceNote, targetNote) {
    // some() yields a real boolean for this predicate instead of leaking the matched attribute object
    return sourceNote.attributes.some(attr => attr.type === 'relation'
        && ['includenotelink', 'imagelink'].includes(attr.name)
        && attr.value === targetNote.noteId);
}
async function findSimilarNotes(noteId) { async function findSimilarNotes(noteId) {
const results = []; const results = [];
let i = 0; let i = 0;
@ -270,7 +289,17 @@ async function findSimilarNotes(noteId) {
score += gatherRewards(attr.name); score += gatherRewards(attr.name);
} }
score += gatherRewards(attr.value); let value = attr.value;
let factor = 1;
if (value.startsWith('http')) {
value = filterUrlValue(value);
// words in URLs are not that valuable
factor = 0.5;
}
score += gatherRewards(attr.value, factor);
} }
if (candidateNote.type === baseNote.type) { if (candidateNote.type === baseNote.type) {
@ -300,13 +329,15 @@ async function findSimilarNotes(noteId) {
} }
for (const candidateNote of Object.values(noteCache.notes)) { for (const candidateNote of Object.values(noteCache.notes)) {
if (candidateNote.noteId === baseNote.noteId) { if (candidateNote.noteId === baseNote.noteId
|| hasConnectingRelation(candidateNote, baseNote)
|| hasConnectingRelation(baseNote, candidateNote)) {
continue; continue;
} }
let score = computeScore(candidateNote); let score = computeScore(candidateNote);
if (score >= 1.5) { if (score >= 2) {
const notePath = noteCacheService.getSomePath(candidateNote); const notePath = noteCacheService.getSomePath(candidateNote);
// this takes care of note hoisting // this takes care of note hoisting

View File

@ -14,7 +14,6 @@ process.on('SIGINT', function() {
}); });
const { app, sessionParser } = require('./app'); const { app, sessionParser } = require('./app');
const debug = require('debug')('node:server');
const fs = require('fs'); const fs = require('fs');
const http = require('http'); const http = require('http');
const https = require('https'); const https = require('https');
@ -100,7 +99,7 @@ async function startTrilium() {
} }
) )
httpServer.on('listening', () => debug('Listening on port' + httpServer.address().port)); httpServer.on('listening', () => log.info('Listening on port' + httpServer.address().port));
ws.init(httpServer, sessionParser); ws.init(httpServer, sessionParser);