server-ts: Port similarity

This commit is contained in:
Elian Doran 2024-02-17 20:55:36 +02:00
parent d35613f510
commit 748a551def
No known key found for this signature in database
6 changed files with 113 additions and 30 deletions

76
package-lock.json generated
View File

@ -93,6 +93,7 @@
"@types/escape-html": "^1.0.4", "@types/escape-html": "^1.0.4",
"@types/express": "^4.17.21", "@types/express": "^4.17.21",
"@types/ini": "^4.1.0", "@types/ini": "^4.1.0",
"@types/jsdom": "^21.1.6",
"@types/mime-types": "^2.1.4", "@types/mime-types": "^2.1.4",
"@types/node": "^20.11.19", "@types/node": "^20.11.19",
"@types/ws": "^8.5.10", "@types/ws": "^8.5.10",
@ -1498,6 +1499,41 @@
"integrity": "sha512-mTehMtc+xtnWBBvqizcqYCktKDBH2WChvx1GU3Sfe4PysFDXiNe+1YwtpVX1MDtCa4NQrSPw2+3HmvXHY3gt1w==", "integrity": "sha512-mTehMtc+xtnWBBvqizcqYCktKDBH2WChvx1GU3Sfe4PysFDXiNe+1YwtpVX1MDtCa4NQrSPw2+3HmvXHY3gt1w==",
"dev": true "dev": true
}, },
"node_modules/@types/jsdom": {
"version": "21.1.6",
"resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.6.tgz",
"integrity": "sha512-/7kkMsC+/kMs7gAYmmBR9P0vGTnOoLhQhyhQJSlXGI5bzTHp6xdo0TtKWQAsz6pmSAeVqKSbqeyP6hytqr9FDw==",
"dev": true,
"dependencies": {
"@types/node": "*",
"@types/tough-cookie": "*",
"parse5": "^7.0.0"
}
},
"node_modules/@types/jsdom/node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"dev": true,
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/@types/jsdom/node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dev": true,
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/@types/json-schema": { "node_modules/@types/json-schema": {
"version": "7.0.9", "version": "7.0.9",
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.9.tgz", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.9.tgz",
@ -1631,6 +1667,12 @@
"@types/node": "*" "@types/node": "*"
} }
}, },
"node_modules/@types/tough-cookie": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
"integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==",
"dev": true
},
"node_modules/@types/unist": { "node_modules/@types/unist": {
"version": "2.0.10", "version": "2.0.10",
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz",
@ -16113,6 +16155,34 @@
"integrity": "sha512-mTehMtc+xtnWBBvqizcqYCktKDBH2WChvx1GU3Sfe4PysFDXiNe+1YwtpVX1MDtCa4NQrSPw2+3HmvXHY3gt1w==", "integrity": "sha512-mTehMtc+xtnWBBvqizcqYCktKDBH2WChvx1GU3Sfe4PysFDXiNe+1YwtpVX1MDtCa4NQrSPw2+3HmvXHY3gt1w==",
"dev": true "dev": true
}, },
"@types/jsdom": {
"version": "21.1.6",
"resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.6.tgz",
"integrity": "sha512-/7kkMsC+/kMs7gAYmmBR9P0vGTnOoLhQhyhQJSlXGI5bzTHp6xdo0TtKWQAsz6pmSAeVqKSbqeyP6hytqr9FDw==",
"dev": true,
"requires": {
"@types/node": "*",
"@types/tough-cookie": "*",
"parse5": "^7.0.0"
},
"dependencies": {
"entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"dev": true
},
"parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dev": true,
"requires": {
"entities": "^4.4.0"
}
}
}
},
"@types/json-schema": { "@types/json-schema": {
"version": "7.0.9", "version": "7.0.9",
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.9.tgz", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.9.tgz",
@ -16246,6 +16316,12 @@
"@types/node": "*" "@types/node": "*"
} }
}, },
"@types/tough-cookie": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
"integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==",
"dev": true
},
"@types/unist": { "@types/unist": {
"version": "2.0.10", "version": "2.0.10",
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz", "resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz",

View File

@ -116,6 +116,7 @@
"@types/escape-html": "^1.0.4", "@types/escape-html": "^1.0.4",
"@types/express": "^4.17.21", "@types/express": "^4.17.21",
"@types/ini": "^4.1.0", "@types/ini": "^4.1.0",
"@types/jsdom": "^21.1.6",
"@types/mime-types": "^2.1.4", "@types/mime-types": "^2.1.4",
"@types/node": "^20.11.19", "@types/node": "^20.11.19",
"@types/ws": "^8.5.10", "@types/ws": "^8.5.10",

View File

@ -29,14 +29,14 @@ interface ConstructorData<T extends AbstractBeccaEntity<T>> {
*/ */
abstract class AbstractBeccaEntity<T extends AbstractBeccaEntity<T>> { abstract class AbstractBeccaEntity<T extends AbstractBeccaEntity<T>> {
protected utcDateCreated?: string;
protected utcDateModified?: string; protected utcDateModified?: string;
protected dateCreated?: string; protected dateCreated?: string;
protected dateModified?: string; protected dateModified?: string;
protected isSynced?: boolean; protected isSynced?: boolean;
protected blobId?: string; protected blobId?: string;
utcDateCreated!: string;
isProtected?: boolean; isProtected?: boolean;
protected beforeSaving() { protected beforeSaving() {

View File

@ -1,8 +1,9 @@
const becca = require('./becca'); import becca = require('./becca');
const log = require('../services/log'); import log = require('../services/log');
const beccaService = require('./becca_service'); import beccaService = require('./becca_service');
const dateUtils = require('../services/date_utils'); import dateUtils = require('../services/date_utils');
const {JSDOM} = require("jsdom"); import { JSDOM } from "jsdom";
import BNote = require('./entities/bnote');
const DEBUG = false; const DEBUG = false;
@ -32,21 +33,25 @@ const IGNORED_ATTR_NAMES = [
"pageurl", "pageurl",
]; ];
function filterUrlValue(value) { interface DateLimits {
minDate: string;
minExcludedDate: string;
maxExcludedDate: string;
maxDate: string;
}
function filterUrlValue(value: string) {
return value return value
.replace(/https?:\/\//ig, "") .replace(/https?:\/\//ig, "")
.replace(/www\./ig, "") .replace(/www\./ig, "")
.replace(/(\.net|\.com|\.org|\.info|\.edu)/ig, ""); .replace(/(\.net|\.com|\.org|\.info|\.edu)/ig, "");
} }
/** function buildRewardMap(note: BNote) {
* @param {BNote} note
*/
function buildRewardMap(note) {
// Need to use Map instead of object: https://github.com/zadam/trilium/issues/1895 // Need to use Map instead of object: https://github.com/zadam/trilium/issues/1895
const map = new Map(); const map = new Map();
function addToRewardMap(text, rewardFactor) { function addToRewardMap(text: string | undefined | null, rewardFactor: number) {
if (!text) { if (!text) {
return; return;
} }
@ -126,7 +131,7 @@ function buildRewardMap(note) {
const content = note.getContent(); const content = note.getContent();
const dom = new JSDOM(content); const dom = new JSDOM(content);
function addHeadingsToRewardMap(elName, rewardFactor) { const addHeadingsToRewardMap = (elName: string, rewardFactor: number) => {
for (const el of dom.window.document.querySelectorAll(elName)) { for (const el of dom.window.document.querySelectorAll(elName)) {
addToRewardMap(el.textContent, rewardFactor); addToRewardMap(el.textContent, rewardFactor);
} }
@ -146,9 +151,9 @@ function buildRewardMap(note) {
return map; return map;
} }
const mimeCache = {}; const mimeCache: Record<string, string> = {};
function trimMime(mime) { function trimMime(mime?: string) {
if (!mime || mime === 'text/html') { if (!mime || mime === 'text/html') {
return; return;
} }
@ -173,7 +178,7 @@ function trimMime(mime) {
return mimeCache[mime]; return mimeCache[mime];
} }
function buildDateLimits(baseNote) { function buildDateLimits(baseNote: BNote): DateLimits {
const dateCreatedTs = dateUtils.parseDateTime(baseNote.utcDateCreated).getTime(); const dateCreatedTs = dateUtils.parseDateTime(baseNote.utcDateCreated).getTime();
return { return {
@ -193,7 +198,7 @@ const WORD_BLACKLIST = [
"than", "then", "and", "either", "or", "neither", "nor", "both", "also" "than", "then", "and", "either", "or", "neither", "nor", "both", "also"
]; ];
function splitToWords(text) { function splitToWords(text: string) {
let words = wordCache.get(text); let words = wordCache.get(text);
if (!words) { if (!words) {
@ -221,13 +226,13 @@ function splitToWords(text) {
* includeNoteLink and imageLink relation mean that notes are clearly related, but so clearly * includeNoteLink and imageLink relation mean that notes are clearly related, but so clearly
* that it doesn't actually need to be shown to the user. * that it doesn't actually need to be shown to the user.
*/ */
function hasConnectingRelation(sourceNote, targetNote) { function hasConnectingRelation(sourceNote: BNote, targetNote: BNote) {
return sourceNote.getAttributes().find(attr => attr.type === 'relation' return sourceNote.getAttributes().find(attr => attr.type === 'relation'
&& ['includenotelink', 'imagelink'].includes(attr.name) && ['includenotelink', 'imagelink'].includes(attr.name)
&& attr.value === targetNote.noteId); && attr.value === targetNote.noteId);
} }
async function findSimilarNotes(noteId) { async function findSimilarNotes(noteId: string) {
const results = []; const results = [];
let i = 0; let i = 0;
@ -237,23 +242,23 @@ async function findSimilarNotes(noteId) {
return []; return [];
} }
let dateLimits; let dateLimits: DateLimits;
try { try {
dateLimits = buildDateLimits(baseNote); dateLimits = buildDateLimits(baseNote);
} }
catch (e) { catch (e: any) {
throw new Error(`Date limits failed with ${e.message}, entity: ${JSON.stringify(baseNote.getPojo())}`); throw new Error(`Date limits failed with ${e.message}, entity: ${JSON.stringify(baseNote.getPojo())}`);
} }
const rewardMap = buildRewardMap(baseNote); const rewardMap = buildRewardMap(baseNote);
let ancestorRewardCache = {}; let ancestorRewardCache: Record<string, number> = {};
const ancestorNoteIds = new Set(baseNote.getAncestors().map(note => note.noteId)); const ancestorNoteIds = new Set(baseNote.getAncestors().map(note => note.noteId));
ancestorNoteIds.add(baseNote.noteId); ancestorNoteIds.add(baseNote.noteId);
let displayRewards = false; let displayRewards = false;
function gatherRewards(text, factor = 1) { function gatherRewards(text?: string | null, factor: number = 1) {
if (!text) { if (!text) {
return 0; return 0;
} }
@ -279,7 +284,7 @@ async function findSimilarNotes(noteId) {
return counter; return counter;
} }
function gatherAncestorRewards(note) { function gatherAncestorRewards(note?: BNote) {
if (!note || ancestorNoteIds.has(note.noteId)) { if (!note || ancestorNoteIds.has(note.noteId)) {
return 0; return 0;
} }
@ -311,7 +316,7 @@ async function findSimilarNotes(noteId) {
return ancestorRewardCache[note.noteId]; return ancestorRewardCache[note.noteId];
} }
function computeScore(candidateNote) { function computeScore(candidateNote: BNote) {
let score = gatherRewards(trimMime(candidateNote.mime)) let score = gatherRewards(trimMime(candidateNote.mime))
+ gatherAncestorRewards(candidateNote); + gatherAncestorRewards(candidateNote);
@ -451,11 +456,11 @@ async function findSimilarNotes(noteId) {
* see https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/ * see https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
*/ */
function setImmediatePromise() { function setImmediatePromise() {
return new Promise((resolve) => { return new Promise<void>((resolve) => {
setTimeout(() => resolve(), 0); setTimeout(() => resolve(), 0);
}); });
} }
module.exports = { export = {
findSimilarNotes findSimilarNotes
}; };

View File

@ -1,6 +1,6 @@
"use strict"; "use strict";
const similarityService = require('../../becca/similarity.js'); const similarityService = require('../../becca/similarity');
const becca = require('../../becca/becca'); const becca = require('../../becca/becca');
async function getSimilarNotes(req) { async function getSimilarNotes(req) {

View File

@ -7,7 +7,8 @@
"strict": true, "strict": true,
"noImplicitAny": true, "noImplicitAny": true,
"resolveJsonModule": true, "resolveJsonModule": true,
"lib": ["ES2021"] "lib": ["ES2021"],
"downlevelIteration": true
}, },
"include": [ "include": [
"./src/**/*.js", "./src/**/*.js",