server-ts: Port similarity

This commit is contained in:
Elian Doran 2024-02-17 20:55:36 +02:00
parent d35613f510
commit 748a551def
No known key found for this signature in database
6 changed files with 113 additions and 30 deletions

76
package-lock.json generated
View File

@ -93,6 +93,7 @@
"@types/escape-html": "^1.0.4",
"@types/express": "^4.17.21",
"@types/ini": "^4.1.0",
"@types/jsdom": "^21.1.6",
"@types/mime-types": "^2.1.4",
"@types/node": "^20.11.19",
"@types/ws": "^8.5.10",
@ -1498,6 +1499,41 @@
"integrity": "sha512-mTehMtc+xtnWBBvqizcqYCktKDBH2WChvx1GU3Sfe4PysFDXiNe+1YwtpVX1MDtCa4NQrSPw2+3HmvXHY3gt1w==",
"dev": true
},
"node_modules/@types/jsdom": {
"version": "21.1.6",
"resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.6.tgz",
"integrity": "sha512-/7kkMsC+/kMs7gAYmmBR9P0vGTnOoLhQhyhQJSlXGI5bzTHp6xdo0TtKWQAsz6pmSAeVqKSbqeyP6hytqr9FDw==",
"dev": true,
"dependencies": {
"@types/node": "*",
"@types/tough-cookie": "*",
"parse5": "^7.0.0"
}
},
"node_modules/@types/jsdom/node_modules/entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"dev": true,
"engines": {
"node": ">=0.12"
},
"funding": {
"url": "https://github.com/fb55/entities?sponsor=1"
}
},
"node_modules/@types/jsdom/node_modules/parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dev": true,
"dependencies": {
"entities": "^4.4.0"
},
"funding": {
"url": "https://github.com/inikulin/parse5?sponsor=1"
}
},
"node_modules/@types/json-schema": {
"version": "7.0.9",
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.9.tgz",
@ -1631,6 +1667,12 @@
"@types/node": "*"
}
},
"node_modules/@types/tough-cookie": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
"integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==",
"dev": true
},
"node_modules/@types/unist": {
"version": "2.0.10",
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz",
@ -16113,6 +16155,34 @@
"integrity": "sha512-mTehMtc+xtnWBBvqizcqYCktKDBH2WChvx1GU3Sfe4PysFDXiNe+1YwtpVX1MDtCa4NQrSPw2+3HmvXHY3gt1w==",
"dev": true
},
"@types/jsdom": {
"version": "21.1.6",
"resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.6.tgz",
"integrity": "sha512-/7kkMsC+/kMs7gAYmmBR9P0vGTnOoLhQhyhQJSlXGI5bzTHp6xdo0TtKWQAsz6pmSAeVqKSbqeyP6hytqr9FDw==",
"dev": true,
"requires": {
"@types/node": "*",
"@types/tough-cookie": "*",
"parse5": "^7.0.0"
},
"dependencies": {
"entities": {
"version": "4.5.0",
"resolved": "https://registry.npmjs.org/entities/-/entities-4.5.0.tgz",
"integrity": "sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==",
"dev": true
},
"parse5": {
"version": "7.1.2",
"resolved": "https://registry.npmjs.org/parse5/-/parse5-7.1.2.tgz",
"integrity": "sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==",
"dev": true,
"requires": {
"entities": "^4.4.0"
}
}
}
},
"@types/json-schema": {
"version": "7.0.9",
"resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.9.tgz",
@ -16246,6 +16316,12 @@
"@types/node": "*"
}
},
"@types/tough-cookie": {
"version": "4.0.5",
"resolved": "https://registry.npmjs.org/@types/tough-cookie/-/tough-cookie-4.0.5.tgz",
"integrity": "sha512-/Ad8+nIOV7Rl++6f1BdKxFSMgmoqEoYbHRpPcx3JEfv8VRsQe9Z4mCXeJBzxs7mbHY/XOZZuXlRNfhpVPbs6ZA==",
"dev": true
},
"@types/unist": {
"version": "2.0.10",
"resolved": "https://registry.npmjs.org/@types/unist/-/unist-2.0.10.tgz",

View File

@ -116,6 +116,7 @@
"@types/escape-html": "^1.0.4",
"@types/express": "^4.17.21",
"@types/ini": "^4.1.0",
"@types/jsdom": "^21.1.6",
"@types/mime-types": "^2.1.4",
"@types/node": "^20.11.19",
"@types/ws": "^8.5.10",

View File

@ -29,14 +29,14 @@ interface ConstructorData<T extends AbstractBeccaEntity<T>> {
*/
abstract class AbstractBeccaEntity<T extends AbstractBeccaEntity<T>> {
protected utcDateCreated?: string;
protected utcDateModified?: string;
protected dateCreated?: string;
protected dateModified?: string;
protected isSynced?: boolean;
protected blobId?: string;
utcDateCreated!: string;
isProtected?: boolean;
protected beforeSaving() {

View File

@ -1,8 +1,9 @@
const becca = require('./becca');
const log = require('../services/log');
const beccaService = require('./becca_service');
const dateUtils = require('../services/date_utils');
const {JSDOM} = require("jsdom");
import becca = require('./becca');
import log = require('../services/log');
import beccaService = require('./becca_service');
import dateUtils = require('../services/date_utils');
import { JSDOM } from "jsdom";
import BNote = require('./entities/bnote');
const DEBUG = false;
@ -32,21 +33,25 @@ const IGNORED_ATTR_NAMES = [
"pageurl",
];
function filterUrlValue(value) {
/**
 * Set of four date boundaries, each carried as a string.
 *
 * This is the declared return type of buildDateLimits(), and findSimilarNotes()
 * keeps the result in its local `dateLimits` variable.
 *
 * NOTE(review): the names suggest an outer [minDate, maxDate] window with an inner
 * (minExcludedDate, maxExcludedDate) range carved out of it — confirm against the
 * body of buildDateLimits() and the code that compares against these values.
 */
interface DateLimits {
minDate: string;
minExcludedDate: string;
maxExcludedDate: string;
maxDate: string;
}
/**
 * Strips common URL boilerplate from a string.
 *
 * Every occurrence (case-insensitive) of the following is removed, in this order:
 *  1. the `http://` / `https://` scheme prefix,
 *  2. the `www.` host prefix,
 *  3. the suffixes `.net`, `.com`, `.org`, `.info` and `.edu`.
 *
 * @param value text that may contain one or more URLs
 * @returns the text with the fragments above removed; everything else is untouched
 */
function filterUrlValue(value: string) {
    const withoutScheme = value.replace(/https?:\/\//ig, "");
    const withoutWwwPrefix = withoutScheme.replace(/www\./ig, "");

    return withoutWwwPrefix.replace(/(\.net|\.com|\.org|\.info|\.edu)/ig, "");
}
/**
* @param {BNote} note
*/
function buildRewardMap(note) {
function buildRewardMap(note: BNote) {
// Need to use Map instead of object: https://github.com/zadam/trilium/issues/1895
const map = new Map();
function addToRewardMap(text, rewardFactor) {
function addToRewardMap(text: string | undefined | null, rewardFactor: number) {
if (!text) {
return;
}
@ -126,7 +131,7 @@ function buildRewardMap(note) {
const content = note.getContent();
const dom = new JSDOM(content);
function addHeadingsToRewardMap(elName, rewardFactor) {
const addHeadingsToRewardMap = (elName: string, rewardFactor: number) => {
for (const el of dom.window.document.querySelectorAll(elName)) {
addToRewardMap(el.textContent, rewardFactor);
}
@ -146,9 +151,9 @@ function buildRewardMap(note) {
return map;
}
const mimeCache = {};
const mimeCache: Record<string, string> = {};
function trimMime(mime) {
function trimMime(mime?: string) {
if (!mime || mime === 'text/html') {
return;
}
@ -173,7 +178,7 @@ function trimMime(mime) {
return mimeCache[mime];
}
function buildDateLimits(baseNote) {
function buildDateLimits(baseNote: BNote): DateLimits {
const dateCreatedTs = dateUtils.parseDateTime(baseNote.utcDateCreated).getTime();
return {
@ -193,7 +198,7 @@ const WORD_BLACKLIST = [
"than", "then", "and", "either", "or", "neither", "nor", "both", "also"
];
function splitToWords(text) {
function splitToWords(text: string) {
let words = wordCache.get(text);
if (!words) {
@ -221,13 +226,13 @@ function splitToWords(text) {
* includeNoteLink and imageLink relation mean that notes are clearly related, but so clearly
* that it doesn't actually need to be shown to the user.
*/
function hasConnectingRelation(sourceNote, targetNote) {
function hasConnectingRelation(sourceNote: BNote, targetNote: BNote) {
    // Relation names that count as a "connecting" link between two notes.
    const connectingRelationNames = ['includenotelink', 'imagelink'];

    // Yields the first matching attribute of sourceNote (or undefined when there is none),
    // so callers can use the result as a truthy/falsy flag.
    return sourceNote.getAttributes().find((attr) => {
        if (attr.type !== 'relation') {
            return false;
        }

        if (!connectingRelationNames.includes(attr.name)) {
            return false;
        }

        // The relation must point at the target note itself.
        return attr.value === targetNote.noteId;
    });
}
async function findSimilarNotes(noteId) {
async function findSimilarNotes(noteId: string) {
const results = [];
let i = 0;
@ -237,23 +242,23 @@ async function findSimilarNotes(noteId) {
return [];
}
let dateLimits;
let dateLimits: DateLimits;
try {
dateLimits = buildDateLimits(baseNote);
}
catch (e) {
catch (e: any) {
throw new Error(`Date limits failed with ${e.message}, entity: ${JSON.stringify(baseNote.getPojo())}`);
}
const rewardMap = buildRewardMap(baseNote);
let ancestorRewardCache = {};
let ancestorRewardCache: Record<string, number> = {};
const ancestorNoteIds = new Set(baseNote.getAncestors().map(note => note.noteId));
ancestorNoteIds.add(baseNote.noteId);
let displayRewards = false;
function gatherRewards(text, factor = 1) {
function gatherRewards(text?: string | null, factor: number = 1) {
if (!text) {
return 0;
}
@ -279,7 +284,7 @@ async function findSimilarNotes(noteId) {
return counter;
}
function gatherAncestorRewards(note) {
function gatherAncestorRewards(note?: BNote) {
if (!note || ancestorNoteIds.has(note.noteId)) {
return 0;
}
@ -311,7 +316,7 @@ async function findSimilarNotes(noteId) {
return ancestorRewardCache[note.noteId];
}
function computeScore(candidateNote) {
function computeScore(candidateNote: BNote) {
let score = gatherRewards(trimMime(candidateNote.mime))
+ gatherAncestorRewards(candidateNote);
@ -451,11 +456,11 @@ async function findSimilarNotes(noteId) {
* see https://snyk.io/blog/nodejs-how-even-quick-async-functions-can-block-the-event-loop-starve-io/
*/
function setImmediatePromise() {
// Returns a promise that resolves with no value once a zero-delay timer fires.
// NOTE(review): the two consecutive `new Promise(` lines below look like the pre-port (untyped) and
// post-port (`Promise<void>`) versions of the same statement shown together by the diff view; only one
// of them can be present in the real file — confirm against the repository.
return new Promise((resolve) => {
return new Promise<void>((resolve) => {
// Despite the function's name, scheduling is done with setTimeout(..., 0), not setImmediate.
setTimeout(() => resolve(), 0);
});
}
module.exports = {
export = {
findSimilarNotes
};

View File

@ -1,6 +1,6 @@
"use strict";
const similarityService = require('../../becca/similarity.js');
const similarityService = require('../../becca/similarity');
const becca = require('../../becca/becca');
async function getSimilarNotes(req) {

View File

@ -7,7 +7,8 @@
"strict": true,
"noImplicitAny": true,
"resolveJsonModule": true,
"lib": ["ES2021"]
"lib": ["ES2021"],
"downlevelIteration": true
},
"include": [
"./src/**/*.js",