mirror of
				https://github.com/zadam/trilium.git
				synced 2025-11-04 05:28:59 +01:00 
			
		
		
		
	Create better relationships between notes, sanitize ridiculous spacing to save tokens
This commit is contained in:
		
							parent
							
								
									19bf741cd9
								
							
						
					
					
						commit
						7e232d17e1
					
				@ -18,45 +18,89 @@ export abstract class BaseEmbeddingProvider implements EmbeddingProvider {
 | 
				
			|||||||
    abstract generateEmbeddings(text: string): Promise<Float32Array>;
 | 
					    abstract generateEmbeddings(text: string): Promise<Float32Array>;
 | 
				
			||||||
    abstract generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]>;
 | 
					    abstract generateBatchEmbeddings(texts: string[]): Promise<Float32Array[]>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /**
					     * Normalizes a short piece of text before it is placed into the embedding input.
					     *
					     * Every run of whitespace characters (spaces, tabs, newlines, ...) is collapsed
					     * into a single space, and leading/trailing whitespace is removed.
					     *
					     * @param text - The text to normalize.
					     * @returns The text with whitespace collapsed and the ends trimmed.
					     */
					    private cleanText(text: string): string {
					        const collapsed = text.replace(/\s+/g, ' ');
					        return collapsed.trim();
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
     * Generates a rich text representation of a note's context for embedding
 | 
					     * Generates a rich text representation of a note's context for embedding
 | 
				
			||||||
     */
 | 
					     */
 | 
				
			||||||
    protected generateNoteContextText(context: NoteEmbeddingContext): string {
 | 
					    protected generateNoteContextText(context: NoteEmbeddingContext): string {
 | 
				
			||||||
        const parts = [
 | 
					        // Start with core note information
 | 
				
			||||||
            `Title: ${context.title}`,
 | 
					        let result =
 | 
				
			||||||
            `Type: ${context.type}`,
 | 
					            `Title: ${this.cleanText(context.title)}\n` +
 | 
				
			||||||
            `MIME: ${context.mime}`,
 | 
					            `Type: ${context.type}\n` +
 | 
				
			||||||
            `Created: ${context.dateCreated}`,
 | 
					            `MIME: ${context.mime}\n` +
 | 
				
			||||||
            `Modified: ${context.dateModified}`
 | 
					            `Created: ${context.dateCreated}\n` +
 | 
				
			||||||
        ];
 | 
					            `Modified: ${context.dateModified}\n`;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add attributes in a concise format
 | 
				
			||||||
        if (context.attributes.length > 0) {
 | 
					        if (context.attributes.length > 0) {
 | 
				
			||||||
            parts.push('Attributes:');
 | 
					            result += 'Attributes: ';
 | 
				
			||||||
            for (const attr of context.attributes) {
 | 
					            const attributeTexts = context.attributes.map(attr =>
 | 
				
			||||||
                parts.push(`  ${attr.type} - ${attr.name}: ${attr.value}`);
 | 
					                `${attr.type}:${attr.name}=${this.cleanText(attr.value)}`
 | 
				
			||||||
            }
 | 
					            );
 | 
				
			||||||
 | 
					            result += attributeTexts.join('; ') + '\n';
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add important label values concisely
 | 
				
			||||||
 | 
					        if (context.labelValues && Object.keys(context.labelValues).length > 0) {
 | 
				
			||||||
 | 
					            result += 'Labels: ';
 | 
				
			||||||
 | 
					            const labelTexts = Object.entries(context.labelValues).map(([name, value]) =>
 | 
				
			||||||
 | 
					                `${name}=${this.cleanText(value)}`
 | 
				
			||||||
 | 
					            );
 | 
				
			||||||
 | 
					            result += labelTexts.join('; ') + '\n';
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add parents concisely
 | 
				
			||||||
        if (context.parentTitles.length > 0) {
 | 
					        if (context.parentTitles.length > 0) {
 | 
				
			||||||
            parts.push('Parent Notes:');
 | 
					            result += `Parents: ${context.parentTitles.map(t => this.cleanText(t)).join('; ')}\n`;
 | 
				
			||||||
            parts.push(...context.parentTitles.map(t => `  ${t}`));
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add children concisely
 | 
				
			||||||
        if (context.childTitles.length > 0) {
 | 
					        if (context.childTitles.length > 0) {
 | 
				
			||||||
            parts.push('Child Notes:');
 | 
					            result += `Children: ${context.childTitles.map(t => this.cleanText(t)).join('; ')}\n`;
 | 
				
			||||||
            parts.push(...context.childTitles.map(t => `  ${t}`));
 | 
					 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add template/inheritance relationships concisely
 | 
				
			||||||
 | 
					        if (context.templateTitles && context.templateTitles.length > 0) {
 | 
				
			||||||
 | 
					            result += `Templates: ${context.templateTitles.map(t => this.cleanText(t)).join('; ')}\n`;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add related notes concisely
 | 
				
			||||||
 | 
					        if (context.relatedNotes && context.relatedNotes.length > 0) {
 | 
				
			||||||
 | 
					            result += 'Related: ';
 | 
				
			||||||
 | 
					            const relatedTexts = context.relatedNotes.map(rel =>
 | 
				
			||||||
 | 
					                `${rel.relationName}→${this.cleanText(rel.targetTitle)}`
 | 
				
			||||||
 | 
					            );
 | 
				
			||||||
 | 
					            result += relatedTexts.join('; ') + '\n';
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add backlinks concisely
 | 
				
			||||||
 | 
					        if (context.backlinks && context.backlinks.length > 0) {
 | 
				
			||||||
 | 
					            result += 'Referenced By: ';
 | 
				
			||||||
 | 
					            const backlinkTexts = context.backlinks.map(link =>
 | 
				
			||||||
 | 
					                `${this.cleanText(link.sourceTitle)}→${link.relationName}`
 | 
				
			||||||
 | 
					            );
 | 
				
			||||||
 | 
					            result += backlinkTexts.join('; ') + '\n';
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        // Add attachments concisely
 | 
				
			||||||
        if (context.attachments.length > 0) {
 | 
					        if (context.attachments.length > 0) {
 | 
				
			||||||
            parts.push('Attachments:');
 | 
					            result += 'Attachments: ';
 | 
				
			||||||
            for (const att of context.attachments) {
 | 
					            const attachmentTexts = context.attachments.map(att =>
 | 
				
			||||||
                parts.push(`  ${att.title} (${att.mime})`);
 | 
					                `${this.cleanText(att.title)}(${att.mime})`
 | 
				
			||||||
            }
 | 
					            );
 | 
				
			||||||
 | 
					            result += attachmentTexts.join('; ') + '\n';
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        parts.push('Content:', context.content);
 | 
					        // Add content (already cleaned in getNoteEmbeddingContext)
 | 
				
			||||||
 | 
					        result += `Content: ${context.content}`;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        return parts.join('\n');
 | 
					        return result;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /**
 | 
					    /**
 | 
				
			||||||
 | 
				
			|||||||
@ -22,6 +22,18 @@ export interface NoteEmbeddingContext {
 | 
				
			|||||||
        title: string;
 | 
					        title: string;
 | 
				
			||||||
        mime: string;
 | 
					        mime: string;
 | 
				
			||||||
    }[];
 | 
					    }[];
 | 
				
			||||||
 | 
					    backlinks?: {
 | 
				
			||||||
 | 
					        sourceNoteId: string;
 | 
				
			||||||
 | 
					        sourceTitle: string;
 | 
				
			||||||
 | 
					        relationName: string;
 | 
				
			||||||
 | 
					    }[];
 | 
				
			||||||
 | 
					    relatedNotes?: {
 | 
				
			||||||
 | 
					        targetNoteId: string;
 | 
				
			||||||
 | 
					        targetTitle: string;
 | 
				
			||||||
 | 
					        relationName: string;
 | 
				
			||||||
 | 
					    }[];
 | 
				
			||||||
 | 
					    labelValues?: Record<string, string>;
 | 
				
			||||||
 | 
					    templateTitles?: string[];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 | 
				
			|||||||
@ -7,6 +7,7 @@ import becca from "../../../becca/becca.js";
 | 
				
			|||||||
import type { NoteEmbeddingContext } from "./embeddings_interface.js";
 | 
					import type { NoteEmbeddingContext } from "./embeddings_interface.js";
 | 
				
			||||||
import { getEmbeddingProviders, getEnabledEmbeddingProviders } from "./providers.js";
 | 
					import { getEmbeddingProviders, getEnabledEmbeddingProviders } from "./providers.js";
 | 
				
			||||||
import eventService from "../../events.js";
 | 
					import eventService from "../../events.js";
 | 
				
			||||||
 | 
					import type BNote from "../../../becca/entities/bnote.js";
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// Type definition for embedding result
 | 
					// Type definition for embedding result
 | 
				
			||||||
interface EmbeddingResult {
 | 
					interface EmbeddingResult {
 | 
				
			||||||
@ -178,6 +179,31 @@ export async function findSimilarNotes(
 | 
				
			|||||||
        .slice(0, limit);
 | 
					        .slice(0, limit);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/**
					 * Reduces raw note content to compact plain text suitable for embedding.
					 *
					 * Steps, in order:
					 *  1. When the note is HTML (`type === 'text'` and `mime === 'text/html'`), or the
					 *     content contains a literal `<div>` or `<p>` tag, every `<...>` tag is replaced
					 *     by a space and a small set of HTML entities (`&nbsp;`, `&amp;`, `&lt;`,
					 *     `&gt;`, `&quot;`, `&#39;`) is decoded.
					 *  2. Runs of whitespace are collapsed to a single space and the ends are trimmed.
					 *  3. Content longer than 10000 UTF-16 code units is cut and suffixed with
					 *     ` [content truncated]`.
					 *
					 * @param content - Raw note content.
					 * @param type - Note type; only the value `'text'` is inspected.
					 * @param mime - Note MIME type; only the value `'text/html'` is inspected.
					 * @returns The cleaned, possibly truncated text.
					 */
					function cleanNoteContent(content: string, type: string, mime: string): string {
					    // Upper bound on the cleaned content, in UTF-16 code units (optional, adjust as needed).
					    const MAX_CONTENT_LENGTH = 10000;

					    // Entities decoded after tag removal; `&nbsp;` becomes a plain space so that the
					    // whitespace normalization below can collapse it.
					    const ENTITY_TEXT: Record<string, string> = {
					        nbsp: ' ',
					        amp: '&',
					        lt: '<',
					        gt: '>',
					        quot: '"',
					        '#39': "'"
					    };

					    const looksLikeHtml =
					        (type === 'text' && mime === 'text/html') ||
					        content.includes('<div>') ||
					        content.includes('<p>');

					    if (looksLikeHtml) {
					        // Simple tag removal - for more complex HTML parsing, consider using a proper HTML parser.
					        // Tags become a space so that adjacent words are not glued together.
					        content = content.replace(/<[^>]*>/g, ' ');

					        // Decode in a single pass so that double-encoded input such as `&amp;lt;`
					        // yields `&lt;` rather than being decoded twice.
					        content = content.replace(
					            /&(nbsp|amp|lt|gt|quot|#39);/g,
					            (entity: string, name: string): string => ENTITY_TEXT[name] ?? entity
					        );
					    }

					    // Normalize whitespace (replace multiple spaces/newlines with single space) and trim.
					    content = content.replace(/\s+/g, ' ').trim();

					    if (content.length > MAX_CONTENT_LENGTH) {
					        let cut = MAX_CONTENT_LENGTH;

					        // Do not end on the first half of a surrogate pair: cutting between the two
					        // code units would leave a lone high surrogate in the output.
					        const lastUnit = content.charCodeAt(cut - 1);
					        if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
					            cut -= 1;
					        }

					        content = content.substring(0, cut) + ' [content truncated]';
					    }

					    return content;
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/**
 | 
					/**
 | 
				
			||||||
 * Gets context for a note to be embedded
 | 
					 * Gets context for a note to be embedded
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@ -196,13 +222,58 @@ export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbed
 | 
				
			|||||||
    const childNotes = note.getChildNotes();
 | 
					    const childNotes = note.getChildNotes();
 | 
				
			||||||
    const childTitles = childNotes.map(note => note.title);
 | 
					    const childTitles = childNotes.map(note => note.title);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Get attributes
 | 
					    // Get all attributes (not just owned ones)
 | 
				
			||||||
    const attributes = note.getOwnedAttributes().map(attr => ({
 | 
					    const attributes = note.getAttributes().map(attr => ({
 | 
				
			||||||
        type: attr.type,
 | 
					        type: attr.type,
 | 
				
			||||||
        name: attr.name,
 | 
					        name: attr.name,
 | 
				
			||||||
        value: attr.value
 | 
					        value: attr.value
 | 
				
			||||||
    }));
 | 
					    }));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Get backlinks (notes that reference this note through relations)
 | 
				
			||||||
 | 
					    const targetRelations = note.getTargetRelations();
 | 
				
			||||||
 | 
					    const backlinks = targetRelations
 | 
				
			||||||
 | 
					        .map(relation => {
 | 
				
			||||||
 | 
					            const sourceNote = relation.getNote();
 | 
				
			||||||
 | 
					            if (sourceNote && sourceNote.type !== 'search') { // Filter out search notes
 | 
				
			||||||
 | 
					                return {
 | 
				
			||||||
 | 
					                    sourceNoteId: sourceNote.noteId,
 | 
				
			||||||
 | 
					                    sourceTitle: sourceNote.title,
 | 
				
			||||||
 | 
					                    relationName: relation.name
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            return null;
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					        .filter((item): item is { sourceNoteId: string; sourceTitle: string; relationName: string } => item !== null);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Get related notes through relations
 | 
				
			||||||
 | 
					    const relations = note.getRelations();
 | 
				
			||||||
 | 
					    const relatedNotes = relations
 | 
				
			||||||
 | 
					        .map(relation => {
 | 
				
			||||||
 | 
					            const targetNote = relation.targetNote;
 | 
				
			||||||
 | 
					            if (targetNote) {
 | 
				
			||||||
 | 
					                return {
 | 
				
			||||||
 | 
					                    targetNoteId: targetNote.noteId,
 | 
				
			||||||
 | 
					                    targetTitle: targetNote.title,
 | 
				
			||||||
 | 
					                    relationName: relation.name
 | 
				
			||||||
 | 
					                };
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            return null;
 | 
				
			||||||
 | 
					        })
 | 
				
			||||||
 | 
					        .filter((item): item is { targetNoteId: string; targetTitle: string; relationName: string } => item !== null);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Extract important labels that might affect semantics
 | 
				
			||||||
 | 
					    const labelValues: Record<string, string> = {};
 | 
				
			||||||
 | 
					    const labels = note.getLabels();
 | 
				
			||||||
 | 
					    for (const label of labels) {
 | 
				
			||||||
 | 
					        // Skip CSS and UI-related labels that don't affect semantics
 | 
				
			||||||
 | 
					        if (!label.name.startsWith('css') &&
 | 
				
			||||||
 | 
					            !label.name.startsWith('workspace') &&
 | 
				
			||||||
 | 
					            !label.name.startsWith('hide') &&
 | 
				
			||||||
 | 
					            !label.name.startsWith('collapsed')) {
 | 
				
			||||||
 | 
					            labelValues[label.name] = label.value;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Get attachments
 | 
					    // Get attachments
 | 
				
			||||||
    const attachments = note.getAttachments().map(att => ({
 | 
					    const attachments = note.getAttachments().map(att => ({
 | 
				
			||||||
        title: att.title,
 | 
					        title: att.title,
 | 
				
			||||||
@ -219,6 +290,17 @@ export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbed
 | 
				
			|||||||
        content = `[${note.type} attachment: ${note.mime}]`;
 | 
					        content = `[${note.type} attachment: ${note.mime}]`;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Clean the content to remove HTML tags and normalize whitespace
 | 
				
			||||||
 | 
					    content = cleanNoteContent(content, note.type, note.mime);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    // Get template/inheritance relationships
 | 
				
			||||||
 | 
					    // This is from FNote.getNotesToInheritAttributesFrom - recreating similar logic for BNote
 | 
				
			||||||
 | 
					    const templateRelations = note.getRelations('template').concat(note.getRelations('inherit'));
 | 
				
			||||||
 | 
					    const templateTitles = templateRelations
 | 
				
			||||||
 | 
					        .map(rel => rel.targetNote)
 | 
				
			||||||
 | 
					        .filter((note): note is BNote => note !== undefined)
 | 
				
			||||||
 | 
					        .map(templateNote => templateNote.title);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    return {
 | 
					    return {
 | 
				
			||||||
        noteId: note.noteId,
 | 
					        noteId: note.noteId,
 | 
				
			||||||
        title: note.title,
 | 
					        title: note.title,
 | 
				
			||||||
@ -230,7 +312,11 @@ export async function getNoteEmbeddingContext(noteId: string): Promise<NoteEmbed
 | 
				
			|||||||
        attributes,
 | 
					        attributes,
 | 
				
			||||||
        parentTitles,
 | 
					        parentTitles,
 | 
				
			||||||
        childTitles,
 | 
					        childTitles,
 | 
				
			||||||
        attachments
 | 
					        attachments,
 | 
				
			||||||
 | 
					        backlinks,
 | 
				
			||||||
 | 
					        relatedNotes,
 | 
				
			||||||
 | 
					        labelValues,
 | 
				
			||||||
 | 
					        templateTitles
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
				
			|||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user