Mirror of https://github.com/zadam/trilium.git (synced 2026-01-04 05:34:30 +01:00)
feat(search): allow for search through very large notes
parent 51a8937c64
commit 6c79be881d
@@ -155,9 +155,11 @@ class NoteContentFulltextExp extends Expression {
         content = normalize(content.toString());
 
         if (type === "text" && mime === "text/html") {
-            if (!this.raw && content.length < 20000) {
-                // striptags is slow for very large notes
+            if (!this.raw && content.length < 10 * 1024 * 1024) {
+                // striptags is slow for very large notes - allow up to 10MB HTML processing
                 content = this.stripTags(content);
+            } else if (!this.raw) {
+                console.info(`Skipping HTML tag stripping for very large note: ${content.length} bytes - will search raw HTML`);
             }
 
             content = content.replace(/&nbsp;/g, " ");
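Read in isolation, the new stripping decision amounts to the following (a minimal sketch, assuming a standalone helper rather than the actual NoteContentFulltextExp method; the preprocessHtml name and the raw parameter are illustrative):

    import striptags from "striptags";

    // Illustrative sketch only - mirrors the intent of the hunk above, not the real method.
    const MAX_HTML_STRIP_SIZE = 10 * 1024 * 1024; // 10MB, the new threshold (was 20000 characters)

    function preprocessHtml(content: string, raw: boolean): string {
        if (!raw && content.length < MAX_HTML_STRIP_SIZE) {
            // Small enough: strip tags so only the visible text is searched.
            return striptags(content);
        }
        if (!raw) {
            // Too large for striptags to handle quickly: search the raw HTML instead.
            console.info(`Skipping HTML tag stripping for very large note: ${content.length} bytes`);
        }
        return content;
    }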
@@ -239,11 +241,17 @@ class NoteContentFulltextExp extends Expression {
 
         const words = limitedText.toLowerCase().split(/\s+/);
 
-        // Early return for oversized word arrays
-        if (words.length > FUZZY_SEARCH_CONFIG.MAX_WORD_COUNT) {
+        // Only skip phrase matching for truly extreme word counts that could crash the system
+        if (words.length > FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT) {
+            console.error(`Phrase matching skipped due to extreme word count that could cause system instability: ${words.length} words`);
             return false;
         }
 
+        // Warn about large word counts but still attempt matching
+        if (words.length > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_WORDS) {
+            console.info(`Large word count for phrase matching: ${words.length} words - may take longer but will attempt full matching`);
+        }
+
         // Find positions of each token
         const tokenPositions: number[][] = this.tokens.map(token => {
             const normalizedToken = normalizeSearchText(token);
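Under the configuration introduced further down in this commit, the guard above is tiered: only counts past the absolute limit abort phrase matching, while counts past the warning threshold merely log. A condensed sketch (the phraseMatchingAllowed helper is hypothetical, with the thresholds inlined as literals):

    // Hypothetical helper illustrating the tiered guard; values mirror FUZZY_SEARCH_CONFIG.
    function phraseMatchingAllowed(wordCount: number): boolean {
        if (wordCount > 2_000_000) {   // ABSOLUTE_MAX_WORD_COUNT: refuse, could destabilize the process
            console.error(`Phrase matching skipped: ${wordCount} words`);
            return false;
        }
        if (wordCount > 100_000) {     // PERFORMANCE_WARNING_WORDS: proceed, but warn
            console.info(`Large word count for phrase matching: ${wordCount} words`);
        }
        return true;                   // everything below the hard limit is matched in full
    }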
@@ -303,8 +311,8 @@ class NoteContentFulltextExp extends Expression {
 
         const words = content.split(/\s+/);
 
-        // Limit word processing to prevent memory issues
-        const limitedWords = words.slice(0, FUZZY_SEARCH_CONFIG.MAX_WORD_COUNT);
+        // Only limit word processing for truly extreme cases to prevent system instability
+        const limitedWords = words.slice(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT);
 
         return limitedWords.some(word => this.fuzzyMatchSingle(token, word));
     }
@@ -14,9 +14,15 @@ export const FUZZY_SEARCH_CONFIG = {
     MAX_EDIT_DISTANCE: 2,
     // Maximum proximity distance for phrase matching (in words)
     MAX_PHRASE_PROXIMITY: 10,
-    // Content size limits for memory protection
-    MAX_CONTENT_SIZE: 50 * 1024, // 50KB
-    MAX_WORD_COUNT: 10000,
+    // Absolute hard limits for extreme cases - only to prevent system crashes
+    ABSOLUTE_MAX_CONTENT_SIZE: 100 * 1024 * 1024, // 100MB - extreme upper limit to prevent OOM
+    ABSOLUTE_MAX_WORD_COUNT: 2000000, // 2M words - extreme upper limit for word processing
+    // Performance warning thresholds - inform user but still attempt search
+    PERFORMANCE_WARNING_SIZE: 5 * 1024 * 1024, // 5MB - warn about potential performance impact
+    PERFORMANCE_WARNING_WORDS: 100000, // 100K words - warn about word count impact
+    // Progressive processing thresholds for very large content
+    PROGRESSIVE_PROCESSING_SIZE: 10 * 1024 * 1024, // 10MB - use progressive processing
+    PROGRESSIVE_PROCESSING_WORDS: 500000, // 500K words - use progressive processing
     // Performance thresholds
     EARLY_TERMINATION_THRESHOLD: 3,
 } as const;
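The configuration now distinguishes three tiers per dimension: a soft warning, a progressive-processing threshold, and an absolute cutoff. For content size, a hedged illustration of how the tiers nest (the classifyContentSize helper is not part of the commit; the thresholds are copied from the values above):

    // Illustrative only - classifies a content size against the new thresholds.
    type SizeTier = "normal" | "warn" | "progressive" | "hard-limit";

    function classifyContentSize(bytes: number): SizeTier {
        if (bytes > 100 * 1024 * 1024) return "hard-limit";   // ABSOLUTE_MAX_CONTENT_SIZE: truncated to avoid OOM
        if (bytes > 10 * 1024 * 1024) return "progressive";   // PROGRESSIVE_PROCESSING_SIZE: processed progressively
        if (bytes > 5 * 1024 * 1024) return "warn";           // PERFORMANCE_WARNING_SIZE: logged, still searched fully
        return "normal";                                      // searched with no special handling
    }

    // e.g. classifyContentSize(8 * 1024 * 1024) === "warn"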
@@ -197,29 +203,46 @@ export function validateFuzzySearchTokens(tokens: string[], operator: string): {
 }
 
 /**
- * Validates and preprocesses content for search operations with size limits.
+ * Validates and preprocesses content for search operations.
+ * Philosophy: Try to search everything! Only block truly extreme cases that could crash the system.
  *
  * @param content The content to validate and preprocess
  * @param noteId The note ID (for logging purposes)
- * @returns Processed content or null if content exceeds limits
+ * @returns Processed content, only null for truly extreme cases that could cause system instability
  */
 export function validateAndPreprocessContent(content: string, noteId?: string): string | null {
     if (!content || typeof content !== 'string') {
         return null;
     }
 
-    // Check content size limits
-    if (content.length > FUZZY_SEARCH_CONFIG.MAX_CONTENT_SIZE) {
-        console.warn(`Content size exceeds limit for note ${noteId || 'unknown'}: ${content.length} bytes`);
-        return content.substring(0, FUZZY_SEARCH_CONFIG.MAX_CONTENT_SIZE);
+    // Only block content that could actually crash the system (100MB+)
+    if (content.length > FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_CONTENT_SIZE) {
+        console.error(`Content size exceeds absolute system limit for note ${noteId || 'unknown'}: ${content.length} bytes - this could cause system instability`);
+        // Only in truly extreme cases, truncate to prevent system crash
+        return content.substring(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_CONTENT_SIZE);
     }
 
-    // Check word count limits for phrase matching
+    // Warn about very large content but still process it
+    if (content.length > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_SIZE) {
+        console.info(`Large content for note ${noteId || 'unknown'}: ${content.length} bytes - processing may take time but will attempt full search`);
+    }
+
+    // For word count, be even more permissive - only block truly extreme cases
     const wordCount = content.split(/\s+/).length;
-    if (wordCount > FUZZY_SEARCH_CONFIG.MAX_WORD_COUNT) {
-        console.warn(`Word count exceeds limit for note ${noteId || 'unknown'}: ${wordCount} words`);
-        // Take first MAX_WORD_COUNT words
-        return content.split(/\s+/).slice(0, FUZZY_SEARCH_CONFIG.MAX_WORD_COUNT).join(' ');
+    if (wordCount > FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT) {
+        console.error(`Word count exceeds absolute system limit for note ${noteId || 'unknown'}: ${wordCount} words - this could cause system instability`);
+        // Only in truly extreme cases, truncate to prevent system crash
+        return content.split(/\s+/).slice(0, FUZZY_SEARCH_CONFIG.ABSOLUTE_MAX_WORD_COUNT).join(' ');
     }
 
+    // Warn about high word counts but still process them
+    if (wordCount > FUZZY_SEARCH_CONFIG.PERFORMANCE_WARNING_WORDS) {
+        console.info(`High word count for note ${noteId || 'unknown'}: ${wordCount} words - phrase matching may take time but will attempt full search`);
+    }
+
+    // Progressive processing warning for very large content
+    if (content.length > FUZZY_SEARCH_CONFIG.PROGRESSIVE_PROCESSING_SIZE || wordCount > FUZZY_SEARCH_CONFIG.PROGRESSIVE_PROCESSING_WORDS) {
+        console.info(`Very large content for note ${noteId || 'unknown'} - using progressive processing to maintain responsiveness`);
+    }
+
     return content;
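A hedged usage sketch of the relaxed validator (the import path and the sample note are assumptions; the function itself is the one changed above):

    import { validateAndPreprocessContent } from "./text_utils.js"; // path is an assumption

    const noteId = "abc123";                                // hypothetical note
    const content = "<p>note text</p>".repeat(500_000);     // ~8MB of HTML
    const processed = validateAndPreprocessContent(content, noteId);
    // Over the 5MB warning threshold this logs an informational message but returns the
    // full content; truncation only happens past 100MB / 2M words, and null is returned
    // only for empty or non-string input.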