feat: add HTML sanitization module using DOMPurify

This commit is contained in:
Octech2722 2025-10-18 12:25:52 -05:00
parent c707af2663
commit 022c697a2b
2 changed files with 976 additions and 0 deletions

View File

@@ -0,0 +1,313 @@
/**
* HTML Sanitization module using DOMPurify
*
* Implements the security recommendations from Mozilla Readability documentation
* to sanitize HTML content and prevent script injection attacks.
*
* This is Phase 3 of the processing pipeline (after Readability and Cheerio).
*
* Note: This module should be used in contexts where the DOM is available (content scripts).
* For background scripts, the sanitization happens in the content script before sending data.
*/
import DOMPurify from 'dompurify';
import type { Config } from 'dompurify';
import { Logger } from './utils';
// Module-scoped logger used by every function in this file.
// NOTE(review): the second argument presumably identifies the content-script
// context — confirm against Logger.create in ./utils.
const logger = Logger.create('HTMLSanitizer', 'content');
/**
* Options accepted by sanitizeHtml().
*
* Every field is optional. The boolean switches and the `extra*` lists are
* applied on top of the merged configuration (defaults overlaid with
* `customConfig`).
*/
export interface SanitizeOptions {
/**
* Allow images in the sanitized HTML.
* When false, sanitizeHtml() drops `img`, `figure` and `figcaption` from the
* allowed tag list.
* @default true
*/
allowImages?: boolean;
/**
* Allow external links in the sanitized HTML.
* When false, sanitizeHtml() drops the `a` tag and the `href`, `target` and
* `rel` attributes from the allowed lists.
* @default true
*/
allowLinks?: boolean;
/**
* Allow data URIs in image sources
* @default true
*/
allowDataUri?: boolean;
/**
* Custom allowed tags (extends defaults).
* Appended to the allowed tag list after the switches above were applied.
*/
extraAllowedTags?: string[];
/**
* Custom allowed attributes (extends defaults).
* Appended to the allowed attribute list after the switches above were applied.
*/
extraAllowedAttrs?: string[];
/**
* Custom configuration for DOMPurify.
* Spread over the module defaults, so any key given here (including
* ALLOWED_TAGS / ALLOWED_ATTR) replaces the default value rather than
* extending it.
*/
customConfig?: Config;
}
/**
* Default configuration for DOMPurify
* Designed for Trilium note content (HTML notes and CKEditor compatibility)
*
* sanitizeHtml() spreads this object first and the caller's `customConfig`
* second, so any key supplied by a caller replaces the value defined here.
*/
const DEFAULT_CONFIG: Config = {
// Allow safe HTML tags commonly used in notes
ALLOWED_TAGS: [
// Text formatting
'p', 'br', 'span', 'div',
'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
'strong', 'em', 'b', 'i', 'u', 's', 'sub', 'sup',
'mark', 'small', 'del', 'ins',
// Lists
'ul', 'ol', 'li',
// Links and media
'a', 'img', 'figure', 'figcaption',
// Tables
'table', 'thead', 'tbody', 'tfoot', 'tr', 'th', 'td', 'caption', 'col', 'colgroup',
// Code
'code', 'pre', 'kbd', 'samp', 'var',
// Quotes and citations
'blockquote', 'q', 'cite',
// Structural
'article', 'section', 'header', 'footer', 'main', 'aside', 'nav',
'details', 'summary',
// Definitions
'dl', 'dt', 'dd',
// Other
'hr', 'time', 'abbr', 'address'
],
// Allow safe attributes ('style' is listed because Trilium uses inline styles)
ALLOWED_ATTR: [
'href', 'src', 'alt', 'title', 'class', 'id',
'width', 'height', 'style',
'target', 'rel',
'colspan', 'rowspan',
'datetime',
'start', 'reversed', 'type',
// NOTE(review): DOMPurify appears to match attribute names literally, so this
// wildcard entry likely has no effect; data-* handling is driven by
// ALLOW_DATA_ATTR below — confirm against the DOMPurify documentation.
'data-*' // Intended to allow data attributes for Trilium features
],
// Allow data-* attributes.
// NOTE(review): per the DOMPurify documentation this flag concerns data-*
// attributes, not data: URIs, and it is reportedly forced off while
// SAFE_FOR_TEMPLATES is enabled — confirm.
ALLOW_DATA_ATTR: true,
// Allow safe URI schemes (http, https, ftp, ftps, mailto, tel, callto, cid,
// xmpp and data, plus values that do not look like a scheme)
ALLOWED_URI_REGEXP: /^(?:(?:(?:f|ht)tps?|mailto|tel|callto|cid|xmpp|data):|[^a-z]|[a-z+.\-]+(?:[^a-z+.\-:]|$))/i,
// Keep safe HTML and remove dangerous content
KEEP_CONTENT: true,
// Return a serialized HTML string rather than a DOM node or fragment
RETURN_DOM: false,
RETURN_DOM_FRAGMENT: false,
// Do not force body context
FORCE_BODY: false,
// Do not sanitize in place; the input is left untouched
IN_PLACE: false,
// Template-safe mode.
// NOTE(review): DOMPurify documents this mode as stripping template
// expressions such as {{ }}, ${ } and <% %> from the output, which could
// alter clipped code samples — confirm this is intended.
SAFE_FOR_TEMPLATES: true,
// Reject URI schemes that ALLOWED_URI_REGEXP does not match
ALLOW_UNKNOWN_PROTOCOLS: false,
// Sanitize a fragment, not a whole document
WHOLE_DOCUMENT: false
};
/**
 * Attribute names whose values are inspected for `data:` URIs when the
 * `allowDataUri` option is disabled.
 */
const DATA_URI_CHECKED_ATTRS: ReadonlySet<string> = new Set([
  'src', 'href', 'xlink:href', 'poster', 'srcset'
]);

/**
 * Tell whether a URI-bearing attribute carries a `data:` URI.
 *
 * @param attrName - Attribute name as reported by DOMPurify
 * @param attrValue - Raw attribute value
 * @returns true when the attribute is URI-bearing and its value is (or, for
 *          `srcset`, contains a candidate that is) a `data:` URI
 */
function carriesDataUri(attrName: string, attrValue: string): boolean {
  const name = attrName.toLowerCase();
  if (!DATA_URI_CHECKED_ATTRS.has(name)) {
    return false;
  }
  // Whitespace and control characters are dropped before the scheme test so
  // that values such as " da\nta:..." cannot slip past it.
  const compact = attrValue.replace(/[\u0000-\u0020]+/g, '').toLowerCase();
  return compact.startsWith('data:') || (name === 'srcset' && compact.includes(',data:'));
}

/**
 * Tally what the most recent DOMPurify.sanitize() call removed, based on the
 * `DOMPurify.removed` report.
 */
function summarizeRemovals(): {
  elementsRemoved: number;
  attributesRemoved: number;
  tagCounts: Record<string, number>;
  attrCounts: Record<string, number>;
} {
  const tagCounts: Record<string, number> = {};
  const attrCounts: Record<string, number> = {};
  let elementsRemoved = 0;
  let attributesRemoved = 0;

  for (const entry of DOMPurify.removed) {
    if ('element' in entry) {
      const tag = entry.element.nodeName.toLowerCase();
      tagCounts[tag] = (tagCounts[tag] ?? 0) + 1;
      elementsRemoved += 1;
    } else if (entry.attribute) {
      const attr = entry.attribute.name;
      attrCounts[attr] = (attrCounts[attr] ?? 0) + 1;
      attributesRemoved += 1;
    }
  }

  return { elementsRemoved, attributesRemoved, tagCounts, attrCounts };
}

/**
 * Sanitize HTML content using DOMPurify
 * This implements the security layer recommended by Mozilla Readability
 *
 * The effective configuration is DEFAULT_CONFIG overlaid with
 * `options.customConfig`, then adjusted by the boolean switches and the
 * `extra*` lists. The result is always a string: DOM-returning modes requested
 * through `customConfig` are overridden.
 *
 * @param html - Raw HTML string to sanitize
 * @param options - Sanitization options
 * @returns Sanitized HTML string safe for insertion into Trilium, or an empty
 *          string when the input is not a string or sanitization fails
 */
export function sanitizeHtml(html: string, options: SanitizeOptions = {}): string {
  const {
    allowImages = true,
    allowLinks = true,
    allowDataUri = true,
    extraAllowedTags = [],
    extraAllowedAttrs = [],
    customConfig = {}
  } = options;

  // Values may arrive through untyped messaging; bail out before anything
  // (including the error handler below) dereferences `html.length`.
  if (typeof html !== 'string') {
    logger.warn('sanitizeHtml received non-string input', { receivedType: typeof html });
    return '';
  }

  // Tracks whether the data: URI hook was registered on the shared DOMPurify
  // instance, so the finally block removes exactly what this call added.
  let dataUriHookInstalled = false;

  try {
    // Build configuration
    const config: Config = {
      ...DEFAULT_CONFIG,
      ...customConfig,
      // This function promises a string; never let customConfig switch
      // DOMPurify into a DOM / TrustedHTML returning mode.
      RETURN_DOM: false,
      RETURN_DOM_FRAGMENT: false,
      RETURN_TRUSTED_TYPE: false
    };

    // Adjust allowed tags based on options
    if (!allowImages && config.ALLOWED_TAGS) {
      config.ALLOWED_TAGS = config.ALLOWED_TAGS.filter((tag: string) =>
        tag !== 'img' && tag !== 'figure' && tag !== 'figcaption'
      );
    }
    if (!allowLinks && config.ALLOWED_TAGS) {
      config.ALLOWED_TAGS = config.ALLOWED_TAGS.filter((tag: string) => tag !== 'a');
      if (config.ALLOWED_ATTR) {
        config.ALLOWED_ATTR = config.ALLOWED_ATTR.filter((attr: string) =>
          attr !== 'href' && attr !== 'target' && attr !== 'rel'
        );
      }
    }

    // Add extra allowed tags
    if (extraAllowedTags.length > 0 && config.ALLOWED_TAGS) {
      config.ALLOWED_TAGS = [...config.ALLOWED_TAGS, ...extraAllowedTags];
    }
    // Add extra allowed attributes
    if (extraAllowedAttrs.length > 0 && config.ALLOWED_ATTR) {
      config.ALLOWED_ATTR = [...config.ALLOWED_ATTR, ...extraAllowedAttrs];
    }

    // Block data: URIs when requested. ALLOW_DATA_ATTR is deliberately left
    // alone: it controls data-* attributes, not data: URIs, and DOMPurify
    // accepts data: sources on media tags regardless of ALLOWED_URI_REGEXP,
    // so a hook is the only way to drop them.
    if (!allowDataUri) {
      DOMPurify.addHook('uponSanitizeAttribute', (_node, data) => {
        if (data.attrName && carriesDataUri(data.attrName, data.attrValue)) {
          data.keepAttr = false;
        }
      });
      dataUriHookInstalled = true;
    }

    // Sanitize the HTML with DOMPurify (requires a DOM, i.e. a content script)
    const cleanHtml = DOMPurify.sanitize(html, config) as string;

    // Aggregate stats from DOMPurify's own removal report
    const { elementsRemoved, attributesRemoved, tagCounts, attrCounts } = summarizeRemovals();
    const bytesRemoved = html.length - cleanHtml.length;

    logger.debug('DOMPurify sanitization complete', {
      originalLength: html.length,
      cleanLength: cleanHtml.length,
      bytesRemoved,
      // Guard against 0 / 0 for empty input
      reductionPercent: html.length > 0 ? Math.round((bytesRemoved / html.length) * 100) : 0,
      elementsRemoved,
      attributesRemoved,
      removedTags: Object.keys(tagCounts).length > 0 ? tagCounts : undefined,
      removedAttrs: Object.keys(attrCounts).length > 0 ? attrCounts : undefined,
      config: {
        allowImages,
        allowLinks,
        allowDataUri,
        extraAllowedTags: extraAllowedTags.length > 0 ? extraAllowedTags : undefined
      }
    });

    return cleanHtml;
  } catch (error) {
    logger.error('Failed to sanitize HTML', error as Error, {
      htmlLength: html.length,
      options
    });
    // Return empty string on error (fail safe)
    return '';
  } finally {
    // Pop only the hook added above, even when sanitize() threw; hooks
    // registered by other code on the shared instance stay in place.
    if (dataUriHookInstalled) {
      DOMPurify.removeHook('uponSanitizeAttribute');
    }
  }
}
/**
 * Lightweight sanitization for simple text content.
 *
 * Delegates to sanitizeHtml() with a replacement tag list that keeps only
 * basic formatting, links and code elements.
 *
 * @param html - Raw HTML string to sanitize
 * @returns Sanitized HTML string
 */
export function sanitizeSimpleText(html: string): string {
  const basicFormattingTags = ['p', 'br', 'strong', 'em', 'b', 'i', 'u', 'a', 'code', 'pre'];
  const simpleTextOptions: SanitizeOptions = {
    allowImages: false,
    allowLinks: true,
    customConfig: { ALLOWED_TAGS: basicFormattingTags }
  };
  return sanitizeHtml(html, simpleTextOptions);
}
/**
 * Strictest sanitization profile — strips almost everything.
 * Intended for untrusted or potentially dangerous content.
 *
 * Delegates to sanitizeHtml() with images and links disabled, a four-tag
 * allow list and an empty attribute allow list.
 *
 * @param html - Raw HTML string to sanitize
 * @returns Sanitized HTML string
 */
export function sanitizeAggressive(html: string): string {
  const strictConfig: Config = {
    ALLOWED_TAGS: ['p', 'br', 'strong', 'em'],
    ALLOWED_ATTR: []
  };
  const strictOptions: SanitizeOptions = {
    allowImages: false,
    allowLinks: false,
    customConfig: strictConfig
  };
  return sanitizeHtml(html, strictOptions);
}
/**
 * Sanitize URLs to prevent javascript: and data: injection
 *
 * The URL is first passed through DOMPurify with every tag and attribute
 * disallowed, then rejected when it starts with a dangerous scheme.
 *
 * @param url - URL string to check
 * @returns The DOMPurify-cleaned URL, or '#' when it uses a blocked scheme
 */
export function sanitizeUrl(url: string): string {
  const cleaned = DOMPurify.sanitize(url, {
    ALLOWED_TAGS: [],
    ALLOWED_ATTR: []
  }) as string;

  // Block dangerous protocols
  const dangerousProtocols = ['javascript:', 'data:', 'vbscript:', 'file:'];

  // URL parsers ignore ASCII tab/newline anywhere in the input and strip
  // leading control characters and spaces, so "java\nscript:" or
  // "\u0001javascript:" still resolve to the javascript: scheme. Mirror that
  // normalisation before testing the scheme; a plain trim() is not enough.
  const normalized = cleaned
    .replace(/[\t\n\r]/g, '')
    .replace(/^[\s\u0000-\u0020]+/, '')
    .toLowerCase();

  const blockedProtocol = dangerousProtocols.find(protocol => normalized.startsWith(protocol));
  if (blockedProtocol !== undefined) {
    logger.warn('Blocked dangerous URL protocol', { url, protocol: blockedProtocol });
    return '#';
  }
  return cleaned;
}

/**
 * Convenience object bundling the sanitizer functions exported by this module.
 */
export const HTMLSanitizer = {
  sanitize: sanitizeHtml,
  sanitizeSimpleText,
  sanitizeAggressive,
  sanitizeUrl
};

View File

@@ -0,0 +1,663 @@
/**
* Modern Trilium Server Communication Layer for Manifest V3
* Handles connection discovery, authentication, and API communication
* with both desktop client and server instances
*/
import { Logger } from './utils';
import { TriliumResponse, ClipData } from './types';
// Module-scoped logger used throughout this file.
// NOTE(review): the second argument presumably identifies the background
// context — confirm against Logger.create in ./utils.
const logger = Logger.create('TriliumServer', 'background');
// Protocol version for compatibility checking: the major component of the
// handshake's protocolVersion must equal this value, otherwise the connection
// is reported as 'version-mismatch'.
const PROTOCOL_VERSION_MAJOR = 1;
/**
* Connection state reported by TriliumServerFacade.
*
* - 'searching': a connection search is in progress
* - 'found-desktop': the desktop client answered the handshake on 127.0.0.1
* - 'found-server': the configured server answered the handshake
* - 'not-found': no instance answered, or the search failed
* - 'version-mismatch': an instance answered with a different protocol major version
*/
export type ConnectionStatus =
| 'searching'
| 'found-desktop'
| 'found-server'
| 'not-found'
| 'version-mismatch';
/**
* Snapshot of the current connection state handed to listeners and returned
* by getConnectionStatus().
*/
export interface TriliumSearchResult {
// Current connection state
status: ConnectionStatus;
// Base URL of the instance that answered (set for 'found-desktop' and 'found-server')
url?: string;
// Desktop client port (set for 'found-desktop')
port?: number;
// Auth token sent in the Authorization header (set for 'found-server')
token?: string;
// Protocol major version of this extension (set for 'version-mismatch')
extensionMajor?: number;
// Protocol major version reported by Trilium (set for 'version-mismatch')
triliumMajor?: number;
}
/**
* JSON body expected from the `/api/clipper/handshake` endpoint.
*/
export interface TriliumHandshakeResponse {
// Must equal 'trilium' for the responder to be accepted as a Trilium instance
appName: string;
// Dotted version string; only its first (major) component is compared
protocolVersion: string;
// Reported as the instance version by testConnection() ('Unknown' when absent)
appVersion?: string;
// NOTE(review): not read anywhere in this file — confirm whether it is still needed
clipperProtocolVersion?: string;
}
/**
* Connection settings loaded from chrome.storage.sync by the facade.
*/
export interface TriliumConnectionConfig {
// Base URL of the Trilium server (storage key 'triliumServerUrl')
serverUrl?: string;
// Token sent as the Authorization header (storage key 'authToken')
authToken?: string;
// Desktop client port as a string (storage key 'triliumDesktopPort')
desktopPort?: string;
// The server is only tried when this is truthy and both serverUrl and authToken are set
enableServer?: boolean;
// The desktop client is tried unless this is explicitly false
enableDesktop?: boolean;
}
/**
 * Modern Trilium Server Facade
 *
 * Provides a unified interface for communicating with Trilium instances. It
 * discovers a reachable desktop client or server through the clipper handshake
 * endpoint, tracks the resulting connection status, notifies listeners about
 * status changes and wraps the clipper API calls.
 */
export class TriliumServerFacade {
  private triliumSearch: TriliumSearchResult = { status: 'not-found' };
  private searchPromise: Promise<void> | null = null;
  private listeners: Array<(result: TriliumSearchResult) => void> = [];

  /**
   * Starts connection discovery and periodic monitoring as a side effect.
   */
  constructor() {
    // initialize() is async; attach a handler so a failure is logged instead
    // of surfacing as an unhandled promise rejection.
    this.initialize().catch(error => {
      logger.error('Failed to initialize Trilium server facade', error as Error);
    });
  }

  /**
   * Run the initial connection search, then re-check once a minute.
   */
  private async initialize(): Promise<void> {
    logger.info('Initializing Trilium server facade');
    // Start initial search
    await this.triggerSearchForTrilium();
    // Set up periodic connection monitoring
    setInterval(() => {
      this.triggerSearchForTrilium().catch(error => {
        logger.error('Periodic connection check failed', error);
      });
    }, 60 * 1000); // Check every minute
  }

  /**
   * Get current connection status
   *
   * @returns A shallow copy of the current search result
   */
  public getConnectionStatus(): TriliumSearchResult {
    return { ...this.triliumSearch };
  }

  /**
   * Add listener for connection status changes
   *
   * The listener is invoked immediately with the current status.
   *
   * @param listener - Callback receiving a copy of the status on every change
   * @returns Function that unsubscribes the listener
   */
  public addConnectionListener(listener: (result: TriliumSearchResult) => void): () => void {
    this.listeners.push(listener);
    // Send current status immediately
    listener(this.getConnectionStatus());
    // Return unsubscribe function
    return () => {
      const index = this.listeners.indexOf(listener);
      if (index > -1) {
        this.listeners.splice(index, 1);
      }
    };
  }

  /**
   * Manually trigger search for Trilium connections
   *
   * Concurrent callers share the search that is already in flight.
   */
  public async triggerSearchForTrilium(): Promise<void> {
    // Prevent multiple simultaneous searches
    if (this.searchPromise) {
      return this.searchPromise;
    }
    this.searchPromise = this.performTriliumSearch();
    try {
      await this.searchPromise;
    } finally {
      this.searchPromise = null;
    }
  }

  /**
   * Probe the desktop client first, then the configured server, and publish
   * the outcome through setTriliumSearch().
   */
  private async performTriliumSearch(): Promise<void> {
    this.setTriliumSearch({ status: 'searching' });
    try {
      // Get connection configuration
      const config = await this.getConnectionConfig();
      // Try desktop client first (if enabled)
      if (config.enableDesktop !== false) { // Default to true if not specified
        const desktopResult = await this.tryDesktopConnection(config.desktopPort);
        if (desktopResult) {
          return; // Success, exit early
        }
      }
      // Try server connection (if enabled and configured)
      if (config.enableServer && config.serverUrl && config.authToken) {
        const serverResult = await this.tryServerConnection(config.serverUrl, config.authToken);
        if (serverResult) {
          return; // Success, exit early
        }
      }
      // If we reach here, no connections were successful
      this.setTriliumSearch({ status: 'not-found' });
    } catch (error) {
      logger.error('Connection search failed', error as Error);
      this.setTriliumSearch({ status: 'not-found' });
    }
  }

  /**
   * Resolve the desktop port to probe.
   *
   * @param configuredPort - Port from configuration, if any
   * @returns The default port when nothing is configured, the parsed port when
   *          it is a whole number in 1..65535, or undefined when it is invalid
   */
  private resolveDesktopPort(configuredPort?: string): number | undefined {
    if (!configuredPort) {
      return this.getDefaultDesktopPort();
    }
    // String() tolerates a numeric value coming back from storage.
    const text = String(configuredPort).trim();
    if (text === '') {
      return this.getDefaultDesktopPort();
    }
    if (!/^\d+$/.test(text)) {
      return undefined;
    }
    const port = Number.parseInt(text, 10);
    return port >= 1 && port <= 65535 ? port : undefined;
  }

  /**
   * Build a clipper API URL, tolerating trailing slashes on the base URL so
   * that "https://host/" does not produce "https://host//api/...".
   */
  private buildClipperUrl(baseUrl: string, path: string): string {
    return `${baseUrl.replace(/\/+$/, '')}/api/clipper/${path}`;
  }

  /**
   * Probe the desktop client on 127.0.0.1.
   *
   * @param configuredPort - Port from configuration; the default port is used when absent
   * @returns true when a Trilium instance answered the handshake (including a
   *          version mismatch), false otherwise
   */
  private async tryDesktopConnection(configuredPort?: string): Promise<boolean> {
    const port = this.resolveDesktopPort(configuredPort);
    if (port === undefined) {
      // Previously an unparsable port produced "http://127.0.0.1:NaN/...".
      logger.warn('Ignoring invalid desktop port', { configuredPort });
      return false;
    }
    const baseUrl = `http://127.0.0.1:${port}`;
    try {
      logger.debug('Trying desktop connection', { port });
      const response = await this.fetchWithTimeout(this.buildClipperUrl(baseUrl, 'handshake'), {
        method: 'GET',
        headers: { 'Accept': 'application/json' }
      }, 5000);
      if (!response.ok) {
        return false;
      }
      const data: TriliumHandshakeResponse = await response.json();
      if (data.appName === 'trilium') {
        this.setTriliumSearchWithVersionCheck(data, {
          status: 'found-desktop',
          port: port,
          url: baseUrl
        });
        return true;
      }
    } catch (error) {
      logger.debug('Desktop connection failed', error, { port });
    }
    return false;
  }

  /**
   * Probe a Trilium server.
   *
   * @param serverUrl - Base URL of the server
   * @param authToken - Token sent as the Authorization header
   * @returns true when a Trilium instance answered the handshake (including a
   *          version mismatch), false otherwise
   */
  private async tryServerConnection(serverUrl: string, authToken: string): Promise<boolean> {
    try {
      logger.debug('Trying server connection', { serverUrl });
      const response = await this.fetchWithTimeout(this.buildClipperUrl(serverUrl, 'handshake'), {
        method: 'GET',
        headers: {
          'Accept': 'application/json',
          'Authorization': authToken
        }
      }, 10000);
      if (!response.ok) {
        return false;
      }
      const data: TriliumHandshakeResponse = await response.json();
      if (data.appName === 'trilium') {
        this.setTriliumSearchWithVersionCheck(data, {
          status: 'found-server',
          url: serverUrl,
          token: authToken
        });
        return true;
      }
    } catch (error) {
      logger.debug('Server connection failed', error, { serverUrl });
    }
    return false;
  }

  /**
   * Store a new search result and notify every listener. A throwing listener
   * is logged and does not prevent the others from being called.
   */
  private setTriliumSearch(result: TriliumSearchResult): void {
    this.triliumSearch = { ...result };
    // Notify all listeners
    this.listeners.forEach(listener => {
      try {
        listener(this.getConnectionStatus());
      } catch (error) {
        logger.error('Error in connection listener', error as Error);
      }
    });
    logger.debug('Connection status updated', { status: result.status });
  }

  /**
   * Publish `result` when the handshake's protocol major version matches this
   * extension, otherwise publish a 'version-mismatch' status.
   *
   * A missing or unparsable protocolVersion is treated as a mismatch; in that
   * case `triliumMajor` is omitted from the published status.
   */
  private setTriliumSearchWithVersionCheck(handshake: TriliumHandshakeResponse, result: TriliumSearchResult): void {
    // parseInt stops at the first '.', yielding the major component.
    const major = Number.parseInt(String(handshake.protocolVersion ?? ''), 10);
    if (major !== PROTOCOL_VERSION_MAJOR) {
      this.setTriliumSearch({
        status: 'version-mismatch',
        extensionMajor: PROTOCOL_VERSION_MAJOR,
        ...(Number.isNaN(major) ? {} : { triliumMajor: major })
      });
    } else {
      this.setTriliumSearch(result);
    }
  }

  /**
   * Load connection settings from chrome.storage.sync.
   *
   * @returns The stored settings, or an empty object when storage access fails
   */
  private async getConnectionConfig(): Promise<TriliumConnectionConfig> {
    try {
      const data = await chrome.storage.sync.get([
        'triliumServerUrl',
        'authToken',
        'triliumDesktopPort',
        'enableServer',
        'enableDesktop'
      ]);
      return {
        serverUrl: data.triliumServerUrl,
        authToken: data.authToken,
        desktopPort: data.triliumDesktopPort,
        enableServer: data.enableServer,
        enableDesktop: data.enableDesktop
      };
    } catch (error) {
      logger.error('Failed to get connection config', error as Error);
      return {};
    }
  }

  /**
   * Default desktop port: 37740 when the extension name ends with '(dev)',
   * 37840 otherwise.
   */
  private getDefaultDesktopPort(): number {
    // Check if this is a development environment
    const isDev = chrome.runtime.getManifest().name?.endsWith('(dev)');
    return isDev ? 37740 : 37840;
  }

  /**
   * Wait for Trilium connection to be established
   *
   * Polls every 500 ms while a search is in progress.
   *
   * @throws Error when the status settles on 'not-found' or 'version-mismatch'
   */
  public async waitForTriliumConnection(): Promise<void> {
    return new Promise((resolve, reject) => {
      const checkStatus = () => {
        if (this.triliumSearch.status === 'searching') {
          setTimeout(checkStatus, 500);
        } else if (this.triliumSearch.status === 'not-found' || this.triliumSearch.status === 'version-mismatch') {
          reject(new Error(`Trilium connection not available: ${this.triliumSearch.status}`));
        } else {
          resolve();
        }
      };
      checkStatus();
    });
  }

  /**
   * Call Trilium API endpoint
   *
   * @param method - HTTP method
   * @param path - Path below `/api/clipper/`
   * @param body - Optional payload; strings are sent as-is, other values are JSON-encoded
   * @returns The parsed JSON response
   * @throws Error when no connection is available or the response is not OK
   */
  public async callService(method: string, path: string, body?: unknown): Promise<unknown> {
    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      'Accept': 'application/json'
    };
    const fetchOptions: RequestInit = { method, headers };
    if (body) {
      fetchOptions.body = typeof body === 'string' ? body : JSON.stringify(body);
    }
    try {
      // Ensure we have a connection
      await this.waitForTriliumConnection();
      // Add authentication if available
      if (this.triliumSearch.token) {
        headers['Authorization'] = this.triliumSearch.token;
      }
      // Add trilium-specific headers
      headers['trilium-local-now-datetime'] = this.getLocalNowDateTime();

      const baseUrl = this.triliumSearch.url;
      if (!baseUrl) {
        // Guards against requesting "undefined/api/clipper/...".
        throw new Error('Trilium connection URL is not available');
      }
      const url = this.buildClipperUrl(baseUrl, path);
      logger.debug('Making API request', { method, url, path });
      const response = await this.fetchWithTimeout(url, fetchOptions, 30000);
      if (!response.ok) {
        const errorText = await response.text();
        throw new Error(`HTTP ${response.status}: ${errorText}`);
      }
      return await response.json();
    } catch (error) {
      logger.error('Trilium API call failed', error as Error, { method, path });
      throw error;
    }
  }

  /**
   * Create a new note in Trilium
   *
   * @param clipData - Clipped content and metadata
   * @param forceNew - Passed to the server to force a new note even if the URL exists
   * @param options - Optional note type and MIME type
   * @returns Success with the new note id, or failure with an error message
   */
  public async createNote(
    clipData: ClipData,
    forceNew = false,
    options?: { type?: string; mime?: string }
  ): Promise<TriliumResponse> {
    try {
      logger.info('Creating note in Trilium', {
        title: clipData.title,
        type: clipData.type,
        contentLength: clipData.content?.length || 0,
        url: clipData.url,
        forceNew,
        noteType: options?.type,
        mime: options?.mime
      });
      // Server expects pageUrl, clipType, and other fields at top level
      const noteData = {
        title: clipData.title || 'Untitled Clip',
        content: clipData.content || '',
        pageUrl: clipData.url || '', // Top-level field - used for duplicate detection
        clipType: clipData.type || 'unknown', // Top-level field - used for note categorization
        images: clipData.images || [], // Images to process
        forceNew, // Pass to server to force new note even if URL exists
        type: options?.type, // Optional note type (e.g., 'code' for markdown)
        mime: options?.mime, // Optional MIME type (e.g., 'text/markdown')
        labels: {
          // Additional labels can go here if needed
          clipDate: new Date().toISOString()
        }
      };
      logger.debug('Sending note data to server', {
        pageUrl: noteData.pageUrl,
        clipType: noteData.clipType,
        hasImages: noteData.images.length > 0,
        noteType: noteData.type,
        mime: noteData.mime
      });
      const result = await this.callService('POST', 'clippings', noteData) as { noteId: string };
      logger.info('Note created successfully', { noteId: result.noteId });
      return {
        success: true,
        noteId: result.noteId
      };
    } catch (error) {
      logger.error('Failed to create note', error as Error);
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      };
    }
  }

  /**
   * Create a child note under an existing parent note
   *
   * The child is always sent with type 'code' and MIME 'text/markdown'.
   * NOTE(review): `noteData.type` and `noteData.url` are accepted but not
   * forwarded to the server — confirm whether that is intended.
   *
   * @param parentNoteId - Id of the parent note
   * @param noteData - Title, content and optional attributes of the child
   * @returns Success with the child note id, or failure with an error message
   */
  public async createChildNote(
    parentNoteId: string,
    noteData: {
      title: string;
      content: string;
      type?: string;
      url?: string;
      attributes?: Array<{ type: string; name: string; value: string }>;
    }
  ): Promise<TriliumResponse> {
    try {
      logger.info('Creating child note', {
        parentNoteId,
        title: noteData.title,
        contentLength: noteData.content.length
      });
      const childNoteData = {
        title: noteData.title,
        content: noteData.content,
        type: 'code', // Markdown notes are typically 'code' type
        mime: 'text/markdown',
        attributes: noteData.attributes || []
      };
      const result = await this.callService(
        'POST',
        // Encode the id so it cannot alter the request path.
        `notes/${encodeURIComponent(parentNoteId)}/children`,
        childNoteData
      ) as { note: { noteId: string } };
      logger.info('Child note created successfully', {
        childNoteId: result.note.noteId,
        parentNoteId
      });
      return {
        success: true,
        noteId: result.note.noteId
      };
    } catch (error) {
      logger.error('Failed to create child note', error as Error);
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      };
    }
  }

  /**
   * Append content to an existing note
   *
   * @param noteId - Id of the note to append to
   * @param clipData - Clipped content and metadata
   * @returns Success with the note id, or failure with an error message
   */
  public async appendToNote(noteId: string, clipData: ClipData): Promise<TriliumResponse> {
    try {
      logger.info('Appending to existing note', {
        noteId,
        contentLength: clipData.content?.length || 0
      });
      const appendData = {
        content: clipData.content || '',
        images: clipData.images || [],
        clipType: clipData.type || 'unknown',
        clipDate: new Date().toISOString()
      };
      await this.callService('PUT', `clippings/${encodeURIComponent(noteId)}/append`, appendData);
      logger.info('Content appended successfully', { noteId });
      return {
        success: true,
        noteId
      };
    } catch (error) {
      logger.error('Failed to append to note', error as Error);
      return {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred'
      };
    }
  }

  /**
   * Check if a note exists for the given URL
   *
   * `title` and `createdAt` in the result are placeholders generated locally;
   * the API response only supplies the note id. Lookup failures are logged and
   * reported as "does not exist".
   *
   * @param url - Page URL to look up
   */
  public async checkForExistingNote(url: string): Promise<{
    exists: boolean;
    noteId?: string;
    title?: string;
    createdAt?: string;
  }> {
    try {
      const encodedUrl = encodeURIComponent(url);
      const result = await this.callService('GET', `notes-by-url/${encodedUrl}`) as { noteId: string | null };
      if (result.noteId) {
        logger.info('Found existing note for URL', { url, noteId: result.noteId });
        return {
          exists: true,
          noteId: result.noteId,
          title: 'Existing clipping', // Title will be fetched by popup if needed
          createdAt: new Date().toISOString() // API doesn't return this currently
        };
      }
      return { exists: false };
    } catch (error) {
      logger.error('Failed to check for existing note', error as Error);
      return { exists: false };
    }
  }

  /**
   * Opens a note in Trilium
   * Sends a request to open the note in the Trilium app
   *
   * @param noteId - Id of the note to open
   * @throws Rethrows any error raised by the API call
   */
  public async openNote(noteId: string): Promise<void> {
    try {
      logger.info('Opening note in Trilium', { noteId });
      await this.callService('GET', `open/${encodeURIComponent(noteId)}`);
      logger.info('Note open request sent successfully', { noteId });
    } catch (error) {
      logger.error('Failed to open note in Trilium', error as Error);
      throw error;
    }
  }

  /**
   * Run one handshake request and translate the outcome into a test result.
   */
  private async probeHandshake(
    url: string,
    headers: Record<string, string>,
    timeoutMs: number
  ): Promise<{ connected: boolean; version?: string; error?: string }> {
    try {
      const response = await this.fetchWithTimeout(url, { method: 'GET', headers }, timeoutMs);
      if (!response.ok) {
        return { connected: false, error: `HTTP ${response.status}` };
      }
      const data: TriliumHandshakeResponse = await response.json();
      if (data.appName !== 'trilium') {
        return { connected: false, error: 'Invalid response - not a Trilium instance' };
      }
      return { connected: true, version: data.appVersion || 'Unknown' };
    } catch (error) {
      return {
        connected: false,
        error: error instanceof Error ? error.message : 'Connection failed'
      };
    }
  }

  /**
   * Test connection to Trilium instance using the same endpoints as automatic discovery
   * This ensures consistency between background monitoring and manual testing
   *
   * The server is tested when `serverUrl` is given. The desktop client is
   * tested when `desktopPort` is given or no `serverUrl` was supplied. This
   * method does not change the facade's connection status.
   *
   * @param serverUrl - Base URL of the server to test
   * @param authToken - Token sent as the Authorization header to the server
   * @param desktopPort - Desktop client port; the default port is used when absent
   * @returns Per-target results; a target that was not tested is omitted
   */
  public async testConnection(serverUrl?: string, authToken?: string, desktopPort?: string): Promise<{
    server?: { connected: boolean; version?: string; error?: string };
    desktop?: { connected: boolean; version?: string; error?: string };
  }> {
    const results: {
      server?: { connected: boolean; version?: string; error?: string };
      desktop?: { connected: boolean; version?: string; error?: string };
    } = {};

    // Test server if provided - use the same clipper handshake endpoint as automatic discovery
    if (serverUrl) {
      const headers: Record<string, string> = { 'Accept': 'application/json' };
      if (authToken) {
        headers['Authorization'] = authToken;
      }
      results.server = await this.probeHandshake(
        this.buildClipperUrl(serverUrl, 'handshake'),
        headers,
        10000
      );
    }

    // Test desktop client - use the same clipper handshake endpoint as automatic discovery
    if (desktopPort || !serverUrl) { // Test desktop by default if no server specified
      const port = this.resolveDesktopPort(desktopPort);
      if (port === undefined) {
        // Report the bad setting instead of probing "127.0.0.1:NaN".
        results.desktop = { connected: false, error: 'Invalid desktop port' };
      } else {
        results.desktop = await this.probeHandshake(
          this.buildClipperUrl(`http://127.0.0.1:${port}`, 'handshake'),
          { 'Accept': 'application/json' },
          5000
        );
      }
    }
    return results;
  }

  /**
   * Format the current local time as "YYYY-MM-DD HH:mm:ss.SSS±HH:MM".
   */
  private getLocalNowDateTime(): string {
    const date = new Date();
    // getTimezoneOffset() is positive for zones behind UTC, hence the inverted sign below.
    const offset = date.getTimezoneOffset();
    const absOffset = Math.abs(offset);
    return (
      new Date(date.getTime() - offset * 60 * 1000)
        .toISOString()
        .slice(0, 23)
        .replace('T', ' ') +
      (offset > 0 ? '-' : '+') +
      Math.floor(absOffset / 60).toString().padStart(2, '0') + ':' +
      (absOffset % 60).toString().padStart(2, '0')
    );
  }

  /**
   * fetch() wrapper that aborts the request after `timeoutMs` milliseconds.
   *
   * @throws Whatever fetch() rejects with, including the abort error on timeout
   */
  private async fetchWithTimeout(url: string, options: RequestInit, timeoutMs: number): Promise<Response> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
    try {
      const response = await fetch(url, {
        ...options,
        signal: controller.signal
      });
      return response;
    } finally {
      clearTimeout(timeoutId);
    }
  }
}
// Singleton instance
// Constructing the facade runs its constructor, which kicks off initialize()
// (initial connection search plus a one-minute monitoring interval), so
// importing this module has that side effect.
export const triliumServerFacade = new TriliumServerFacade();