mirror of
https://github.com/zadam/trilium.git
synced 2025-06-06 18:08:33 +02:00
150 lines
3.8 KiB
JavaScript
150 lines
3.8 KiB
JavaScript
/**
 * Splits a search string into two token streams: a leading "fulltext" part
 * and a trailing "expression" part.
 *
 * The input is lower-cased first. Lexing starts in the fulltext section and
 * switches permanently to the expression section at the first unquoted,
 * unescaped `#` or `~`, or at a `.` that directly follows the word `note`
 * (as long as the `.` is not the last character).
 *
 * Rules visible in the implementation:
 *  - a backslash makes the following character a literal part of the word;
 *  - `'`, `"` and `` ` `` open a quoted token only at the start of a word or
 *    right after an operator character; the same character closes it;
 *  - an unquoted space ends the current word;
 *  - in the expression section `(`, `)` and `.` are always tokens of their
 *    own, and runs of operator characters are split from non-operator runs;
 *  - `#!` and `~!` stay attached to the word that follows them;
 *  - a comma is never appended to a word (it can only appear as part of an
 *    operator token in the expression section).
 *
 * Every token is `{ token, inQuotes, startIndex, endIndex }`. `startIndex`
 * is derived from `endIndex` and the token length, so it is only exact when
 * the token contains no escaped characters or dropped commas.
 *
 * @param {string} str - raw search string
 * @returns {{fulltextQuery: string, fulltextTokens: Array<{token: string, inQuotes: boolean, startIndex: number, endIndex: number}>, expressionTokens: Array<{token: string, inQuotes: boolean, startIndex: number, endIndex: number}>}}
 *          `fulltextQuery` is the trimmed prefix of the lower-cased input up
 *          to the end of the last fulltext token.
 */
function lex(str) {
    const input = str.toLowerCase();

    // Character classes are built once instead of on every loop iteration.
    const QUOTE_CHARS = ['"', "'", '`'];
    const OPERATOR_CHARS = ['=', '*', '>', '<', '!', '-', '+', '%', ','];
    const STANDALONE_CHARS = ['(', ')', '.'];
    const SECTION_PREFIXES = ['#', '~'];
    const NEGATED_PREFIXES = ['#!', '~!'];

    let fulltextQuery = '';
    const fulltextTokens = [];
    const expressionTokens = [];

    /** @type {boolean|string} */
    let quotes = false; // otherwise contains the quote in use - ', " or `
    let fulltextEnded = false;
    let currentWord = '';

    /** True when `chr` is one of the operator characters. */
    function isSymbolAnOperator(chr) {
        return OPERATOR_CHARS.includes(chr);
    }

    /** True when the word being built is non-empty and ends with an operator character. */
    function isPreviousSymbolAnOperator() {
        return currentWord.length > 0
            && isSymbolAnOperator(currentWord[currentWord.length - 1]);
    }

    /**
     * Emits the word being built as a token into the active section and resets it.
     *
     * @param {number} endIndex - index (in the lower-cased input) of the token's last character
     * @param {boolean} [createAlsoForEmptyWords=false] - emit a token even for an empty word
     *        (used for empty quoted strings)
     */
    function finishWord(endIndex, createAlsoForEmptyWords = false) {
        if (currentWord === '' && !createAlsoForEmptyWords) {
            return;
        }

        const rec = {
            token: currentWord,
            inQuotes: !!quotes,
            startIndex: endIndex - currentWord.length + 1,
            endIndex
        };

        if (fulltextEnded) {
            expressionTokens.push(rec);
        } else {
            fulltextTokens.push(rec);

            // the fulltext query always extends to the end of the latest fulltext token
            fulltextQuery = input.substring(0, endIndex + 1);
        }

        currentWord = '';
    }

    for (let i = 0; i < input.length; i++) {
        const chr = input[i];

        if (chr === '\\') {
            if (i + 1 < input.length) {
                // take the escaped character literally and skip over it
                i++;

                currentWord += input[i];
            }
            else {
                // trailing backslash has nothing to escape, keep it as is
                currentWord += chr;
            }

            continue;
        }

        if (QUOTE_CHARS.includes(chr)) {
            if (!quotes) {
                if (currentWord.length === 0 || isPreviousSymbolAnOperator()) {
                    // a pending operator run becomes its own token before the quoted string starts
                    finishWord(i - 1);

                    quotes = chr;
                }
                else {
                    // quote inside a word does not have special meaning and does not break word
                    // e.g. d'Artagnan is kept as a single token
                    currentWord += chr;
                }
            }
            else if (quotes === chr) {
                // closing quote; empty quoted strings still produce a token
                finishWord(i - 1, true);

                quotes = false;
            }
            else {
                // it's a quote, but within other kind of quotes, so it's valid as a literal character
                currentWord += chr;
            }

            continue;
        }

        if (!quotes) {
            if (!fulltextEnded && currentWord === 'note' && chr === '.' && i + 1 < input.length) {
                // "note." starts a property expression; the pending "note" is deliberately
                // NOT flushed first so that it lands in the expression section below
                fulltextEnded = true;
            }

            if (SECTION_PREFIXES.includes(chr)) {
                // Flush whatever word is pending into the section it was typed in
                // (previously a fulltext word directly followed by # or ~ was lost),
                // then switch to the expression section.
                finishWord(i - 1);

                fulltextEnded = true;
                currentWord = chr;

                continue;
            }
            else if (SECTION_PREFIXES.includes(currentWord) && chr === '!') {
                // negation stays glued to the # / ~ prefix
                currentWord += chr;
                continue;
            }
            else if (chr === ' ') {
                finishWord(i - 1);
                continue;
            }
            else if (fulltextEnded && STANDALONE_CHARS.includes(chr)) {
                // these characters are always single-character tokens in expressions
                finishWord(i - 1);
                currentWord += chr;
                finishWord(i);
                continue;
            }
            else if (fulltextEnded
                && !NEGATED_PREFIXES.includes(currentWord)
                && isPreviousSymbolAnOperator() !== isSymbolAnOperator(chr)) {

                // transition between an operator run and a non-operator run splits the token
                finishWord(i - 1);

                currentWord += chr;
                continue;
            }
        }

        if (chr === ',') {
            // commas reaching this point are dropped rather than appended to the word
            continue;
        }

        currentWord += chr;
    }

    // flush the trailing word (also covers an unterminated quoted string)
    finishWord(input.length - 1);

    fulltextQuery = fulltextQuery.trim();

    return {
        fulltextQuery,
        fulltextTokens,
        expressionTokens
    };
}
|
|
|
|
// CommonJS export: the lex function is this module's only export.
module.exports = lex;
|