mirror of
https://github.com/zadam/trilium.git
synced 2025-03-01 14:22:32 +01:00
improvements to lexer and its tests
This commit is contained in:
parent
32ecb43b5c
commit
60e8bd98b9
@ -22,6 +22,16 @@ describe("Lexer fulltext", () => {
|
||||
.toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
|
||||
});
|
||||
|
||||
it("quote inside a word does not have a special meaning", () => {
|
||||
const lexResult = lex("d'Artagnan is dead #hero = d'Artagnan");
|
||||
|
||||
expect(lexResult.fulltextTokens.map(t => t.token))
|
||||
.toEqual(["d'artagnan", "is", "dead"]);
|
||||
|
||||
expect(lexResult.expressionTokens.map(t => t.token))
|
||||
.toEqual(['#hero', '=', "d'artagnan"]);
|
||||
});
|
||||
|
||||
it("if quote is not ended then it's just one long token", () => {
|
||||
expect(lex("'unfinished quote").fulltextTokens.map(t => t.token))
|
||||
.toEqual(["unfinished quote"]);
|
||||
@ -52,16 +62,16 @@ describe("Lexer expression", () => {
|
||||
it("simple label operator with in quotes and without", () => {
|
||||
expect(lex("#label*=*'text'").expressionTokens)
|
||||
.toEqual([
|
||||
{token: "#label", inQuotes: false},
|
||||
{token: "*=*", inQuotes: false},
|
||||
{token: "text", inQuotes: true}
|
||||
{token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
|
||||
{token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
|
||||
{token: "text", inQuotes: true, startIndex: 10, endIndex: 13}
|
||||
]);
|
||||
|
||||
expect(lex("#label*=*text").expressionTokens)
|
||||
.toEqual([
|
||||
{token: "#label", inQuotes: false},
|
||||
{token: "*=*", inQuotes: false},
|
||||
{token: "text", inQuotes: false}
|
||||
{token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
|
||||
{token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
|
||||
{token: "text", inQuotes: false, startIndex: 9, endIndex: 12}
|
||||
]);
|
||||
});
|
||||
|
||||
@ -92,9 +102,8 @@ describe("Lexer invalid queries and edge cases", () => {
|
||||
.toEqual(["#label", "~relation"]);
|
||||
});
|
||||
|
||||
it("spaces in attribute names and values", () => {
|
||||
// invalid but should be reported by parser as an error
|
||||
expect(lex(`#'long label'="hello o' world" ~'long relation'`).expressionTokens.map(t => t.token))
|
||||
.toEqual(["#long label", "=", "hello o' world", "~long relation"]);
|
||||
it("trailing escape \\", () => {
|
||||
expect(lex('abc \\').fulltextTokens.map(t => t.token))
|
||||
.toEqual(["abc", "\\"]);
|
||||
});
|
||||
});
|
||||
|
@ -4,31 +4,33 @@ function lex(str) {
|
||||
const fulltextTokens = [];
|
||||
const expressionTokens = [];
|
||||
|
||||
let quotes = false;
|
||||
let quotes = false; // otherwise contains used quote - ', " or `
|
||||
let fulltextEnded = false;
|
||||
let currentWord = '';
|
||||
|
||||
function isOperatorSymbol(chr) {
|
||||
function isSymbolAnOperator(chr) {
|
||||
return ['=', '*', '>', '<', '!'].includes(chr);
|
||||
}
|
||||
|
||||
function previousOperatorSymbol() {
|
||||
function isPreviousSymbolAnOperator() {
|
||||
if (currentWord.length === 0) {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
return isOperatorSymbol(currentWord[currentWord.length - 1]);
|
||||
return isSymbolAnOperator(currentWord[currentWord.length - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
function finishWord() {
|
||||
function finishWord(endIndex) {
|
||||
if (currentWord === '') {
|
||||
return;
|
||||
}
|
||||
|
||||
const rec = {
|
||||
token: currentWord,
|
||||
inQuotes: !!quotes
|
||||
inQuotes: !!quotes,
|
||||
startIndex: endIndex - currentWord.length + 1,
|
||||
endIndex
|
||||
};
|
||||
|
||||
if (fulltextEnded) {
|
||||
@ -44,7 +46,7 @@ function lex(str) {
|
||||
const chr = str[i];
|
||||
|
||||
if (chr === '\\') {
|
||||
if ((i + 1) < str.length) {
|
||||
if (i + 1 < str.length) {
|
||||
i++;
|
||||
|
||||
currentWord += str[i];
|
||||
@ -57,10 +59,8 @@ function lex(str) {
|
||||
}
|
||||
else if (['"', "'", '`'].includes(chr)) {
|
||||
if (!quotes) {
|
||||
if (currentWord.length === 0 || fulltextEnded) {
|
||||
if (previousOperatorSymbol()) {
|
||||
finishWord();
|
||||
}
|
||||
if (currentWord.length === 0 || isPreviousSymbolAnOperator()) {
|
||||
finishWord(i - 1);
|
||||
|
||||
quotes = chr;
|
||||
}
|
||||
@ -71,7 +71,7 @@ function lex(str) {
|
||||
}
|
||||
}
|
||||
else if (quotes === chr) {
|
||||
finishWord();
|
||||
finishWord(i - 1);
|
||||
|
||||
quotes = false;
|
||||
}
|
||||
@ -79,6 +79,7 @@ function lex(str) {
|
||||
// it's a quote but within other kind of quotes so it's valid as a literal character
|
||||
currentWord += chr;
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
else if (!quotes) {
|
||||
@ -87,7 +88,7 @@ function lex(str) {
|
||||
fulltextEnded = true;
|
||||
}
|
||||
else {
|
||||
finishWord();
|
||||
finishWord(i - 1);
|
||||
}
|
||||
|
||||
currentWord = chr;
|
||||
@ -99,20 +100,20 @@ function lex(str) {
|
||||
continue;
|
||||
}
|
||||
else if (chr === ' ') {
|
||||
finishWord();
|
||||
finishWord(i - 1);
|
||||
continue;
|
||||
}
|
||||
else if (fulltextEnded && ['(', ')', '.'].includes(chr)) {
|
||||
finishWord();
|
||||
finishWord(i - 1);
|
||||
currentWord += chr;
|
||||
finishWord();
|
||||
finishWord(i);
|
||||
continue;
|
||||
}
|
||||
else if (fulltextEnded
|
||||
&& !['#!', '~!'].includes(currentWord)
|
||||
&& previousOperatorSymbol() !== isOperatorSymbol(chr)) {
|
||||
&& isPreviousSymbolAnOperator() !== isSymbolAnOperator(chr)) {
|
||||
|
||||
finishWord();
|
||||
finishWord(i - 1);
|
||||
|
||||
currentWord += chr;
|
||||
continue;
|
||||
@ -122,7 +123,7 @@ function lex(str) {
|
||||
currentWord += chr;
|
||||
}
|
||||
|
||||
finishWord();
|
||||
finishWord(str.length - 1);
|
||||
|
||||
return {
|
||||
fulltextTokens,
|
||||
|
Loading…
x
Reference in New Issue
Block a user