improvements to lexer and its tests

This commit is contained in:
zadam 2020-07-21 23:42:59 +02:00
parent 32ecb43b5c
commit 60e8bd98b9
2 changed files with 39 additions and 29 deletions

View File

@ -22,6 +22,16 @@ describe("Lexer fulltext", () => {
.toEqual(["i can use \" or ` or #~=*", "without", "problem"]); .toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
}); });
// An apostrophe in the middle of a word (d'Artagnan) must be kept as a literal
// character rather than opening a quoted string. The same input word is checked
// both before the "#hero" label and after the "=" operator.
it("quote inside a word does not have a special meaning", () => {
const lexResult = lex("d'Artagnan is dead #hero = d'Artagnan");
// the words before "#hero" are expected as fulltext tokens; note the expected
// token text is lower-case ("d'artagnan") although the input is mixed-case
expect(lexResult.fulltextTokens.map(t => t.token))
.toEqual(["d'artagnan", "is", "dead"]);
// "#hero", "=" and the value are expected as three separate expression tokens
expect(lexResult.expressionTokens.map(t => t.token))
.toEqual(['#hero', '=', "d'artagnan"]);
});
it("if quote is not ended then it's just one long token", () => { it("if quote is not ended then it's just one long token", () => {
expect(lex("'unfinished quote").fulltextTokens.map(t => t.token)) expect(lex("'unfinished quote").fulltextTokens.map(t => t.token))
.toEqual(["unfinished quote"]); .toEqual(["unfinished quote"]);
@ -52,16 +62,16 @@ describe("Lexer expression", () => {
it("simple label operator with in quotes and without", () => { it("simple label operator with in quotes and without", () => {
expect(lex("#label*=*'text'").expressionTokens) expect(lex("#label*=*'text'").expressionTokens)
.toEqual([ .toEqual([
{token: "#label", inQuotes: false}, {token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
{token: "*=*", inQuotes: false}, {token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
{token: "text", inQuotes: true} {token: "text", inQuotes: true, startIndex: 10, endIndex: 13}
]); ]);
expect(lex("#label*=*text").expressionTokens) expect(lex("#label*=*text").expressionTokens)
.toEqual([ .toEqual([
{token: "#label", inQuotes: false}, {token: "#label", inQuotes: false, startIndex: 0, endIndex: 5},
{token: "*=*", inQuotes: false}, {token: "*=*", inQuotes: false, startIndex: 6, endIndex: 8},
{token: "text", inQuotes: false} {token: "text", inQuotes: false, startIndex: 9, endIndex: 12}
]); ]);
}); });
@ -92,9 +102,8 @@ describe("Lexer invalid queries and edge cases", () => {
.toEqual(["#label", "~relation"]); .toEqual(["#label", "~relation"]);
}); });
it("spaces in attribute names and values", () => { it("trailing escape \\", () => {
// invalid but should be reported by parser as an error expect(lex('abc \\').fulltextTokens.map(t => t.token))
expect(lex(`#'long label'="hello o' world" ~'long relation'`).expressionTokens.map(t => t.token)) .toEqual(["abc", "\\"]);
.toEqual(["#long label", "=", "hello o' world", "~long relation"]);
}); });
}); });

View File

@ -4,31 +4,33 @@ function lex(str) {
const fulltextTokens = []; const fulltextTokens = [];
const expressionTokens = []; const expressionTokens = [];
let quotes = false; let quotes = false; // otherwise contains used quote - ', " or `
let fulltextEnded = false; let fulltextEnded = false;
let currentWord = ''; let currentWord = '';
function isOperatorSymbol(chr) { function isSymbolAnOperator(chr) {
return ['=', '*', '>', '<', '!'].includes(chr); return ['=', '*', '>', '<', '!'].includes(chr);
} }
function previousOperatorSymbol() { function isPreviousSymbolAnOperator() {
if (currentWord.length === 0) { if (currentWord.length === 0) {
return false; return false;
} }
else { else {
return isOperatorSymbol(currentWord[currentWord.length - 1]); return isSymbolAnOperator(currentWord[currentWord.length - 1]);
} }
} }
function finishWord() { function finishWord(endIndex) {
if (currentWord === '') { if (currentWord === '') {
return; return;
} }
const rec = { const rec = {
token: currentWord, token: currentWord,
inQuotes: !!quotes inQuotes: !!quotes,
startIndex: endIndex - currentWord.length + 1,
endIndex
}; };
if (fulltextEnded) { if (fulltextEnded) {
@ -44,7 +46,7 @@ function lex(str) {
const chr = str[i]; const chr = str[i];
if (chr === '\\') { if (chr === '\\') {
if ((i + 1) < str.length) { if (i + 1 < str.length) {
i++; i++;
currentWord += str[i]; currentWord += str[i];
@ -57,10 +59,8 @@ function lex(str) {
} }
else if (['"', "'", '`'].includes(chr)) { else if (['"', "'", '`'].includes(chr)) {
if (!quotes) { if (!quotes) {
if (currentWord.length === 0 || fulltextEnded) { if (currentWord.length === 0 || isPreviousSymbolAnOperator()) {
if (previousOperatorSymbol()) { finishWord(i - 1);
finishWord();
}
quotes = chr; quotes = chr;
} }
@ -71,7 +71,7 @@ function lex(str) {
} }
} }
else if (quotes === chr) { else if (quotes === chr) {
finishWord(); finishWord(i - 1);
quotes = false; quotes = false;
} }
@ -79,6 +79,7 @@ function lex(str) {
// it's a quote but within other kind of quotes so it's valid as a literal character // it's a quote but within other kind of quotes so it's valid as a literal character
currentWord += chr; currentWord += chr;
} }
continue; continue;
} }
else if (!quotes) { else if (!quotes) {
@ -87,7 +88,7 @@ function lex(str) {
fulltextEnded = true; fulltextEnded = true;
} }
else { else {
finishWord(); finishWord(i - 1);
} }
currentWord = chr; currentWord = chr;
@ -99,20 +100,20 @@ function lex(str) {
continue; continue;
} }
else if (chr === ' ') { else if (chr === ' ') {
finishWord(); finishWord(i - 1);
continue; continue;
} }
else if (fulltextEnded && ['(', ')', '.'].includes(chr)) { else if (fulltextEnded && ['(', ')', '.'].includes(chr)) {
finishWord(); finishWord(i - 1);
currentWord += chr; currentWord += chr;
finishWord(); finishWord(i);
continue; continue;
} }
else if (fulltextEnded else if (fulltextEnded
&& !['#!', '~!'].includes(currentWord) && !['#!', '~!'].includes(currentWord)
&& previousOperatorSymbol() !== isOperatorSymbol(chr)) { && isPreviousSymbolAnOperator() !== isSymbolAnOperator(chr)) {
finishWord(); finishWord(i - 1);
currentWord += chr; currentWord += chr;
continue; continue;
@ -122,7 +123,7 @@ function lex(str) {
currentWord += chr; currentWord += chr;
} }
finishWord(); finishWord(str.length - 1);
return { return {
fulltextTokens, fulltextTokens,