lexer fixes + tests

This commit is contained in:
zadam 2020-05-17 23:14:24 +02:00
parent e77e0ce675
commit 81bf84f2de
2 changed files with 59 additions and 8 deletions

View File

@ -1,6 +1,6 @@
const lexerSpec = require('../src/services/search/lexer.js'); const lexerSpec = require('../src/services/search/lexer');
describe("Lexer", function() { describe("Lexer fulltext", () => {
it("simple lexing", () => { it("simple lexing", () => {
expect(lexerSpec("hello world").fulltextTokens) expect(lexerSpec("hello world").fulltextTokens)
.toEqual(["hello", "world"]); .toEqual(["hello", "world"]);
@ -26,4 +26,36 @@ describe("Lexer", function() {
expect(lexerSpec("'unfinished quote").fulltextTokens) expect(lexerSpec("'unfinished quote").fulltextTokens)
.toEqual(["unfinished quote"]); .toEqual(["unfinished quote"]);
}); });
// Before any '#'/'@' section starts, parentheses, operator characters and a quote
// in the middle of a word are expected to stay inside the surrounding fulltext token.
it("parenthesis and symbols in fulltext section are just normal characters", () => {
expect(lexerSpec("what's u=p <b(r*t)h>").fulltextTokens)
.toEqual(["what's", "u=p", "<b(r*t)h>"]);
});
// The input seen by the lexer is: hello \#\@\'
// Each backslash-escaped character is expected to come out as a literal character,
// so the three of them form a single fulltext token.
it("escaping special characters", () => {
expect(lexerSpec("hello \\#\\@\\'").fulltextTokens)
.toEqual(["hello", "#@'"]);
});
});
describe("Lexer expression", () => {
// Attribute references keep their '#' / '@' prefix as part of the token.
it("simple attribute existence", () => {
expect(lexerSpec("#label @relation").expressionTokens)
.toEqual(["#label", "@relation"]);
});
// A run of operator characters is expected to become one token of its own,
// even without whitespace around it.
it("simple label operators", () => {
expect(lexerSpec("#label*=*text").expressionTokens)
.toEqual(["#label", "*=*", "text"]);
});
// A quoted name directly after '#' / '@' is expected to be joined to the prefix,
// and a different quote character inside a quoted value is kept verbatim.
it("spaces in attribute names and values", () => {
expect(lexerSpec(`#'long label'="hello o' world" @'long relation'`).expressionTokens)
.toEqual(["#long label", "=", "hello o' world", "@long relation"]);
});
// A lone '#' followed by a space is expected as its own token; each parenthesis is
// a separate token and OR / AND come through as ordinary word tokens.
it("complex expressions with and, or and parenthesis", () => {
expect(lexerSpec(`# (#label=text OR #second=text) AND @relation`).expressionTokens)
.toEqual(["#", "(", "#label", "=", "text", "OR", "#second", "=", "text", ")", "AND", "@relation"]);
});
}); });

View File

@ -5,12 +5,20 @@ function lexer(str) {
let quotes = false; let quotes = false;
let fulltextEnded = false; let fulltextEnded = false;
let currentWord = ''; let currentWord = '';
let symbol = false;
function isSymbol(chr) { function isOperatorSymbol(chr) {
return ['=', '*', '>', '<', '!'].includes(chr); return ['=', '*', '>', '<', '!'].includes(chr);
} }
/**
 * Reports whether the word currently being accumulated ends in an operator character.
 *
 * Reads the enclosing `currentWord` and delegates the character check to
 * `isOperatorSymbol`; it does not modify any state.
 *
 * @returns {boolean} `false` when nothing has been collected yet, otherwise the
 *     result of `isOperatorSymbol` for the last collected character.
 */
function previusOperatorSymbol() {
    const wordLength = currentWord.length;

    // Short-circuit so an empty word never reaches isOperatorSymbol.
    return wordLength > 0 && isOperatorSymbol(currentWord[wordLength - 1]);
}
function finishWord() { function finishWord() {
if (currentWord === '') { if (currentWord === '') {
return; return;
@ -42,7 +50,11 @@ function lexer(str) {
} }
else if (['"', "'", '`'].includes(chr)) { else if (['"', "'", '`'].includes(chr)) {
if (!quotes) { if (!quotes) {
if (currentWord.length === 0) { if (currentWord.length === 0 || fulltextEnded) {
if (previusOperatorSymbol()) {
finishWord();
}
quotes = chr; quotes = chr;
} }
else { else {
@ -63,19 +75,26 @@ function lexer(str) {
continue; continue;
} }
else if (!quotes) { else if (!quotes) {
if (chr === '#' || chr === '@') { if (currentWord.length === 0 && (chr === '#' || chr === '@')) {
fulltextEnded = true; fulltextEnded = true;
currentWord = chr;
continue; continue;
} }
else if (chr === ' ') { else if (chr === ' ') {
finishWord(); finishWord();
continue; continue;
} }
else if (fulltextEnded && symbol !== isSymbol(chr)) { else if (fulltextEnded && ['(', ')'].includes(chr)) {
finishWord();
currentWord += chr;
finishWord();
continue;
}
else if (fulltextEnded && previusOperatorSymbol() !== isOperatorSymbol(chr)) {
finishWord(); finishWord();
currentWord += chr; currentWord += chr;
symbol = isSymbol(chr);
continue; continue;
} }
} }