lexer fixes + tests

2025-11-03 13:09:01 +01:00 · 2020-05-17 23:14:24 +02:00 · 2020-05-17 23:14:24 +02:00 · 81bf84f2de
commit 81bf84f2de
parent e77e0ce675
2 changed files with 59 additions and 8 deletions
--- a/spec/lexer.spec.js
+++ b/spec/lexer.spec.js
@ -1,6 +1,6 @@
-const lexerSpec = require('../src/services/search/lexer.js');
+const lexerSpec = require('../src/services/search/lexer');
-describe("Lexer", function() {
+describe("Lexer fulltext", () => {
    it("simple lexing", () => {
        expect(lexerSpec("hello world").fulltextTokens)
            .toEqual(["hello", "world"]);
@ -26,4 +26,36 @@ describe("Lexer", function() {
        expect(lexerSpec("'unfinished quote").fulltextTokens)
            .toEqual(["unfinished quote"]);
    });
    // Outside of an attribute expression, brackets and operator characters carry no special meaning.
    it("parenthesis and symbols in fulltext section are just normal characters", () => {
        const { fulltextTokens } = lexerSpec("what's u=p <b(r*t)h>");

        expect(fulltextTokens).toEqual(["what's", "u=p", "<b(r*t)h>"]);
    });
    // Backslash-escaped '#', '@' and quote characters are expected to end up as literal text.
    it("escaping special characters", () => {
        const { fulltextTokens } = lexerSpec("hello \\#\\@\\'");

        expect(fulltextTokens).toEqual(["hello", "#@'"]);
    });
 });
 describe("Lexer expression", () => {
    // Each row: [test title, query string, expected expressionTokens].
    const expressionCases = [
        [
            "simple attribute existence",
            "#label @relation",
            ["#label", "@relation"],
        ],
        [
            "simple label operators",
            "#label*=*text",
            ["#label", "*=*", "text"],
        ],
        [
            "spaces in attribute names and values",
            `#'long label'="hello o' world" @'long relation'`,
            ["#long label", "=", "hello o' world", "@long relation"],
        ],
        [
            "complex expressions with and, or and parenthesis",
            `# (#label=text OR #second=text) AND @relation`,
            ["#", "(", "#label", "=", "text", "OR", "#second", "=", "text", ")", "AND", "@relation"],
        ],
    ];

    // Register one spec per row, in the order listed above.
    for (const [title, query, expectedTokens] of expressionCases) {
        it(title, () => {
            expect(lexerSpec(query).expressionTokens).toEqual(expectedTokens);
        });
    }
 });
--- a/src/services/search/lexer.js
+++ b/src/services/search/lexer.js
@ -5,12 +5,20 @@ function lexer(str) {
    let quotes = false;
    let fulltextEnded = false;
    let currentWord = '';
    let symbol = false;
-    function isSymbol(chr) {
+    function isOperatorSymbol(chr) {
        // Returns true when chr is one of the characters '=', '*', '>', '<' or '!';
        // the lexer uses this to detect the boundary between operator and non-operator text.
        return ['=', '*', '>', '<', '!'].includes(chr);
    }
    /**
     * Reports whether the word currently being accumulated ends with an
     * operator character.
     *
     * @returns {boolean} false when the current word is empty, otherwise the
     *     result of isOperatorSymbol() for its last character.
     */
    function previusOperatorSymbol() {
        // An empty word has no trailing character to inspect.
        return currentWord.length > 0 && isOperatorSymbol(currentWord.slice(-1));
    }
    function finishWord() {
        if (currentWord === '') {
            return;
@ -42,7 +50,11 @@ function lexer(str) {
        }
        else if (['"', "'", '`'].includes(chr)) {
            if (!quotes) {
-                if (currentWord.length === 0) {
+                if (currentWord.length === 0 || fulltextEnded) {
                    if (previusOperatorSymbol()) {
                        finishWord();
                    }
                    quotes = chr;
                }
                else {
@ -63,19 +75,26 @@ function lexer(str) {
            continue;
        }
        else if (!quotes) {
-            if (chr === '#' || chr === '@') {
+            if (currentWord.length === 0 && (chr === '#' || chr === '@')) {
                fulltextEnded = true;
                currentWord = chr;
                continue;
            }
            else if (chr === ' ') {
                finishWord();
                continue;
            }
-            else if (fulltextEnded && symbol !== isSymbol(chr)) {
+            else if (fulltextEnded && ['(', ')'].includes(chr)) {
                finishWord();
                currentWord += chr;
                finishWord();
                continue;
            }
            else if (fulltextEnded && previusOperatorSymbol() !== isOperatorSymbol(chr)) {
                finishWord();
                currentWord += chr;
                symbol = isSymbol(chr);
                continue;
            }
        }