lexer now marks "in quotes" tokens

zadam 2020-07-19 23:19:45 +02:00
parent 4c7b1d6543
commit 35469f6f2d
9 changed files with 161 additions and 85 deletions
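In short: the lexer's finishWord() now pushes a {token, inQuotes} record instead of a bare string, and every consumer (parens, the parser, the specs) unwraps the text via .token. A minimal sketch of the new shape, using only names that appear in this diff (the exact inQuotes value for quoted words is not asserted here):

const lexer = require('../../src/services/search/lexer.js');   // path as used by the spec files

const {fulltextTokens, expressionTokens} = lexer("hello #mylabel = text");

// each entry is now a record such as {token: "hello", inQuotes: false},
// so callers that only need the text unwrap it:
fulltextTokens.map(t => t.token);      // ["hello"]
expressionTokens.map(t => t.token);    // ["#mylabel", "=", "text"]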

View File

@@ -2,75 +2,83 @@ const lexer = require('../../src/services/search/lexer.js');
describe("Lexer fulltext", () => {
it("simple lexing", () => {
expect(lexer("hello world").fulltextTokens)
expect(lexer("hello world").fulltextTokens.map(t => t.token))
.toEqual(["hello", "world"]);
});
it("use quotes to keep words together", () => {
expect(lexer("'hello world' my friend").fulltextTokens)
expect(lexer("'hello world' my friend").fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]);
expect(lexer('"hello world" my friend').fulltextTokens)
expect(lexer('"hello world" my friend').fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]);
expect(lexer('`hello world` my friend').fulltextTokens)
expect(lexer('`hello world` my friend').fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]);
});
it("you can use different quotes and other special characters inside quotes", () => {
expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens)
expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens.map(t => t.token))
.toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
});
it("if quote is not ended then it's just one long token", () => {
expect(lexer("'unfinished quote").fulltextTokens)
expect(lexer("'unfinished quote").fulltextTokens.map(t => t.token))
.toEqual(["unfinished quote"]);
});
it("parenthesis and symbols in fulltext section are just normal characters", () => {
expect(lexer("what's u=p <b(r*t)h>").fulltextTokens)
expect(lexer("what's u=p <b(r*t)h>").fulltextTokens.map(t => t.token))
.toEqual(["what's", "u=p", "<b(r*t)h>"]);
});
it("escaping special characters", () => {
expect(lexer("hello \\#\\~\\'").fulltextTokens)
expect(lexer("hello \\#\\~\\'").fulltextTokens.map(t => t.token))
.toEqual(["hello", "#~'"]);
});
});
describe("Lexer expression", () => {
it("simple attribute existence", () => {
expect(lexer("#label ~relation").expressionTokens)
expect(lexer("#label ~relation").expressionTokens.map(t => t.token))
.toEqual(["#label", "~relation"]);
});
it("simple label operators", () => {
expect(lexer("#label*=*text").expressionTokens)
expect(lexer("#label*=*text").expressionTokens.map(t => t.token))
.toEqual(["#label", "*=*", "text"]);
});
it("spaces in attribute names and values", () => {
expect(lexer(`#'long label'="hello o' world" ~'long relation'`).expressionTokens)
.toEqual(["#long label", "=", "hello o' world", "~long relation"]);
});
it("complex expressions with and, or and parenthesis", () => {
expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens)
expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens.map(t => t.token))
.toEqual(["#", "(", "#label", "=", "text", "or", "#second", "=", "text", ")", "and", "~relation"]);
});
it("dot separated properties", () => {
expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens)
expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens.map(t => t.token))
.toEqual(["#", "~author", ".", "title", "=", "hugh howey", "and", "note", ".", "book title", "=", "silo"]);
});
it("negation of label and relation", () => {
expect(lexer(`#!capital ~!neighbor`).expressionTokens)
expect(lexer(`#!capital ~!neighbor`).expressionTokens.map(t => t.token))
.toEqual(["#!capital", "~!neighbor"]);
});
it("negation of sub-expression", () => {
expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens)
expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens.map(t => t.token))
.toEqual(["#", "not", "(", "#capital", ")", "and", "note", ".", "noteid", "!=", "root"]);
});
});
describe("Lexer invalid queries and edge cases", () => {
it("concatenated attributes", () => {
expect(lexer("#label~relation").expressionTokens.map(t => t.token))
.toEqual(["#label", "~relation"]);
});
it("spaces in attribute names and values", () => {
// invalid but should be reported by parser as an error
expect(lexer(`#'long label'="hello o' world" ~'long relation'`).expressionTokens.map(t => t.token))
.toEqual(["#long label", "=", "hello o' world", "~long relation"]);
});
});

View File

@@ -2,19 +2,22 @@ const parens = require('../../src/services/search/parens.js');
describe("Parens handler", () => {
it("handles parens", () => {
expect(parens(["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"]))
const input = ["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"]
.map(token => ({token}));
expect(parens(input))
.toEqual([
[
"hello"
{token: "hello"}
],
"and",
{token: "and"},
[
[
"pick",
"one"
{token: "pick"},
{token: "one"}
],
"and",
"another"
{token: "and"},
{token: "another"}
]
]);
});

View File

@@ -1,10 +1,24 @@
const ParsingContext = require("../../src/services/search/parsing_context.js");
const parser = require('../../src/services/search/parser.js');
function tokens(...args) {
return args.map(arg => {
if (Array.isArray(arg)) {
return arg;
}
else {
return {
token: arg,
inQuotes: false
};
}
});
}
describe("Parser", () => {
it("fulltext parser without content", () => {
const rootExp = parser({
fulltextTokens: ["hello", "hi"],
fulltextTokens: tokens("hello", "hi"),
expressionTokens: [],
parsingContext: new ParsingContext({includeNoteContent: false})
});
@@ -15,7 +29,7 @@ describe("Parser", () => {
it("fulltext parser with content", () => {
const rootExp = parser({
fulltextTokens: ["hello", "hi"],
fulltextTokens: tokens("hello", "hi"),
expressionTokens: [],
parsingContext: new ParsingContext({includeNoteContent: true})
});
@@ -36,7 +50,7 @@ describe("Parser", () => {
it("simple label comparison", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#mylabel", "=", "text"],
expressionTokens: tokens("#mylabel", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -49,7 +63,7 @@ describe("Parser", () => {
it("simple attribute negation", () => {
let rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#!mylabel"],
expressionTokens: tokens("#!mylabel"),
parsingContext: new ParsingContext()
});
@@ -60,7 +74,7 @@ describe("Parser", () => {
rootExp = parser({
fulltextTokens: [],
expressionTokens: ["~!myrelation"],
expressionTokens: tokens("~!myrelation"),
parsingContext: new ParsingContext()
});
@@ -73,7 +87,7 @@ describe("Parser", () => {
it("simple label AND", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "and", "#second", "=", "text"],
expressionTokens: tokens("#first", "=", "text", "and", "#second", "=", "text"),
parsingContext: new ParsingContext(true)
});
@@ -90,7 +104,7 @@ describe("Parser", () => {
it("simple label AND without explicit AND", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "#second", "=", "text"],
expressionTokens: tokens("#first", "=", "text", "#second", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -107,7 +121,7 @@ describe("Parser", () => {
it("simple label OR", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "or", "#second", "=", "text"],
expressionTokens: tokens("#first", "=", "text", "or", "#second", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -123,8 +137,8 @@ describe("Parser", () => {
it("fulltext and simple label", () => {
const rootExp = parser({
fulltextTokens: ["hello"],
expressionTokens: ["#mylabel", "=", "text"],
fulltextTokens: tokens("hello"),
expressionTokens: tokens("#mylabel", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -141,7 +155,7 @@ describe("Parser", () => {
it("label sub-expression", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "or", ["#second", "=", "text", "and", "#third", "=", "text"]],
expressionTokens: tokens("#first", "=", "text", "or", tokens("#second", "=", "text", "and", "#third", "=", "text")),
parsingContext: new ParsingContext()
});
@@ -161,3 +175,17 @@ describe("Parser", () => {
expect(secondSubSub.attributeName).toEqual("third");
});
});
describe("Invalid tokens", () => {
it("incomplete comparison", () => {
const parsingContext = new ParsingContext();
parser({
fulltextTokens: [],
expressionTokens: tokens("#first", "="),
parsingContext
});
expect(parsingContext.error).toEqual('Misplaced or incomplete expression "="')
});
});
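The new describe block above pins down how invalid input is now reported: the parse does not throw, ParsingContext simply records the first error for the caller to inspect. A caller-side sketch of the same behaviour, reusing the ParsingContext, parser and tokens() names from above:

const parsingContext = new ParsingContext();

parser({
    fulltextTokens: [],
    expressionTokens: tokens("#first", "="),
    parsingContext
});

if (parsingContext.error) {
    // 'Misplaced or incomplete expression "="' is surfaced on the context
    // rather than console.log-ged (see the parsing_context.js change at the end of this diff).
}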

View File

@@ -529,4 +529,26 @@ describe("Search", () => {
});
// FIXME: test what happens when we order without any filter criteria
// it("comparison between labels", async () => {
// rootNote
// .child(note("Europe")
// .child(note("Austria")
// .label('capital', 'Vienna')
// .label('largestCity', 'Vienna'))
// .child(note("Canada")
// .label('capital', 'Ottawa')
// .label('largestCity', 'Toronto'))
// .child(note("Czech Republic")
// .label('capital', 'Prague')
// .label('largestCity', 'Prague'))
// );
//
// const parsingContext = new ParsingContext();
//
// const searchResults = await searchService.findNotesWithQuery('#capital = #largestCity', parsingContext);
// expect(searchResults.length).toEqual(2);
// expect(findNoteByTitle(searchResults, "Czech Republic")).toBeTruthy();
// expect(findNoteByTitle(searchResults, "Austria")).toBeTruthy();
// })
});

View File

@@ -51,6 +51,7 @@ class PropertyComparisonExp extends Expression {
if (value) {
value = value.toLowerCase();
}
if (this.comparator(value)) {
resNoteSet.add(note);
}

View File

@@ -26,10 +26,15 @@ function lexer(str) {
return;
}
const rec = {
token: currentWord,
inQuotes: quotes
};
if (fulltextEnded) {
expressionTokens.push(currentWord);
expressionTokens.push(rec);
} else {
fulltextTokens.push(currentWord);
fulltextTokens.push(rec);
}
currentWord = '';
@@ -77,8 +82,14 @@ function lexer(str) {
continue;
}
else if (!quotes) {
if (currentWord.length === 0 && (chr === '#' || chr === '~')) {
fulltextEnded = true;
if (chr === '#' || chr === '~') {
if (!fulltextEnded) {
fulltextEnded = true;
}
else {
finishWord();
}
currentWord = chr;
continue;
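The reworked branch above is what the new "concatenated attributes" spec case exercises: the first # or ~ still just ends the fulltext part, while a later one now flushes the word in progress via finishWord() before starting the next token. Roughly:

// "#label~relation":
//   '#' -> fulltextEnded = true, currentWord grows to "#label"
//   '~' -> fulltextEnded is already true, so finishWord() pushes the "#label" record
//          and currentWord restarts as "~relation"
lexer("#label~relation").expressionTokens.map(t => t.token);   // ["#label", "~relation"]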

View File

@@ -7,7 +7,7 @@ function parens(tokens) {
}
while (true) {
const leftIdx = tokens.findIndex(token => token === '(');
const leftIdx = tokens.findIndex(token => token.token === '(');
if (leftIdx === -1) {
return tokens;
@@ -17,13 +17,13 @@ function parens(tokens) {
let parensLevel = 0
for (rightIdx = leftIdx; rightIdx < tokens.length; rightIdx++) {
if (tokens[rightIdx] === ')') {
if (tokens[rightIdx].token === ')') {
parensLevel--;
if (parensLevel === 0) {
break;
}
} else if (tokens[rightIdx] === '(') {
} else if (tokens[rightIdx].token === '(') {
parensLevel++;
}
}
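parens() still only regroups tokens; the change is that it compares token.token, since each element is now a record. A minimal illustration of the new in/out shape, mirroring the spec change above:

parens([{token: "("}, {token: "hello"}, {token: ")"}, {token: "and"}, {token: "world"}]);
// => [[{token: "hello"}], {token: "and"}, {token: "world"}]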

View File

@@ -18,6 +18,8 @@ const comparatorBuilder = require('./comparator_builder');
const ValueExtractor = require('./value_extractor');
function getFulltext(tokens, parsingContext) {
tokens = tokens.map(t => t.token);
parsingContext.highlightedTokens.push(...tokens);
if (tokens.length === 0) {
@@ -50,75 +52,75 @@ function getExpression(tokens, parsingContext, level = 0) {
let i;
function parseNoteProperty() {
if (tokens[i] !== '.') {
if (tokens[i].token !== '.') {
parsingContext.addError('Expected "." to separate field path');
return;
}
i++;
if (tokens[i] === 'content') {
if (tokens[i].token === 'content') {
i += 1;
const operator = tokens[i];
const operator = tokens[i].token;
if (!isOperator(operator)) {
parsingContext.addError(`After content expected operator, but got "${tokens[i]}"`);
parsingContext.addError(`After content expected operator, but got "${tokens[i].token}"`);
return;
}
i++;
return new OrExp([
new NoteContentUnprotectedFulltextExp(operator, [tokens[i]]),
new NoteContentProtectedFulltextExp(operator, [tokens[i]])
new NoteContentUnprotectedFulltextExp(operator, [tokens[i].token]),
new NoteContentProtectedFulltextExp(operator, [tokens[i].token])
]);
}
if (tokens[i] === 'parents') {
if (tokens[i].token === 'parents') {
i += 1;
return new ChildOfExp(parseNoteProperty());
}
if (tokens[i] === 'children') {
if (tokens[i].token === 'children') {
i += 1;
return new ParentOfExp(parseNoteProperty());
}
if (tokens[i] === 'ancestors') {
if (tokens[i].token === 'ancestors') {
i += 1;
return new DescendantOfExp(parseNoteProperty());
}
if (tokens[i] === 'labels') {
if (tokens[i + 1] !== '.') {
parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`);
if (tokens[i].token === 'labels') {
if (tokens[i + 1].token !== '.') {
parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
return;
}
i += 2;
return parseLabel(tokens[i]);
return parseLabel(tokens[i].token);
}
if (tokens[i] === 'relations') {
if (tokens[i + 1] !== '.') {
parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`);
if (tokens[i].token === 'relations') {
if (tokens[i + 1].token !== '.') {
parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
return;
}
i += 2;
return parseRelation(tokens[i]);
return parseRelation(tokens[i].token);
}
if (PropertyComparisonExp.isProperty(tokens[i])) {
const propertyName = tokens[i];
const operator = tokens[i + 1];
const comparedValue = tokens[i + 2];
if (PropertyComparisonExp.isProperty(tokens[i].token)) {
const propertyName = tokens[i].token;
const operator = tokens[i + 1].token;
const comparedValue = tokens[i + 2].token;
const comparator = comparatorBuilder(operator, comparedValue);
if (!comparator) {
@@ -131,7 +133,7 @@ function getExpression(tokens, parsingContext, level = 0) {
return new PropertyComparisonExp(propertyName, comparator);
}
parsingContext.addError(`Unrecognized note property "${tokens[i]}"`);
parsingContext.addError(`Unrecognized note property "${tokens[i].token}"`);
}
function parseAttribute(name) {
@@ -153,9 +155,9 @@ function getExpression(tokens, parsingContext, level = 0) {
function parseLabel(labelName) {
parsingContext.highlightedTokens.push(labelName);
if (i < tokens.length - 2 && isOperator(tokens[i + 1])) {
let operator = tokens[i + 1];
const comparedValue = tokens[i + 2];
if (i < tokens.length - 2 && isOperator(tokens[i + 1].token)) {
let operator = tokens[i + 1].token;
const comparedValue = tokens[i + 2].token;
parsingContext.highlightedTokens.push(comparedValue);
@@ -180,7 +182,7 @@ function getExpression(tokens, parsingContext, level = 0) {
function parseRelation(relationName) {
parsingContext.highlightedTokens.push(relationName);
if (i < tokens.length - 2 && tokens[i + 1] === '.') {
if (i < tokens.length - 2 && tokens[i + 1].token === '.') {
i += 1;
return new RelationWhereExp(relationName, parseNoteProperty());
@@ -193,7 +195,7 @@ function getExpression(tokens, parsingContext, level = 0) {
const orderDefinitions = [];
let limit;
if (tokens[i] === 'orderby') {
if (tokens[i].token === 'orderby') {
do {
const propertyPath = [];
let direction = "asc";
@@ -201,13 +203,13 @@ function getExpression(tokens, parsingContext, level = 0) {
do {
i++;
propertyPath.push(tokens[i]);
propertyPath.push(tokens[i].token);
i++;
} while (tokens[i] === '.');
} while (i < tokens.length && tokens[i].token === '.');
if (["asc", "desc"].includes(tokens[i])) {
direction = tokens[i];
if (i < tokens.length && ["asc", "desc"].includes(tokens[i].token)) {
direction = tokens[i].token;
i++;
}
@@ -221,11 +223,11 @@ function getExpression(tokens, parsingContext, level = 0) {
valueExtractor,
direction
});
} while (tokens[i] === ',');
} while (i < tokens.length && tokens[i].token === ',');
}
if (tokens[i] === 'limit') {
limit = parseInt(tokens[i + 1]);
if (i < tokens.length && tokens[i].token === 'limit') {
limit = parseInt(tokens[i + 1].token);
}
return new OrderByAndLimitExp(orderDefinitions, limit);
@@ -241,16 +243,18 @@ function getExpression(tokens, parsingContext, level = 0) {
}
for (i = 0; i < tokens.length; i++) {
const token = tokens[i];
if (Array.isArray(tokens[i])) {
expressions.push(getExpression(tokens[i], parsingContext, level++));
continue;
}
const token = tokens[i].token;
if (token === '#' || token === '~') {
continue;
}
if (Array.isArray(token)) {
expressions.push(getExpression(token, parsingContext, level++));
}
else if (token.startsWith('#') || token.startsWith('~')) {
if (token.startsWith('#') || token.startsWith('~')) {
expressions.push(parseAttribute(token));
}
else if (['orderby', 'limit'].includes(token)) {
@@ -273,7 +277,7 @@ function getExpression(tokens, parsingContext, level = 0) {
i += 1;
if (!Array.isArray(tokens[i])) {
parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i]} instead`);
parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i].token} instead`);
continue;
}
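Two details of the loop above are worth spelling out: the Array.isArray check now has to run before reading .token, because a parenthesised group coming out of parens() is a plain nested array with no .token of its own, and the added i < tokens.length guards keep the orderby/limit parsing from reading past the last token. A rough illustration of the first point:

// after parens(), a sub-expression arrives as a nested array of records:
const group = [{token: "#second"}, {token: "="}, {token: "text"}];

Array.isArray(group);   // true  -> handled by the recursive getExpression() call
group.token;            // undefined -> reading it before the isArray check would misfire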

View File

@@ -12,7 +12,6 @@ class ParsingContext {
// we record only the first error, subsequent ones are usually consequence of the first
if (!this.error) {
this.error = error;
console.log(this.error);
}
}
}