lexer now marks "in quotes" tokens

2025-06-06 18:08:33 +02:00 · 2020-07-19 23:19:45 +02:00 · 2020-07-19 23:19:45 +02:00 · 35469f6f2d
commit 35469f6f2d
parent 4c7b1d6543
9 changed files with 161 additions and 85 deletions
--- a/spec/search/lexer.spec.js
+++ b/spec/search/lexer.spec.js
@ -2,75 +2,83 @@ const lexer = require('../../src/services/search/lexer.js');
 describe("Lexer fulltext", () => {
    it("simple lexing", () => {
-        expect(lexer("hello world").fulltextTokens)
+        expect(lexer("hello world").fulltextTokens.map(t => t.token))
            .toEqual(["hello", "world"]);
    });
    it("use quotes to keep words together", () => {
-        expect(lexer("'hello world' my friend").fulltextTokens)
+        expect(lexer("'hello world' my friend").fulltextTokens.map(t => t.token))
            .toEqual(["hello world", "my", "friend"]);
-        expect(lexer('"hello world" my friend').fulltextTokens)
+        expect(lexer('"hello world" my friend').fulltextTokens.map(t => t.token))
            .toEqual(["hello world", "my", "friend"]);
-        expect(lexer('`hello world` my friend').fulltextTokens)
+        expect(lexer('`hello world` my friend').fulltextTokens.map(t => t.token))
            .toEqual(["hello world", "my", "friend"]);
    });
    it("you can use different quotes and other special characters inside quotes", () => {
-        expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens)
+        expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens.map(t => t.token))
            .toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
    });
    it("if quote is not ended then it's just one long token", () => {
-        expect(lexer("'unfinished quote").fulltextTokens)
+        expect(lexer("'unfinished quote").fulltextTokens.map(t => t.token))
            .toEqual(["unfinished quote"]);
    });
    it("parenthesis and symbols in fulltext section are just normal characters", () => {
-        expect(lexer("what's u=p <b(r*t)h>").fulltextTokens)
+        expect(lexer("what's u=p <b(r*t)h>").fulltextTokens.map(t => t.token))
            .toEqual(["what's", "u=p", "<b(r*t)h>"]);
    });
    it("escaping special characters", () => {
-        expect(lexer("hello \\#\\~\\'").fulltextTokens)
+        expect(lexer("hello \\#\\~\\'").fulltextTokens.map(t => t.token))
            .toEqual(["hello", "#~'"]);
    });
 });
 describe("Lexer expression", () => {
    it("simple attribute existence", () => {
-        expect(lexer("#label ~relation").expressionTokens)
+        expect(lexer("#label ~relation").expressionTokens.map(t => t.token))
            .toEqual(["#label", "~relation"]);
    });
    it("simple label operators", () => {
-        expect(lexer("#label*=*text").expressionTokens)
+        expect(lexer("#label*=*text").expressionTokens.map(t => t.token))
            .toEqual(["#label", "*=*", "text"]);
    });
    it("spaces in attribute names and values", () => {
        expect(lexer(`#'long label'="hello o' world" ~'long relation'`).expressionTokens)
            .toEqual(["#long label", "=", "hello o' world", "~long relation"]);
    });
    it("complex expressions with and, or and parenthesis", () => {
-        expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens)
+        expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens.map(t => t.token))
            .toEqual(["#", "(", "#label", "=", "text", "or", "#second", "=", "text", ")", "and", "~relation"]);
    });
    it("dot separated properties", () => {
-        expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens)
+        expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens.map(t => t.token))
            .toEqual(["#", "~author", ".", "title", "=", "hugh howey", "and", "note", ".", "book title", "=", "silo"]);
    });
    it("negation of label and relation", () => {
-        expect(lexer(`#!capital ~!neighbor`).expressionTokens)
+        expect(lexer(`#!capital ~!neighbor`).expressionTokens.map(t => t.token))
            .toEqual(["#!capital", "~!neighbor"]);
    });
    it("negation of sub-expression", () => {
-        expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens)
+        expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens.map(t => t.token))
            .toEqual(["#", "not", "(", "#capital", ")", "and", "note", ".", "noteid", "!=", "root"]);
    });
 });
 describe("Lexer invalid queries and edge cases", () => {
    it("concatenated attributes", () => {
        expect(lexer("#label~relation").expressionTokens.map(t => t.token))
            .toEqual(["#label", "~relation"]);
    });
    it("spaces in attribute names and values", () => {
        // invalid but should be reported by parser as an error
        expect(lexer(`#'long label'="hello o' world" ~'long relation'`).expressionTokens.map(t => t.token))
            .toEqual(["#long label", "=", "hello o' world", "~long relation"]);
    });
 });
--- a/spec/search/parens.spec.js
+++ b/spec/search/parens.spec.js
@ -2,19 +2,22 @@ const parens = require('../../src/services/search/parens.js');
 describe("Parens handler", () => {
    it("handles parens", () => {
-        expect(parens(["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"]))
+        const input = ["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"]
            .map(token => ({token}));
        expect(parens(input))
            .toEqual([
                [
-                    "hello"
+                    {token: "hello"}
                ],
-                "and",
+                {token: "and"},
                [
                    [
-                        "pick",
+                        {token: "pick"},
-                        "one"
+                        {token: "one"}
                    ],
-                    "and",
+                    {token: "and"},
-                    "another"
+                    {token: "another"}
                ]
            ]);
    });
--- a/spec/search/parser.spec.js
+++ b/spec/search/parser.spec.js
@ -1,10 +1,24 @@
 const ParsingContext = require("../../src/services/search/parsing_context.js");
 const parser = require('../../src/services/search/parser.js');
 /**
  * Test helper which converts its arguments into the token structure consumed by the parser.
  *
  * Each non-array argument is wrapped into a record {token: <argument>, inQuotes: false}.
  * Array arguments are returned unchanged, so a nested tokens(...) call can be used
  * to build a sub-expression.
  *
  * @param {...*} args - token values, or arrays which are passed through as-is
  * @returns {Array} array of the same length as args
  */
 function tokens(...args) {
    return args.map(arg => {
        if (Array.isArray(arg)) {
            // already a list (e.g. the result of a nested tokens() call) - keep untouched
            return arg;
        }
        else {
            return {
                token: arg,
                inQuotes: false
            };
        }
    });
 }
 describe("Parser", () => {
    it("fulltext parser without content", () => {
        const rootExp = parser({
-            fulltextTokens: ["hello", "hi"],
+            fulltextTokens: tokens("hello", "hi"),
            expressionTokens: [],
            parsingContext: new ParsingContext({includeNoteContent: false})
        });
@ -15,7 +29,7 @@ describe("Parser", () => {
    it("fulltext parser with content", () => {
        const rootExp = parser({
-            fulltextTokens: ["hello", "hi"],
+            fulltextTokens: tokens("hello", "hi"),
            expressionTokens: [],
            parsingContext: new ParsingContext({includeNoteContent: true})
        });
@ -36,7 +50,7 @@ describe("Parser", () => {
    it("simple label comparison", () => {
        const rootExp = parser({
            fulltextTokens: [],
-            expressionTokens: ["#mylabel", "=", "text"],
+            expressionTokens: tokens("#mylabel", "=", "text"),
            parsingContext: new ParsingContext()
        });
@ -49,7 +63,7 @@ describe("Parser", () => {
    it("simple attribute negation", () => {
        let rootExp = parser({
            fulltextTokens: [],
-            expressionTokens: ["#!mylabel"],
+            expressionTokens: tokens("#!mylabel"),
            parsingContext: new ParsingContext()
        });
@ -60,7 +74,7 @@ describe("Parser", () => {
        rootExp = parser({
            fulltextTokens: [],
-            expressionTokens: ["~!myrelation"],
+            expressionTokens: tokens("~!myrelation"),
            parsingContext: new ParsingContext()
        });
@ -73,7 +87,7 @@ describe("Parser", () => {
    it("simple label AND", () => {
        const rootExp = parser({
            fulltextTokens: [],
-            expressionTokens: ["#first", "=", "text", "and", "#second", "=", "text"],
+            expressionTokens: tokens("#first", "=", "text", "and", "#second", "=", "text"),
            parsingContext: new ParsingContext(true)
        });
@ -90,7 +104,7 @@ describe("Parser", () => {
    it("simple label AND without explicit AND", () => {
        const rootExp = parser({
            fulltextTokens: [],
-            expressionTokens: ["#first", "=", "text", "#second", "=", "text"],
+            expressionTokens: tokens("#first", "=", "text", "#second", "=", "text"),
            parsingContext: new ParsingContext()
        });
@ -107,7 +121,7 @@ describe("Parser", () => {
    it("simple label OR", () => {
        const rootExp = parser({
            fulltextTokens: [],
-            expressionTokens: ["#first", "=", "text", "or", "#second", "=", "text"],
+            expressionTokens: tokens("#first", "=", "text", "or", "#second", "=", "text"),
            parsingContext: new ParsingContext()
        });
@ -123,8 +137,8 @@ describe("Parser", () => {
    it("fulltext and simple label", () => {
        const rootExp = parser({
-            fulltextTokens: ["hello"],
+            fulltextTokens: tokens("hello"),
-            expressionTokens: ["#mylabel", "=", "text"],
+            expressionTokens: tokens("#mylabel", "=", "text"),
            parsingContext: new ParsingContext()
        });
@ -141,7 +155,7 @@ describe("Parser", () => {
    it("label sub-expression", () => {
        const rootExp = parser({
            fulltextTokens: [],
-            expressionTokens: ["#first", "=", "text", "or", ["#second", "=", "text", "and", "#third", "=", "text"]],
+            expressionTokens: tokens("#first", "=", "text", "or", tokens("#second", "=", "text", "and", "#third", "=", "text")),
            parsingContext: new ParsingContext()
        });
@ -161,3 +175,17 @@ describe("Parser", () => {
        expect(secondSubSub.attributeName).toEqual("third");
    });
 });
 // Checks that a malformed token stream ends up recorded in parsingContext.error.
 describe("Invalid tokens", () => {
    it("incomplete comparison", () => {
        const parsingContext = new ParsingContext();
        // "#first" followed by "=" with no value to compare against
        parser({
            fulltextTokens: [],
            expressionTokens: tokens("#first", "="),
            parsingContext
        });
        // return value of parser() is ignored here, only the error stored on the context is asserted
        expect(parsingContext.error).toEqual('Misplaced or incomplete expression "="')
    });
 });
--- a/spec/search/search.spec.js
+++ b/spec/search/search.spec.js
@ -529,4 +529,26 @@ describe("Search", () => {
    });
    // FIXME: test what happens when we order without any filter criteria
    // it("comparison between labels", async () => {
    //     rootNote
    //         .child(note("Europe")
    //             .child(note("Austria")
    //                 .label('capital', 'Vienna')
    //                 .label('largestCity', 'Vienna'))
    //             .child(note("Canada")
    //                 .label('capital', 'Ottawa')
    //                 .label('largestCity', 'Toronto'))
    //             .child(note("Czech Republic")
    //                 .label('capital', 'Prague')
    //                 .label('largestCity', 'Prague'))
    //         );
    //
    //     const parsingContext = new ParsingContext();
    //
    //     const searchResults = await searchService.findNotesWithQuery('#capital = #largestCity', parsingContext);
    //     expect(searchResults.length).toEqual(2);
    //     expect(findNoteByTitle(searchResults, "Czech Republic")).toBeTruthy();
    //     expect(findNoteByTitle(searchResults, "Austria")).toBeTruthy();
    // })
 });
--- a/src/services/search/expressions/property_comparison.js
+++ b/src/services/search/expressions/property_comparison.js
@ -51,6 +51,7 @@ class PropertyComparisonExp extends Expression {
            if (value) {
                value = value.toLowerCase();
            }
            if (this.comparator(value)) {
                resNoteSet.add(note);
            }
--- a/src/services/search/lexer.js
+++ b/src/services/search/lexer.js
@ -26,10 +26,15 @@ function lexer(str) {
            return;
        }
        const rec = {
            token: currentWord,
            inQuotes: quotes
        };
        if (fulltextEnded) {
-            expressionTokens.push(currentWord);
+            expressionTokens.push(rec);
        } else {
-            fulltextTokens.push(currentWord);
+            fulltextTokens.push(rec);
        }
        currentWord = '';
@ -77,8 +82,14 @@ function lexer(str) {
            continue;
        }
        else if (!quotes) {
-            if (currentWord.length === 0 && (chr === '#' || chr === '~')) {
+            if (chr === '#' || chr === '~') {
-                fulltextEnded = true;
+                if (!fulltextEnded) {
                    fulltextEnded = true;
                }
                else {
                    finishWord();
                }
                currentWord = chr;
                continue;
--- a/src/services/search/parens.js
+++ b/src/services/search/parens.js
@ -7,7 +7,7 @@ function parens(tokens) {
    }
    while (true) {
-        const leftIdx = tokens.findIndex(token => token === '(');
+        const leftIdx = tokens.findIndex(token => token.token === '(');
        if (leftIdx === -1) {
            return tokens;
@ -17,13 +17,13 @@ function parens(tokens) {
        let parensLevel = 0
        for (rightIdx = leftIdx; rightIdx < tokens.length; rightIdx++) {
-            if (tokens[rightIdx] === ')') {
+            if (tokens[rightIdx].token === ')') {
                parensLevel--;
                if (parensLevel === 0) {
                    break;
                }
-            } else if (tokens[rightIdx] === '(') {
+            } else if (tokens[rightIdx].token === '(') {
                parensLevel++;
            }
        }
--- a/src/services/search/parser.js
+++ b/src/services/search/parser.js
@ -18,6 +18,8 @@ const comparatorBuilder = require('./comparator_builder');
 const ValueExtractor = require('./value_extractor');
 function getFulltext(tokens, parsingContext) {
    tokens = tokens.map(t => t.token);
    parsingContext.highlightedTokens.push(...tokens);
    if (tokens.length === 0) {
@ -50,75 +52,75 @@ function getExpression(tokens, parsingContext, level = 0) {
    let i;
    function parseNoteProperty() {
-        if (tokens[i] !== '.') {
+        if (tokens[i].token !== '.') {
            parsingContext.addError('Expected "." to separate field path');
            return;
        }
        i++;
-        if (tokens[i] === 'content') {
+        if (tokens[i].token === 'content') {
            i += 1;
-            const operator = tokens[i];
+            const operator = tokens[i].token;
            if (!isOperator(operator)) {
-                parsingContext.addError(`After content expected operator, but got "${tokens[i]}"`);
+                parsingContext.addError(`After content expected operator, but got "${tokens[i].token}"`);
                return;
            }
            i++;
            return new OrExp([
-                new NoteContentUnprotectedFulltextExp(operator, [tokens[i]]),
+                new NoteContentUnprotectedFulltextExp(operator, [tokens[i].token]),
-                new NoteContentProtectedFulltextExp(operator, [tokens[i]])
+                new NoteContentProtectedFulltextExp(operator, [tokens[i].token])
            ]);
        }
-        if (tokens[i] === 'parents') {
+        if (tokens[i].token === 'parents') {
            i += 1;
            return new ChildOfExp(parseNoteProperty());
        }
-        if (tokens[i] === 'children') {
+        if (tokens[i].token === 'children') {
            i += 1;
            return new ParentOfExp(parseNoteProperty());
        }
-        if (tokens[i] === 'ancestors') {
+        if (tokens[i].token === 'ancestors') {
            i += 1;
            return new DescendantOfExp(parseNoteProperty());
        }
-        if (tokens[i] === 'labels') {
+        if (tokens[i].token === 'labels') {
-            if (tokens[i + 1] !== '.') {
+            if (tokens[i + 1].token !== '.') {
-                parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`);
+                parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
                return;
            }
            i += 2;
-            return parseLabel(tokens[i]);
+            return parseLabel(tokens[i].token);
        }
-        if (tokens[i] === 'relations') {
+        if (tokens[i].token === 'relations') {
-            if (tokens[i + 1] !== '.') {
+            if (tokens[i + 1].token !== '.') {
-                parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`);
+                parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
                return;
            }
            i += 2;
-            return parseRelation(tokens[i]);
+            return parseRelation(tokens[i].token);
        }
-        if (PropertyComparisonExp.isProperty(tokens[i])) {
+        if (PropertyComparisonExp.isProperty(tokens[i].token)) {
-            const propertyName = tokens[i];
+            const propertyName = tokens[i].token;
-            const operator = tokens[i + 1];
+            const operator = tokens[i + 1].token;
-            const comparedValue = tokens[i + 2];
+            const comparedValue = tokens[i + 2].token;
            const comparator = comparatorBuilder(operator, comparedValue);
            if (!comparator) {
@ -131,7 +133,7 @@ function getExpression(tokens, parsingContext, level = 0) {
            return new PropertyComparisonExp(propertyName, comparator);
        }
-        parsingContext.addError(`Unrecognized note property "${tokens[i]}"`);
+        parsingContext.addError(`Unrecognized note property "${tokens[i].token}"`);
    }
    function parseAttribute(name) {
@ -153,9 +155,9 @@ function getExpression(tokens, parsingContext, level = 0) {
    function parseLabel(labelName) {
        parsingContext.highlightedTokens.push(labelName);
-        if (i < tokens.length - 2 && isOperator(tokens[i + 1])) {
+        if (i < tokens.length - 2 && isOperator(tokens[i + 1].token)) {
-            let operator = tokens[i + 1];
+            let operator = tokens[i + 1].token;
-            const comparedValue = tokens[i + 2];
+            const comparedValue = tokens[i + 2].token;
            parsingContext.highlightedTokens.push(comparedValue);
@ -180,7 +182,7 @@ function getExpression(tokens, parsingContext, level = 0) {
    function parseRelation(relationName) {
        parsingContext.highlightedTokens.push(relationName);
-        if (i < tokens.length - 2 && tokens[i + 1] === '.') {
+        if (i < tokens.length - 2 && tokens[i + 1].token === '.') {
            i += 1;
            return new RelationWhereExp(relationName, parseNoteProperty());
@ -193,7 +195,7 @@ function getExpression(tokens, parsingContext, level = 0) {
        const orderDefinitions = [];
        let limit;
-        if (tokens[i] === 'orderby') {
+        if (tokens[i].token === 'orderby') {
            do {
                const propertyPath = [];
                let direction = "asc";
@ -201,13 +203,13 @@ function getExpression(tokens, parsingContext, level = 0) {
                do {
                    i++;
-                    propertyPath.push(tokens[i]);
+                    propertyPath.push(tokens[i].token);
                    i++;
-                } while (tokens[i] === '.');
+                } while (i < tokens.length && tokens[i].token === '.');
-                if (["asc", "desc"].includes(tokens[i])) {
+                if (i < tokens.length && ["asc", "desc"].includes(tokens[i].token)) {
-                    direction = tokens[i];
+                    direction = tokens[i].token;
                    i++;
                }
@ -221,11 +223,11 @@ function getExpression(tokens, parsingContext, level = 0) {
                    valueExtractor,
                    direction
                });
-            } while (tokens[i] === ',');
+            } while (i < tokens.length && tokens[i].token === ',');
        }
-        if (tokens[i] === 'limit') {
+        if (i < tokens.length && tokens[i].token === 'limit') {
-            limit = parseInt(tokens[i + 1]);
+            limit = parseInt(tokens[i + 1].token);
        }
        return new OrderByAndLimitExp(orderDefinitions, limit);
@ -241,16 +243,18 @@ function getExpression(tokens, parsingContext, level = 0) {
    }
    for (i = 0; i < tokens.length; i++) {
-        const token = tokens[i];
+        if (Array.isArray(tokens[i])) {
            expressions.push(getExpression(tokens[i], parsingContext, level++));
            continue;
        }
        const token = tokens[i].token;
        if (token === '#' || token === '~') {
            continue;
        }
-        if (Array.isArray(token)) {
+        if (token.startsWith('#') || token.startsWith('~')) {
            expressions.push(getExpression(token, parsingContext, level++));
        }
        else if (token.startsWith('#') || token.startsWith('~')) {
            expressions.push(parseAttribute(token));
        }
        else if (['orderby', 'limit'].includes(token)) {
@ -273,7 +277,7 @@ function getExpression(tokens, parsingContext, level = 0) {
            i += 1;
            if (!Array.isArray(tokens[i])) {
-                parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i]} instead`);
+                parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i].token} instead`);
                continue;
            }
--- a/src/services/search/parsing_context.js
+++ b/src/services/search/parsing_context.js
@ -12,7 +12,6 @@ class ParsingContext {
        // we record only the first error, subsequent ones are usually consequence of the first
        if (!this.error) {
            this.error = error;
            console.log(this.error);
        }
    }
 }