From b72dc977e63b37135fd1702ac04d129ecc95cd4e Mon Sep 17 00:00:00 2001 From: zadam Date: Tue, 19 May 2020 00:00:35 +0200 Subject: [PATCH] parens handler + parser in progress --- spec/lexer.spec.js | 26 ++++---- spec/parens.spec.js | 21 ++++++ src/services/search/expressions/and.js | 9 +++ src/services/search/expressions/equals.js | 3 +- src/services/search/expressions/or.js | 2 + src/services/search/parens.js | 43 ++++++++++++ src/services/search/parser.js | 81 +++++++++++++++++++++++ 7 files changed, 171 insertions(+), 14 deletions(-) create mode 100644 spec/parens.spec.js create mode 100644 src/services/search/parens.js create mode 100644 src/services/search/parser.js diff --git a/spec/lexer.spec.js b/spec/lexer.spec.js index e24235594..14f4314fb 100644 --- a/spec/lexer.spec.js +++ b/spec/lexer.spec.js @@ -1,61 +1,61 @@ -const lexerSpec = require('../src/services/search/lexer'); +const lexer = require('../src/services/search/lexer'); describe("Lexer fulltext", () => { it("simple lexing", () => { - expect(lexerSpec("hello world").fulltextTokens) + expect(lexer("hello world").fulltextTokens) .toEqual(["hello", "world"]); }); it("use quotes to keep words together", () => { - expect(lexerSpec("'hello world' my friend").fulltextTokens) + expect(lexer("'hello world' my friend").fulltextTokens) .toEqual(["hello world", "my", "friend"]); - expect(lexerSpec('"hello world" my friend').fulltextTokens) + expect(lexer('"hello world" my friend').fulltextTokens) .toEqual(["hello world", "my", "friend"]); - expect(lexerSpec('`hello world` my friend').fulltextTokens) + expect(lexer('`hello world` my friend').fulltextTokens) .toEqual(["hello world", "my", "friend"]); }); it("you can use different quotes and other special characters inside quotes", () => { - expect(lexerSpec("'I can use \" or ` or #@=*' without problem").fulltextTokens) + expect(lexer("'I can use \" or ` or #@=*' without problem").fulltextTokens) .toEqual(["I can use \" or ` or #@=*", "without", "problem"]); }); it("if quote is not ended then it's just one long token", () => { - expect(lexerSpec("'unfinished quote").fulltextTokens) + expect(lexer("'unfinished quote").fulltextTokens) .toEqual(["unfinished quote"]); }); it("parenthesis and symbols in fulltext section are just normal characters", () => { - expect(lexerSpec("what's u=p ").fulltextTokens) + expect(lexer("what's u=p ").fulltextTokens) .toEqual(["what's", "u=p", ""]); }); it("escaping special characters", () => { - expect(lexerSpec("hello \\#\\@\\'").fulltextTokens) + expect(lexer("hello \\#\\@\\'").fulltextTokens) .toEqual(["hello", "#@'"]); }); }); describe("Lexer expression", () => { it("simple attribute existence", () => { - expect(lexerSpec("#label @relation").expressionTokens) + expect(lexer("#label @relation").expressionTokens) .toEqual(["#label", "@relation"]); }); it("simple label operators", () => { - expect(lexerSpec("#label*=*text").expressionTokens) + expect(lexer("#label*=*text").expressionTokens) .toEqual(["#label", "*=*", "text"]); }); it("spaces in attribute names and values", () => { - expect(lexerSpec(`#'long label'="hello o' world" @'long relation'`).expressionTokens) + expect(lexer(`#'long label'="hello o' world" @'long relation'`).expressionTokens) .toEqual(["#long label", "=", "hello o' world", "@long relation"]); }); it("complex expressions with and, or and parenthesis", () => { - expect(lexerSpec(`# (#label=text OR #second=text) AND @relation`).expressionTokens) + expect(lexer(`# (#label=text OR #second=text) AND @relation`).expressionTokens) .toEqual(["#", "(", "#label", "=", "text", "OR", "#second", "=", "text", ")", "AND", "@relation"]); }); }); diff --git a/spec/parens.spec.js b/spec/parens.spec.js new file mode 100644 index 000000000..8c7db6d56 --- /dev/null +++ b/spec/parens.spec.js @@ -0,0 +1,21 @@ +const parens = require('../src/services/search/parens'); + +describe("Parens handler", () => { + it("handles parens", () => {console.log(parens(["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"])) + expect(parens(["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"])) + .toEqual([ + [ + "hello" + ], + "and", + [ + [ + "pick", + "one" + ], + "and", + "another" + ] + ]); + }); +}); diff --git a/src/services/search/expressions/and.js b/src/services/search/expressions/and.js index f542d416c..44e19c0af 100644 --- a/src/services/search/expressions/and.js +++ b/src/services/search/expressions/and.js @@ -5,6 +5,15 @@ class AndExp { this.subExpressions = subExpressions; } + static of(subExpressions) { + if (subExpressions.length === 1) { + return subExpressions[0]; + } + else { + return new AndExp(subExpressions); + } + } + execute(noteSet, searchContext) { for (const subExpression of this.subExpressions) { noteSet = subExpression.execute(noteSet, searchContext); diff --git a/src/services/search/expressions/equals.js b/src/services/search/expressions/equals.js index e5e04b3d8..ecae22241 100644 --- a/src/services/search/expressions/equals.js +++ b/src/services/search/expressions/equals.js @@ -4,9 +4,10 @@ const NoteSet = require('../note_set'); const noteCache = require('../../note_cache/note_cache'); class EqualsExp { - constructor(attributeType, attributeName, attributeValue) { + constructor(attributeType, attributeName, operator, attributeValue) { this.attributeType = attributeType; this.attributeName = attributeName; + this.operator = operator; this.attributeValue = attributeValue; } diff --git a/src/services/search/expressions/or.js b/src/services/search/expressions/or.js index c17dd2210..a48bb8bc8 100644 --- a/src/services/search/expressions/or.js +++ b/src/services/search/expressions/or.js @@ -1,5 +1,7 @@ "use strict"; +const NoteSet = require('../note_set'); + class OrExp { constructor(subExpressions) { this.subExpressions = subExpressions; diff --git a/src/services/search/parens.js b/src/services/search/parens.js new file mode 100644 index 000000000..7f402d2b5 --- /dev/null +++ b/src/services/search/parens.js @@ -0,0 +1,43 @@ +/** + * This will create a recursive object from list of tokens - tokens between parenthesis are grouped in a single array + */ +function parens(tokens) { + if (tokens.length === 0) { + throw new Error("Empty expression."); + } + + while (true) { + const leftIdx = tokens.findIndex(token => token === '('); + + if (leftIdx === -1) { + return tokens; + } + + let rightIdx; + let parensLevel = 0 + + for (rightIdx = leftIdx; rightIdx < tokens.length; rightIdx++) { + if (tokens[rightIdx] === ')') { + parensLevel--; + + if (parensLevel === 0) { + break; + } + } else if (tokens[rightIdx] === '(') { + parensLevel++; + } + } + + if (rightIdx >= tokens.length) { + throw new Error("Did not find matching right parenthesis."); + } + + tokens = [ + ...tokens.slice(0, leftIdx), + parens(tokens.slice(leftIdx + 1, rightIdx)), + ...tokens.slice(rightIdx + 1) + ]; + } +} + +module.exports = parens; diff --git a/src/services/search/parser.js b/src/services/search/parser.js new file mode 100644 index 000000000..8ad021f89 --- /dev/null +++ b/src/services/search/parser.js @@ -0,0 +1,81 @@ +const AndExp = require('./expressions/and'); +const OrExp = require('./expressions/or'); +const NotExp = require('./expressions/not'); +const ExistsExp = require('./expressions/exists'); +const EqualsExp = require('./expressions/equals'); +const NoteCacheFulltextExp = require('./expressions/note_cache_fulltext'); +const NoteContentFulltextExp = require('./expressions/note_content_fulltext'); + +function getFulltext(tokens, includingNoteContent) { + if (includingNoteContent) { + return [ + new OrExp([ + new NoteCacheFulltextExp(tokens), + new NoteContentFulltextExp(tokens) + ]) + ] + } + else { + return [ + new NoteCacheFulltextExp(tokens) + ] + } +} + +function isOperator(str) { + return str.matches(/^[=<>*]+$/); +} + +function getExpressions(tokens) { + const expressions = []; + let op = null; + + for (let i = 0; i < tokens.length; i++) { + const token = tokens[i]; + + if (token === '#' || token === '@') { + continue; + } + + if (Array.isArray(token)) { + expressions.push(getExpressions(token)); + } + else if (token.startsWith('#') || token.startsWith('@')) { + const type = token.startsWith('#') ? 'label' : 'relation'; + + if (i < tokens.length - 2 && isOperator(tokens[i + 1])) { + expressions.push(new EqualsExp(type, token.substr(1), tokens[i + 1], tokens[i + 2])); + + i += 2; + } + else { + expressions.push(new ExistsExp(type, token.substr(1))); + } + } + else if (['and', 'or'].includes(token.toLowerCase())) { + if (!op) { + op = token.toLowerCase(); + } + else if (op !== token.toLowerCase()) { + throw new Error('Mixed usage of AND/OR - always use parenthesis to group AND/OR expressions.'); + } + } + else if (isOperator(token)) { + throw new Error(`Misplaced or incomplete expression "${token}"`); + } + else { + throw new Error(`Unrecognized expression "${token}"`); + } + + if (!op && expressions.length > 1) { + op = 'and'; + } + } +} + +function parse(fulltextTokens, expressionTokens, includingNoteContent) { + return AndExp.of([ + ...getFulltext(fulltextTokens, includingNoteContent), + ...getExpressions(expressionTokens) + ]); +}