parens handler + parser in progress

This commit is contained in:
zadam 2020-05-19 00:00:35 +02:00
parent 81bf84f2de
commit b72dc977e6
7 changed files with 171 additions and 14 deletions

View File

@ -1,61 +1,61 @@
const lexerSpec = require('../src/services/search/lexer'); const lexer = require('../src/services/search/lexer');
describe("Lexer fulltext", () => { describe("Lexer fulltext", () => {
it("simple lexing", () => { it("simple lexing", () => {
expect(lexerSpec("hello world").fulltextTokens) expect(lexer("hello world").fulltextTokens)
.toEqual(["hello", "world"]); .toEqual(["hello", "world"]);
}); });
it("use quotes to keep words together", () => { it("use quotes to keep words together", () => {
expect(lexerSpec("'hello world' my friend").fulltextTokens) expect(lexer("'hello world' my friend").fulltextTokens)
.toEqual(["hello world", "my", "friend"]); .toEqual(["hello world", "my", "friend"]);
expect(lexerSpec('"hello world" my friend').fulltextTokens) expect(lexer('"hello world" my friend').fulltextTokens)
.toEqual(["hello world", "my", "friend"]); .toEqual(["hello world", "my", "friend"]);
expect(lexerSpec('`hello world` my friend').fulltextTokens) expect(lexer('`hello world` my friend').fulltextTokens)
.toEqual(["hello world", "my", "friend"]); .toEqual(["hello world", "my", "friend"]);
}); });
it("you can use different quotes and other special characters inside quotes", () => { it("you can use different quotes and other special characters inside quotes", () => {
expect(lexerSpec("'I can use \" or ` or #@=*' without problem").fulltextTokens) expect(lexer("'I can use \" or ` or #@=*' without problem").fulltextTokens)
.toEqual(["I can use \" or ` or #@=*", "without", "problem"]); .toEqual(["I can use \" or ` or #@=*", "without", "problem"]);
}); });
it("if quote is not ended then it's just one long token", () => { it("if quote is not ended then it's just one long token", () => {
expect(lexerSpec("'unfinished quote").fulltextTokens) expect(lexer("'unfinished quote").fulltextTokens)
.toEqual(["unfinished quote"]); .toEqual(["unfinished quote"]);
}); });
it("parenthesis and symbols in fulltext section are just normal characters", () => { it("parenthesis and symbols in fulltext section are just normal characters", () => {
expect(lexerSpec("what's u=p <b(r*t)h>").fulltextTokens) expect(lexer("what's u=p <b(r*t)h>").fulltextTokens)
.toEqual(["what's", "u=p", "<b(r*t)h>"]); .toEqual(["what's", "u=p", "<b(r*t)h>"]);
}); });
it("escaping special characters", () => { it("escaping special characters", () => {
expect(lexerSpec("hello \\#\\@\\'").fulltextTokens) expect(lexer("hello \\#\\@\\'").fulltextTokens)
.toEqual(["hello", "#@'"]); .toEqual(["hello", "#@'"]);
}); });
}); });
describe("Lexer expression", () => { describe("Lexer expression", () => {
it("simple attribute existence", () => { it("simple attribute existence", () => {
expect(lexerSpec("#label @relation").expressionTokens) expect(lexer("#label @relation").expressionTokens)
.toEqual(["#label", "@relation"]); .toEqual(["#label", "@relation"]);
}); });
it("simple label operators", () => { it("simple label operators", () => {
expect(lexerSpec("#label*=*text").expressionTokens) expect(lexer("#label*=*text").expressionTokens)
.toEqual(["#label", "*=*", "text"]); .toEqual(["#label", "*=*", "text"]);
}); });
it("spaces in attribute names and values", () => { it("spaces in attribute names and values", () => {
expect(lexerSpec(`#'long label'="hello o' world" @'long relation'`).expressionTokens) expect(lexer(`#'long label'="hello o' world" @'long relation'`).expressionTokens)
.toEqual(["#long label", "=", "hello o' world", "@long relation"]); .toEqual(["#long label", "=", "hello o' world", "@long relation"]);
}); });
it("complex expressions with and, or and parenthesis", () => { it("complex expressions with and, or and parenthesis", () => {
expect(lexerSpec(`# (#label=text OR #second=text) AND @relation`).expressionTokens) expect(lexer(`# (#label=text OR #second=text) AND @relation`).expressionTokens)
.toEqual(["#", "(", "#label", "=", "text", "OR", "#second", "=", "text", ")", "AND", "@relation"]); .toEqual(["#", "(", "#label", "=", "text", "OR", "#second", "=", "text", ")", "AND", "@relation"]);
}); });
}); });

21
spec/parens.spec.js Normal file
View File

@ -0,0 +1,21 @@
const parens = require('../src/services/search/parens');
// Checks that a flat token list is regrouped into nested arrays along its parentheses:
// each "(" ... ")" run becomes one array element and the parenthesis tokens disappear.
describe("Parens handler", () => {
    it("handles parens", () => {
        const tokens = ["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"];

        expect(parens(tokens))
            .toEqual([
                [
                    "hello"
                ],
                "and",
                [
                    [
                        "pick",
                        "one"
                    ],
                    "and",
                    "another"
                ]
            ]);
    });
});

View File

@ -5,6 +5,15 @@ class AndExp {
this.subExpressions = subExpressions; this.subExpressions = subExpressions;
} }
static of(subExpressions) {
if (subExpressions.length === 1) {
return subExpressions[0];
}
else {
return new AndExp(subExpressions);
}
}
execute(noteSet, searchContext) { execute(noteSet, searchContext) {
for (const subExpression of this.subExpressions) { for (const subExpression of this.subExpressions) {
noteSet = subExpression.execute(noteSet, searchContext); noteSet = subExpression.execute(noteSet, searchContext);

View File

@ -4,9 +4,10 @@ const NoteSet = require('../note_set');
const noteCache = require('../../note_cache/note_cache'); const noteCache = require('../../note_cache/note_cache');
class EqualsExp { class EqualsExp {
constructor(attributeType, attributeName, attributeValue) { constructor(attributeType, attributeName, operator, attributeValue) {
this.attributeType = attributeType; this.attributeType = attributeType;
this.attributeName = attributeName; this.attributeName = attributeName;
this.operator = operator;
this.attributeValue = attributeValue; this.attributeValue = attributeValue;
} }

View File

@ -1,5 +1,7 @@
"use strict"; "use strict";
const NoteSet = require('../note_set');
class OrExp { class OrExp {
constructor(subExpressions) { constructor(subExpressions) {
this.subExpressions = subExpressions; this.subExpressions = subExpressions;

View File

@ -0,0 +1,43 @@
/**
 * Turns a flat list of tokens into a nested structure: the tokens enclosed by each matching pair
 * of parentheses are replaced by a single array holding them (grouped recursively), and the
 * parenthesis tokens themselves are dropped.
 *
 * The input array is never modified; when it contains no parentheses it is returned as is.
 *
 * @param {Array} tokens - flat token list in which "(" and ")" delimit groups
 * @returns {Array} nested token structure without parenthesis tokens
 * @throws {Error} when the list (or a parenthesised group) is empty, or when a parenthesis
 *                 has no matching partner
 */
function parens(tokens) {
    if (tokens.length === 0) {
        throw new Error("Empty expression.");
    }

    let remaining = tokens;

    while (true) {
        const leftIdx = remaining.indexOf('(');

        if (leftIdx === -1) {
            // Every balanced group has already been folded into an array at this point,
            // so a ")" string still present at this level never had an opening partner.
            if (remaining.includes(')')) {
                throw new Error("Did not find matching left parenthesis.");
            }

            return remaining;
        }

        // Walk right from the opening parenthesis, tracking nesting depth, until the depth
        // drops back to zero - that position is the partner of the parenthesis at leftIdx.
        let rightIdx;
        let parensLevel = 0;

        for (rightIdx = leftIdx; rightIdx < remaining.length; rightIdx++) {
            if (remaining[rightIdx] === ')') {
                parensLevel--;

                if (parensLevel === 0) {
                    break;
                }
            } else if (remaining[rightIdx] === '(') {
                parensLevel++;
            }
        }

        if (rightIdx >= remaining.length) {
            throw new Error("Did not find matching right parenthesis.");
        }

        // Replace the whole "( ... )" run with one nested array and rescan for further groups.
        remaining = [
            ...remaining.slice(0, leftIdx),
            parens(remaining.slice(leftIdx + 1, rightIdx)),
            ...remaining.slice(rightIdx + 1)
        ];
    }
}
module.exports = parens;

View File

@ -0,0 +1,81 @@
const AndExp = require('./expressions/and');
const OrExp = require('./expressions/or');
const NotExp = require('./expressions/not');
const ExistsExp = require('./expressions/exists');
const EqualsExp = require('./expressions/equals');
const NoteCacheFulltextExp = require('./expressions/note_cache_fulltext');
const NoteContentFulltextExp = require('./expressions/note_content_fulltext');
/**
 * Builds the fulltext part of a search as a one-element list of expressions.
 *
 * @param {Array} tokens - fulltext tokens handed unchanged to the fulltext expression(s)
 * @param {boolean} includingNoteContent - when truthy, the result is an OrExp over both
 *        NoteCacheFulltextExp and NoteContentFulltextExp; otherwise only NoteCacheFulltextExp is used
 * @returns {Array} array holding exactly one expression
 */
function getFulltext(tokens, includingNoteContent) {
    const cacheFulltext = new NoteCacheFulltextExp(tokens);

    if (!includingNoteContent) {
        return [cacheFulltext];
    }

    const contentFulltext = new NoteContentFulltextExp(tokens);

    return [new OrExp([cacheFulltext, contentFulltext])];
}
/**
 * Tells whether a token is an operator, i.e. a non-empty string made up solely of the
 * characters "=", "<", ">" and "*" (for example "=", ">=" or "*=*").
 *
 * @param {*} str - token to inspect; non-string tokens (such as nested token arrays) are never operators
 * @returns {boolean} true when the token is an operator
 */
function isOperator(str) {
    // The type guard matters: RegExp#test would stringify an array like ["="] into "=".
    return typeof str === 'string' && /^[=<>*]+$/.test(str);
}
/**
 * Converts (possibly nested) expression tokens into search expressions.
 *
 * Recognized tokens:
 *  - a bare "#" or "@" is skipped
 *  - an array (parenthesised group) is parsed recursively
 *  - "#name" / "@name" becomes an ExistsExp of type 'label' / 'relation', or an EqualsExp when it is
 *    followed by an operator token and a value token
 *  - "and" / "or" (case-insensitive) select how the collected expressions are combined; mixing
 *    both on the same level is rejected, and several expressions without an explicit operator
 *    are combined with "and"
 *
 * @param {Array} tokens - expression tokens; nested arrays represent parenthesised groups
 * @returns {Array} empty array when no expression was found, otherwise an array with exactly one
 *          expression (the only expression found, or an AndExp/OrExp aggregating all of them)
 * @throws {Error} on mixed AND/OR usage, on a misplaced operator and on an unrecognized token
 */
function getExpressions(tokens) {
    const expressions = [];
    let op = null;

    for (let i = 0; i < tokens.length; i++) {
        const token = tokens[i];

        if (token === '#' || token === '@') {
            continue;
        }

        if (Array.isArray(token)) {
            // a nested group yields zero or one aggregated expression
            expressions.push(...getExpressions(token));
        }
        else if (token.startsWith('#') || token.startsWith('@')) {
            const type = token.startsWith('#') ? 'label' : 'relation';
            const name = token.slice(1);

            if (i < tokens.length - 2 && isOperator(tokens[i + 1])) {
                expressions.push(new EqualsExp(type, name, tokens[i + 1], tokens[i + 2]));

                // the operator and value tokens have been consumed as well
                i += 2;
            }
            else {
                expressions.push(new ExistsExp(type, name));
            }
        }
        else if (['and', 'or'].includes(token.toLowerCase())) {
            const requestedOp = token.toLowerCase();

            if (!op) {
                op = requestedOp;
            }
            else if (op !== requestedOp) {
                throw new Error('Mixed usage of AND/OR - always use parenthesis to group AND/OR expressions.');
            }
        }
        else if (isOperator(token)) {
            throw new Error(`Misplaced or incomplete expression "${token}"`);
        }
        else {
            throw new Error(`Unrecognized expression "${token}"`);
        }

        // expressions listed without an explicit operator are implicitly ANDed
        if (!op && expressions.length > 1) {
            op = 'and';
        }
    }

    // nothing to aggregate - hand back the zero or one expression as it is
    if (expressions.length <= 1) {
        return expressions;
    }

    return [op === 'or' ? new OrExp(expressions) : AndExp.of(expressions)];
}
/**
 * Combines the fulltext expressions and the attribute expressions of a search into one expression.
 *
 * @param {Array} fulltextTokens - tokens passed to getFulltext()
 * @param {Array} expressionTokens - tokens passed to getExpressions()
 * @param {boolean} includingNoteContent - forwarded to getFulltext()
 * @returns {*} result of AndExp.of() applied to the fulltext expressions followed by the attribute expressions
 */
function parse(fulltextTokens, expressionTokens, includingNoteContent) {
    const fulltextExpressions = getFulltext(fulltextTokens, includingNoteContent);
    const attributeExpressions = getExpressions(expressionTokens);

    const allExpressions = [...fulltextExpressions, ...attributeExpressions];

    return AndExp.of(allExpressions);
}