lexer now marks "in quotes" tokens

This commit is contained in:
zadam 2020-07-19 23:19:45 +02:00
parent 4c7b1d6543
commit 35469f6f2d
9 changed files with 161 additions and 85 deletions

View File

@ -2,75 +2,83 @@ const lexer = require('../../src/services/search/lexer.js');
describe("Lexer fulltext", () => { describe("Lexer fulltext", () => {
it("simple lexing", () => { it("simple lexing", () => {
expect(lexer("hello world").fulltextTokens) expect(lexer("hello world").fulltextTokens.map(t => t.token))
.toEqual(["hello", "world"]); .toEqual(["hello", "world"]);
}); });
it("use quotes to keep words together", () => { it("use quotes to keep words together", () => {
expect(lexer("'hello world' my friend").fulltextTokens) expect(lexer("'hello world' my friend").fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]); .toEqual(["hello world", "my", "friend"]);
expect(lexer('"hello world" my friend').fulltextTokens) expect(lexer('"hello world" my friend').fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]); .toEqual(["hello world", "my", "friend"]);
expect(lexer('`hello world` my friend').fulltextTokens) expect(lexer('`hello world` my friend').fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]); .toEqual(["hello world", "my", "friend"]);
}); });
it("you can use different quotes and other special characters inside quotes", () => { it("you can use different quotes and other special characters inside quotes", () => {
expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens) expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens.map(t => t.token))
.toEqual(["i can use \" or ` or #~=*", "without", "problem"]); .toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
}); });
it("if quote is not ended then it's just one long token", () => { it("if quote is not ended then it's just one long token", () => {
expect(lexer("'unfinished quote").fulltextTokens) expect(lexer("'unfinished quote").fulltextTokens.map(t => t.token))
.toEqual(["unfinished quote"]); .toEqual(["unfinished quote"]);
}); });
it("parenthesis and symbols in fulltext section are just normal characters", () => { it("parenthesis and symbols in fulltext section are just normal characters", () => {
expect(lexer("what's u=p <b(r*t)h>").fulltextTokens) expect(lexer("what's u=p <b(r*t)h>").fulltextTokens.map(t => t.token))
.toEqual(["what's", "u=p", "<b(r*t)h>"]); .toEqual(["what's", "u=p", "<b(r*t)h>"]);
}); });
it("escaping special characters", () => { it("escaping special characters", () => {
expect(lexer("hello \\#\\~\\'").fulltextTokens) expect(lexer("hello \\#\\~\\'").fulltextTokens.map(t => t.token))
.toEqual(["hello", "#~'"]); .toEqual(["hello", "#~'"]);
}); });
}); });
describe("Lexer expression", () => { describe("Lexer expression", () => {
it("simple attribute existence", () => { it("simple attribute existence", () => {
expect(lexer("#label ~relation").expressionTokens) expect(lexer("#label ~relation").expressionTokens.map(t => t.token))
.toEqual(["#label", "~relation"]); .toEqual(["#label", "~relation"]);
}); });
it("simple label operators", () => { it("simple label operators", () => {
expect(lexer("#label*=*text").expressionTokens) expect(lexer("#label*=*text").expressionTokens.map(t => t.token))
.toEqual(["#label", "*=*", "text"]); .toEqual(["#label", "*=*", "text"]);
}); });
it("spaces in attribute names and values", () => {
expect(lexer(`#'long label'="hello o' world" ~'long relation'`).expressionTokens)
.toEqual(["#long label", "=", "hello o' world", "~long relation"]);
});
it("complex expressions with and, or and parenthesis", () => { it("complex expressions with and, or and parenthesis", () => {
expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens) expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens.map(t => t.token))
.toEqual(["#", "(", "#label", "=", "text", "or", "#second", "=", "text", ")", "and", "~relation"]); .toEqual(["#", "(", "#label", "=", "text", "or", "#second", "=", "text", ")", "and", "~relation"]);
}); });
it("dot separated properties", () => { it("dot separated properties", () => {
expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens) expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens.map(t => t.token))
.toEqual(["#", "~author", ".", "title", "=", "hugh howey", "and", "note", ".", "book title", "=", "silo"]); .toEqual(["#", "~author", ".", "title", "=", "hugh howey", "and", "note", ".", "book title", "=", "silo"]);
}); });
it("negation of label and relation", () => { it("negation of label and relation", () => {
expect(lexer(`#!capital ~!neighbor`).expressionTokens) expect(lexer(`#!capital ~!neighbor`).expressionTokens.map(t => t.token))
.toEqual(["#!capital", "~!neighbor"]); .toEqual(["#!capital", "~!neighbor"]);
}); });
it("negation of sub-expression", () => { it("negation of sub-expression", () => {
expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens) expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens.map(t => t.token))
.toEqual(["#", "not", "(", "#capital", ")", "and", "note", ".", "noteid", "!=", "root"]); .toEqual(["#", "not", "(", "#capital", ")", "and", "note", ".", "noteid", "!=", "root"]);
}); });
}); });
describe("Lexer invalid queries and edge cases", () => {
    it("concatenated attributes", () => {
        // a label directly followed by "~" (no whitespace) must still be split into two tokens
        const {expressionTokens} = lexer("#label~relation");
        const tokenTexts = expressionTokens.map(({token}) => token);

        expect(tokenTexts).toEqual(["#label", "~relation"]);
    });

    it("spaces in attribute names and values", () => {
        // invalid but should be reported by parser as an error
        const query = `#'long label'="hello o' world" ~'long relation'`;
        const tokenTexts = lexer(query).expressionTokens.map(({token}) => token);

        expect(tokenTexts).toEqual(["#long label", "=", "hello o' world", "~long relation"]);
    });
});

View File

@ -2,19 +2,22 @@ const parens = require('../../src/services/search/parens.js');
describe("Parens handler", () => { describe("Parens handler", () => {
it("handles parens", () => { it("handles parens", () => {
expect(parens(["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"])) const input = ["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"]
.map(token => ({token}));
expect(parens(input))
.toEqual([ .toEqual([
[ [
"hello" {token: "hello"}
], ],
"and", {token: "and"},
[ [
[ [
"pick", {token: "pick"},
"one" {token: "one"}
], ],
"and", {token: "and"},
"another" {token: "another"}
] ]
]); ]);
}); });

View File

@ -1,10 +1,24 @@
const ParsingContext = require("../../src/services/search/parsing_context.js"); const ParsingContext = require("../../src/services/search/parsing_context.js");
const parser = require('../../src/services/search/parser.js'); const parser = require('../../src/services/search/parser.js');
/**
 * Test helper which builds a token list in the object form consumed by the parser.
 *
 * Every plain argument is wrapped into a `{token, inQuotes: false}` record.
 * Arguments which are already arrays (nested sub-expressions) are passed through
 * untouched, so the result of a nested `tokens(...)` call can be used directly as an argument.
 *
 * @param {...(string|Array)} args - token strings or already built sub-expression arrays
 * @returns {Array} list of token records and/or nested arrays, in the order given
 */
function tokens(...args) {
    const result = [];

    for (const item of args) {
        result.push(Array.isArray(item) ? item : {token: item, inQuotes: false});
    }

    return result;
}
describe("Parser", () => { describe("Parser", () => {
it("fulltext parser without content", () => { it("fulltext parser without content", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: ["hello", "hi"], fulltextTokens: tokens("hello", "hi"),
expressionTokens: [], expressionTokens: [],
parsingContext: new ParsingContext({includeNoteContent: false}) parsingContext: new ParsingContext({includeNoteContent: false})
}); });
@ -15,7 +29,7 @@ describe("Parser", () => {
it("fulltext parser with content", () => { it("fulltext parser with content", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: ["hello", "hi"], fulltextTokens: tokens("hello", "hi"),
expressionTokens: [], expressionTokens: [],
parsingContext: new ParsingContext({includeNoteContent: true}) parsingContext: new ParsingContext({includeNoteContent: true})
}); });
@ -36,7 +50,7 @@ describe("Parser", () => {
it("simple label comparison", () => { it("simple label comparison", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: [], fulltextTokens: [],
expressionTokens: ["#mylabel", "=", "text"], expressionTokens: tokens("#mylabel", "=", "text"),
parsingContext: new ParsingContext() parsingContext: new ParsingContext()
}); });
@ -49,7 +63,7 @@ describe("Parser", () => {
it("simple attribute negation", () => { it("simple attribute negation", () => {
let rootExp = parser({ let rootExp = parser({
fulltextTokens: [], fulltextTokens: [],
expressionTokens: ["#!mylabel"], expressionTokens: tokens("#!mylabel"),
parsingContext: new ParsingContext() parsingContext: new ParsingContext()
}); });
@ -60,7 +74,7 @@ describe("Parser", () => {
rootExp = parser({ rootExp = parser({
fulltextTokens: [], fulltextTokens: [],
expressionTokens: ["~!myrelation"], expressionTokens: tokens("~!myrelation"),
parsingContext: new ParsingContext() parsingContext: new ParsingContext()
}); });
@ -73,7 +87,7 @@ describe("Parser", () => {
it("simple label AND", () => { it("simple label AND", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: [], fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "and", "#second", "=", "text"], expressionTokens: tokens("#first", "=", "text", "and", "#second", "=", "text"),
parsingContext: new ParsingContext(true) parsingContext: new ParsingContext(true)
}); });
@ -90,7 +104,7 @@ describe("Parser", () => {
it("simple label AND without explicit AND", () => { it("simple label AND without explicit AND", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: [], fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "#second", "=", "text"], expressionTokens: tokens("#first", "=", "text", "#second", "=", "text"),
parsingContext: new ParsingContext() parsingContext: new ParsingContext()
}); });
@ -107,7 +121,7 @@ describe("Parser", () => {
it("simple label OR", () => { it("simple label OR", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: [], fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "or", "#second", "=", "text"], expressionTokens: tokens("#first", "=", "text", "or", "#second", "=", "text"),
parsingContext: new ParsingContext() parsingContext: new ParsingContext()
}); });
@ -123,8 +137,8 @@ describe("Parser", () => {
it("fulltext and simple label", () => { it("fulltext and simple label", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: ["hello"], fulltextTokens: tokens("hello"),
expressionTokens: ["#mylabel", "=", "text"], expressionTokens: tokens("#mylabel", "=", "text"),
parsingContext: new ParsingContext() parsingContext: new ParsingContext()
}); });
@ -141,7 +155,7 @@ describe("Parser", () => {
it("label sub-expression", () => { it("label sub-expression", () => {
const rootExp = parser({ const rootExp = parser({
fulltextTokens: [], fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "or", ["#second", "=", "text", "and", "#third", "=", "text"]], expressionTokens: tokens("#first", "=", "text", "or", tokens("#second", "=", "text", "and", "#third", "=", "text")),
parsingContext: new ParsingContext() parsingContext: new ParsingContext()
}); });
@ -161,3 +175,17 @@ describe("Parser", () => {
expect(secondSubSub.attributeName).toEqual("third"); expect(secondSubSub.attributeName).toEqual("third");
}); });
}); });
describe("Invalid tokens", () => {
    it("incomplete comparison", () => {
        // the error is read back from the parsing context after parsing,
        // so we keep a reference to the context we pass in
        const parsingContext = new ParsingContext();

        // "#first =" is missing the compared value
        parser({
            fulltextTokens: [],
            expressionTokens: tokens("#first", "="),
            parsingContext
        });

        expect(parsingContext.error).toEqual('Misplaced or incomplete expression "="');
    });
});

View File

@ -529,4 +529,26 @@ describe("Search", () => {
}); });
// FIXME: test what happens when we order without any filter criteria // FIXME: test what happens when we order without any filter criteria
// it("comparison between labels", async () => {
// rootNote
// .child(note("Europe")
// .child(note("Austria")
// .label('capital', 'Vienna')
// .label('largestCity', 'Vienna'))
// .child(note("Canada")
// .label('capital', 'Ottawa')
// .label('largestCity', 'Toronto'))
// .child(note("Czech Republic")
// .label('capital', 'Prague')
// .label('largestCity', 'Prague'))
// );
//
// const parsingContext = new ParsingContext();
//
// const searchResults = await searchService.findNotesWithQuery('#capital = #largestCity', parsingContext);
// expect(searchResults.length).toEqual(2);
// expect(findNoteByTitle(searchResults, "Czech Republic")).toBeTruthy();
// expect(findNoteByTitle(searchResults, "Austria")).toBeTruthy();
// })
}); });

View File

@ -51,6 +51,7 @@ class PropertyComparisonExp extends Expression {
if (value) { if (value) {
value = value.toLowerCase(); value = value.toLowerCase();
} }
if (this.comparator(value)) { if (this.comparator(value)) {
resNoteSet.add(note); resNoteSet.add(note);
} }

View File

@ -26,10 +26,15 @@ function lexer(str) {
return; return;
} }
const rec = {
token: currentWord,
inQuotes: quotes
};
if (fulltextEnded) { if (fulltextEnded) {
expressionTokens.push(currentWord); expressionTokens.push(rec);
} else { } else {
fulltextTokens.push(currentWord); fulltextTokens.push(rec);
} }
currentWord = ''; currentWord = '';
@ -77,8 +82,14 @@ function lexer(str) {
continue; continue;
} }
else if (!quotes) { else if (!quotes) {
if (currentWord.length === 0 && (chr === '#' || chr === '~')) { if (chr === '#' || chr === '~') {
fulltextEnded = true; if (!fulltextEnded) {
fulltextEnded = true;
}
else {
finishWord();
}
currentWord = chr; currentWord = chr;
continue; continue;

View File

@ -7,7 +7,7 @@ function parens(tokens) {
} }
while (true) { while (true) {
const leftIdx = tokens.findIndex(token => token === '('); const leftIdx = tokens.findIndex(token => token.token === '(');
if (leftIdx === -1) { if (leftIdx === -1) {
return tokens; return tokens;
@ -17,13 +17,13 @@ function parens(tokens) {
let parensLevel = 0 let parensLevel = 0
for (rightIdx = leftIdx; rightIdx < tokens.length; rightIdx++) { for (rightIdx = leftIdx; rightIdx < tokens.length; rightIdx++) {
if (tokens[rightIdx] === ')') { if (tokens[rightIdx].token === ')') {
parensLevel--; parensLevel--;
if (parensLevel === 0) { if (parensLevel === 0) {
break; break;
} }
} else if (tokens[rightIdx] === '(') { } else if (tokens[rightIdx].token === '(') {
parensLevel++; parensLevel++;
} }
} }

View File

@ -18,6 +18,8 @@ const comparatorBuilder = require('./comparator_builder');
const ValueExtractor = require('./value_extractor'); const ValueExtractor = require('./value_extractor');
function getFulltext(tokens, parsingContext) { function getFulltext(tokens, parsingContext) {
tokens = tokens.map(t => t.token);
parsingContext.highlightedTokens.push(...tokens); parsingContext.highlightedTokens.push(...tokens);
if (tokens.length === 0) { if (tokens.length === 0) {
@ -50,75 +52,75 @@ function getExpression(tokens, parsingContext, level = 0) {
let i; let i;
function parseNoteProperty() { function parseNoteProperty() {
if (tokens[i] !== '.') { if (tokens[i].token !== '.') {
parsingContext.addError('Expected "." to separate field path'); parsingContext.addError('Expected "." to separate field path');
return; return;
} }
i++; i++;
if (tokens[i] === 'content') { if (tokens[i].token === 'content') {
i += 1; i += 1;
const operator = tokens[i]; const operator = tokens[i].token;
if (!isOperator(operator)) { if (!isOperator(operator)) {
parsingContext.addError(`After content expected operator, but got "${tokens[i]}"`); parsingContext.addError(`After content expected operator, but got "${tokens[i].token}"`);
return; return;
} }
i++; i++;
return new OrExp([ return new OrExp([
new NoteContentUnprotectedFulltextExp(operator, [tokens[i]]), new NoteContentUnprotectedFulltextExp(operator, [tokens[i].token]),
new NoteContentProtectedFulltextExp(operator, [tokens[i]]) new NoteContentProtectedFulltextExp(operator, [tokens[i].token])
]); ]);
} }
if (tokens[i] === 'parents') { if (tokens[i].token === 'parents') {
i += 1; i += 1;
return new ChildOfExp(parseNoteProperty()); return new ChildOfExp(parseNoteProperty());
} }
if (tokens[i] === 'children') { if (tokens[i].token === 'children') {
i += 1; i += 1;
return new ParentOfExp(parseNoteProperty()); return new ParentOfExp(parseNoteProperty());
} }
if (tokens[i] === 'ancestors') { if (tokens[i].token === 'ancestors') {
i += 1; i += 1;
return new DescendantOfExp(parseNoteProperty()); return new DescendantOfExp(parseNoteProperty());
} }
if (tokens[i] === 'labels') { if (tokens[i].token === 'labels') {
if (tokens[i + 1] !== '.') { if (tokens[i + 1].token !== '.') {
parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`); parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
return; return;
} }
i += 2; i += 2;
return parseLabel(tokens[i]); return parseLabel(tokens[i].token);
} }
if (tokens[i] === 'relations') { if (tokens[i].token === 'relations') {
if (tokens[i + 1] !== '.') { if (tokens[i + 1].token !== '.') {
parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`); parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
return; return;
} }
i += 2; i += 2;
return parseRelation(tokens[i]); return parseRelation(tokens[i].token);
} }
if (PropertyComparisonExp.isProperty(tokens[i])) { if (PropertyComparisonExp.isProperty(tokens[i].token)) {
const propertyName = tokens[i]; const propertyName = tokens[i].token;
const operator = tokens[i + 1]; const operator = tokens[i + 1].token;
const comparedValue = tokens[i + 2]; const comparedValue = tokens[i + 2].token;
const comparator = comparatorBuilder(operator, comparedValue); const comparator = comparatorBuilder(operator, comparedValue);
if (!comparator) { if (!comparator) {
@ -131,7 +133,7 @@ function getExpression(tokens, parsingContext, level = 0) {
return new PropertyComparisonExp(propertyName, comparator); return new PropertyComparisonExp(propertyName, comparator);
} }
parsingContext.addError(`Unrecognized note property "${tokens[i]}"`); parsingContext.addError(`Unrecognized note property "${tokens[i].token}"`);
} }
function parseAttribute(name) { function parseAttribute(name) {
@ -153,9 +155,9 @@ function getExpression(tokens, parsingContext, level = 0) {
function parseLabel(labelName) { function parseLabel(labelName) {
parsingContext.highlightedTokens.push(labelName); parsingContext.highlightedTokens.push(labelName);
if (i < tokens.length - 2 && isOperator(tokens[i + 1])) { if (i < tokens.length - 2 && isOperator(tokens[i + 1].token)) {
let operator = tokens[i + 1]; let operator = tokens[i + 1].token;
const comparedValue = tokens[i + 2]; const comparedValue = tokens[i + 2].token;
parsingContext.highlightedTokens.push(comparedValue); parsingContext.highlightedTokens.push(comparedValue);
@ -180,7 +182,7 @@ function getExpression(tokens, parsingContext, level = 0) {
function parseRelation(relationName) { function parseRelation(relationName) {
parsingContext.highlightedTokens.push(relationName); parsingContext.highlightedTokens.push(relationName);
if (i < tokens.length - 2 && tokens[i + 1] === '.') { if (i < tokens.length - 2 && tokens[i + 1].token === '.') {
i += 1; i += 1;
return new RelationWhereExp(relationName, parseNoteProperty()); return new RelationWhereExp(relationName, parseNoteProperty());
@ -193,7 +195,7 @@ function getExpression(tokens, parsingContext, level = 0) {
const orderDefinitions = []; const orderDefinitions = [];
let limit; let limit;
if (tokens[i] === 'orderby') { if (tokens[i].token === 'orderby') {
do { do {
const propertyPath = []; const propertyPath = [];
let direction = "asc"; let direction = "asc";
@ -201,13 +203,13 @@ function getExpression(tokens, parsingContext, level = 0) {
do { do {
i++; i++;
propertyPath.push(tokens[i]); propertyPath.push(tokens[i].token);
i++; i++;
} while (tokens[i] === '.'); } while (i < tokens.length && tokens[i].token === '.');
if (["asc", "desc"].includes(tokens[i])) { if (i < tokens.length && ["asc", "desc"].includes(tokens[i].token)) {
direction = tokens[i]; direction = tokens[i].token;
i++; i++;
} }
@ -221,11 +223,11 @@ function getExpression(tokens, parsingContext, level = 0) {
valueExtractor, valueExtractor,
direction direction
}); });
} while (tokens[i] === ','); } while (i < tokens.length && tokens[i].token === ',');
} }
if (tokens[i] === 'limit') { if (i < tokens.length && tokens[i].token === 'limit') {
limit = parseInt(tokens[i + 1]); limit = parseInt(tokens[i + 1].token);
} }
return new OrderByAndLimitExp(orderDefinitions, limit); return new OrderByAndLimitExp(orderDefinitions, limit);
@ -241,16 +243,18 @@ function getExpression(tokens, parsingContext, level = 0) {
} }
for (i = 0; i < tokens.length; i++) { for (i = 0; i < tokens.length; i++) {
const token = tokens[i]; if (Array.isArray(tokens[i])) {
expressions.push(getExpression(tokens[i], parsingContext, level++));
continue;
}
const token = tokens[i].token;
if (token === '#' || token === '~') { if (token === '#' || token === '~') {
continue; continue;
} }
if (Array.isArray(token)) { if (token.startsWith('#') || token.startsWith('~')) {
expressions.push(getExpression(token, parsingContext, level++));
}
else if (token.startsWith('#') || token.startsWith('~')) {
expressions.push(parseAttribute(token)); expressions.push(parseAttribute(token));
} }
else if (['orderby', 'limit'].includes(token)) { else if (['orderby', 'limit'].includes(token)) {
@ -273,7 +277,7 @@ function getExpression(tokens, parsingContext, level = 0) {
i += 1; i += 1;
if (!Array.isArray(tokens[i])) { if (!Array.isArray(tokens[i])) {
parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i]} instead`); parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i].token} instead`);
continue; continue;
} }

View File

@ -12,7 +12,6 @@ class ParsingContext {
// we record only the first error, subsequent ones are usually consequence of the first // we record only the first error, subsequent ones are usually consequence of the first
if (!this.error) { if (!this.error) {
this.error = error; this.error = error;
console.log(this.error);
} }
} }
} }