lexer now marks "in quotes" tokens

zadam 2020-07-19 23:19:45 +02:00
parent 4c7b1d6543
commit 35469f6f2d
9 changed files with 161 additions and 85 deletions
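In short: the lexer's finishWord() now pushes a {token, inQuotes} record instead of a bare string, and every consumer (parens, the parser, the specs) unwraps the text via .token. A minimal sketch of the new shape, using only names that appear in this diff (the exact inQuotes value for quoted words is not asserted here):

const lexer = require('../../src/services/search/lexer.js');   // path as used by the spec files

const {fulltextTokens, expressionTokens} = lexer("hello #mylabel = text");

// each entry is now a record such as {token: "hello", inQuotes: false},
// so callers that only need the text unwrap it:
fulltextTokens.map(t => t.token);      // ["hello"]
expressionTokens.map(t => t.token);    // ["#mylabel", "=", "text"]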

View File

@@ -2,75 +2,83 @@ const lexer = require('../../src/services/search/lexer.js');
describe("Lexer fulltext", () => {
it("simple lexing", () => {
expect(lexer("hello world").fulltextTokens)
expect(lexer("hello world").fulltextTokens.map(t => t.token))
.toEqual(["hello", "world"]);
});
it("use quotes to keep words together", () => {
expect(lexer("'hello world' my friend").fulltextTokens)
expect(lexer("'hello world' my friend").fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]);
expect(lexer('"hello world" my friend').fulltextTokens)
expect(lexer('"hello world" my friend').fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]);
expect(lexer('`hello world` my friend').fulltextTokens)
expect(lexer('`hello world` my friend').fulltextTokens.map(t => t.token))
.toEqual(["hello world", "my", "friend"]);
});
it("you can use different quotes and other special characters inside quotes", () => {
expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens)
expect(lexer("'i can use \" or ` or #~=*' without problem").fulltextTokens.map(t => t.token))
.toEqual(["i can use \" or ` or #~=*", "without", "problem"]);
});
it("if quote is not ended then it's just one long token", () => {
expect(lexer("'unfinished quote").fulltextTokens)
expect(lexer("'unfinished quote").fulltextTokens.map(t => t.token))
.toEqual(["unfinished quote"]);
});
it("parenthesis and symbols in fulltext section are just normal characters", () => {
expect(lexer("what's u=p <b(r*t)h>").fulltextTokens)
expect(lexer("what's u=p <b(r*t)h>").fulltextTokens.map(t => t.token))
.toEqual(["what's", "u=p", "<b(r*t)h>"]);
});
it("escaping special characters", () => {
expect(lexer("hello \\#\\~\\'").fulltextTokens)
expect(lexer("hello \\#\\~\\'").fulltextTokens.map(t => t.token))
.toEqual(["hello", "#~'"]);
});
});
describe("Lexer expression", () => {
it("simple attribute existence", () => {
expect(lexer("#label ~relation").expressionTokens)
expect(lexer("#label ~relation").expressionTokens.map(t => t.token))
.toEqual(["#label", "~relation"]);
});
it("simple label operators", () => {
expect(lexer("#label*=*text").expressionTokens)
expect(lexer("#label*=*text").expressionTokens.map(t => t.token))
.toEqual(["#label", "*=*", "text"]);
});
it("spaces in attribute names and values", () => {
expect(lexer(`#'long label'="hello o' world" ~'long relation'`).expressionTokens)
.toEqual(["#long label", "=", "hello o' world", "~long relation"]);
});
it("complex expressions with and, or and parenthesis", () => {
expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens)
expect(lexer(`# (#label=text OR #second=text) AND ~relation`).expressionTokens.map(t => t.token))
.toEqual(["#", "(", "#label", "=", "text", "or", "#second", "=", "text", ")", "and", "~relation"]);
});
it("dot separated properties", () => {
expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens)
expect(lexer(`# ~author.title = 'Hugh Howey' AND note.'book title' = 'Silo'`).expressionTokens.map(t => t.token))
.toEqual(["#", "~author", ".", "title", "=", "hugh howey", "and", "note", ".", "book title", "=", "silo"]);
});
it("negation of label and relation", () => {
expect(lexer(`#!capital ~!neighbor`).expressionTokens)
expect(lexer(`#!capital ~!neighbor`).expressionTokens.map(t => t.token))
.toEqual(["#!capital", "~!neighbor"]);
});
it("negation of sub-expression", () => {
expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens)
expect(lexer(`# not(#capital) and note.noteId != "root"`).expressionTokens.map(t => t.token))
.toEqual(["#", "not", "(", "#capital", ")", "and", "note", ".", "noteid", "!=", "root"]);
});
});
describe("Lexer invalid queries and edge cases", () => {
it("concatenated attributes", () => {
expect(lexer("#label~relation").expressionTokens.map(t => t.token))
.toEqual(["#label", "~relation"]);
});
it("spaces in attribute names and values", () => {
// invalid but should be reported by parser as an error
expect(lexer(`#'long label'="hello o' world" ~'long relation'`).expressionTokens.map(t => t.token))
.toEqual(["#long label", "=", "hello o' world", "~long relation"]);
});
});

View File

@@ -2,19 +2,22 @@ const parens = require('../../src/services/search/parens.js');
describe("Parens handler", () => {
it("handles parens", () => {
expect(parens(["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"]))
const input = ["(", "hello", ")", "and", "(", "(", "pick", "one", ")", "and", "another", ")"]
.map(token => ({token}));
expect(parens(input))
.toEqual([
[
"hello"
{token: "hello"}
],
"and",
{token: "and"},
[
[
"pick",
"one"
{token: "pick"},
{token: "one"}
],
"and",
"another"
{token: "and"},
{token: "another"}
]
]);
});

View File

@@ -1,10 +1,24 @@
const ParsingContext = require("../../src/services/search/parsing_context.js");
const parser = require('../../src/services/search/parser.js');
function tokens(...args) {
return args.map(arg => {
if (Array.isArray(arg)) {
return arg;
}
else {
return {
token: arg,
inQuotes: false
};
}
});
}
describe("Parser", () => {
it("fulltext parser without content", () => {
const rootExp = parser({
fulltextTokens: ["hello", "hi"],
fulltextTokens: tokens("hello", "hi"),
expressionTokens: [],
parsingContext: new ParsingContext({includeNoteContent: false})
});
@@ -15,7 +29,7 @@ describe("Parser", () => {
it("fulltext parser with content", () => {
const rootExp = parser({
fulltextTokens: ["hello", "hi"],
fulltextTokens: tokens("hello", "hi"),
expressionTokens: [],
parsingContext: new ParsingContext({includeNoteContent: true})
});
@@ -36,7 +50,7 @@ describe("Parser", () => {
it("simple label comparison", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#mylabel", "=", "text"],
expressionTokens: tokens("#mylabel", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -49,7 +63,7 @@ describe("Parser", () => {
it("simple attribute negation", () => {
let rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#!mylabel"],
expressionTokens: tokens("#!mylabel"),
parsingContext: new ParsingContext()
});
@@ -60,7 +74,7 @@ describe("Parser", () => {
rootExp = parser({
fulltextTokens: [],
expressionTokens: ["~!myrelation"],
expressionTokens: tokens("~!myrelation"),
parsingContext: new ParsingContext()
});
@@ -73,7 +87,7 @@ describe("Parser", () => {
it("simple label AND", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "and", "#second", "=", "text"],
expressionTokens: tokens("#first", "=", "text", "and", "#second", "=", "text"),
parsingContext: new ParsingContext(true)
});
@@ -90,7 +104,7 @@ describe("Parser", () => {
it("simple label AND without explicit AND", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "#second", "=", "text"],
expressionTokens: tokens("#first", "=", "text", "#second", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -107,7 +121,7 @@ describe("Parser", () => {
it("simple label OR", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "or", "#second", "=", "text"],
expressionTokens: tokens("#first", "=", "text", "or", "#second", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -123,8 +137,8 @@ describe("Parser", () => {
it("fulltext and simple label", () => {
const rootExp = parser({
fulltextTokens: ["hello"],
expressionTokens: ["#mylabel", "=", "text"],
fulltextTokens: tokens("hello"),
expressionTokens: tokens("#mylabel", "=", "text"),
parsingContext: new ParsingContext()
});
@@ -141,7 +155,7 @@ describe("Parser", () => {
it("label sub-expression", () => {
const rootExp = parser({
fulltextTokens: [],
expressionTokens: ["#first", "=", "text", "or", ["#second", "=", "text", "and", "#third", "=", "text"]],
expressionTokens: tokens("#first", "=", "text", "or", tokens("#second", "=", "text", "and", "#third", "=", "text")),
parsingContext: new ParsingContext()
});
@@ -161,3 +175,17 @@ describe("Parser", () => {
expect(secondSubSub.attributeName).toEqual("third");
});
});
describe("Invalid tokens", () => {
it("incomplete comparison", () => {
const parsingContext = new ParsingContext();
parser({
fulltextTokens: [],
expressionTokens: tokens("#first", "="),
parsingContext
});
expect(parsingContext.error).toEqual('Misplaced or incomplete expression "="')
});
});
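The new describe block above pins down how invalid input is now reported: the parse does not throw, ParsingContext simply records the first error for the caller to inspect. A caller-side sketch of the same behaviour, reusing the ParsingContext, parser and tokens() names from above:

const parsingContext = new ParsingContext();

parser({
    fulltextTokens: [],
    expressionTokens: tokens("#first", "="),
    parsingContext
});

if (parsingContext.error) {
    // 'Misplaced or incomplete expression "="' is surfaced on the context
    // rather than console.log-ged (see the parsing_context.js change at the end of this diff).
}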

View File

@@ -529,4 +529,26 @@ describe("Search", () => {
});
// FIXME: test what happens when we order without any filter criteria
// it("comparison between labels", async () => {
// rootNote
// .child(note("Europe")
// .child(note("Austria")
// .label('capital', 'Vienna')
// .label('largestCity', 'Vienna'))
// .child(note("Canada")
// .label('capital', 'Ottawa')
// .label('largestCity', 'Toronto'))
// .child(note("Czech Republic")
// .label('capital', 'Prague')
// .label('largestCity', 'Prague'))
// );
//
// const parsingContext = new ParsingContext();
//
// const searchResults = await searchService.findNotesWithQuery('#capital = #largestCity', parsingContext);
// expect(searchResults.length).toEqual(2);
// expect(findNoteByTitle(searchResults, "Czech Republic")).toBeTruthy();
// expect(findNoteByTitle(searchResults, "Austria")).toBeTruthy();
// })
});

View File

@@ -51,6 +51,7 @@ class PropertyComparisonExp extends Expression {
if (value) {
value = value.toLowerCase();
}
if (this.comparator(value)) {
resNoteSet.add(note);
}

View File

@@ -26,10 +26,15 @@ function lexer(str) {
return;
}
const rec = {
token: currentWord,
inQuotes: quotes
};
if (fulltextEnded) {
expressionTokens.push(currentWord);
expressionTokens.push(rec);
} else {
fulltextTokens.push(currentWord);
fulltextTokens.push(rec);
}
currentWord = '';
@@ -77,8 +82,14 @@ function lexer(str) {
continue;
}
else if (!quotes) {
if (currentWord.length === 0 && (chr === '#' || chr === '~')) {
fulltextEnded = true;
if (chr === '#' || chr === '~') {
if (!fulltextEnded) {
fulltextEnded = true;
}
else {
finishWord();
}
currentWord = chr;
continue;
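The reworked branch above is what the new "concatenated attributes" spec case exercises: the first # or ~ still just ends the fulltext part, while a later one now flushes the word in progress via finishWord() before starting the next token. Roughly:

// "#label~relation":
//   '#' -> fulltextEnded = true, currentWord grows to "#label"
//   '~' -> fulltextEnded is already true, so finishWord() pushes the "#label" record
//          and currentWord restarts as "~relation"
lexer("#label~relation").expressionTokens.map(t => t.token);   // ["#label", "~relation"]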

View File

@@ -7,7 +7,7 @@ function parens(tokens) {
}
while (true) {
const leftIdx = tokens.findIndex(token => token === '(');
const leftIdx = tokens.findIndex(token => token.token === '(');
if (leftIdx === -1) {
return tokens;
@@ -17,13 +17,13 @@ function parens(tokens) {
let parensLevel = 0
for (rightIdx = leftIdx; rightIdx < tokens.length; rightIdx++) {
if (tokens[rightIdx] === ')') {
if (tokens[rightIdx].token === ')') {
parensLevel--;
if (parensLevel === 0) {
break;
}
} else if (tokens[rightIdx] === '(') {
} else if (tokens[rightIdx].token === '(') {
parensLevel++;
}
}
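parens() still only regroups tokens; the change is that it compares token.token, since each element is now a record. A minimal illustration of the new in/out shape, mirroring the spec change above:

parens([{token: "("}, {token: "hello"}, {token: ")"}, {token: "and"}, {token: "world"}]);
// => [[{token: "hello"}], {token: "and"}, {token: "world"}]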

View File

@@ -18,6 +18,8 @@ const comparatorBuilder = require('./comparator_builder');
const ValueExtractor = require('./value_extractor');
function getFulltext(tokens, parsingContext) {
tokens = tokens.map(t => t.token);
parsingContext.highlightedTokens.push(...tokens);
if (tokens.length === 0) {
@@ -50,75 +52,75 @@ function getExpression(tokens, parsingContext, level = 0) {
let i;
function parseNoteProperty() {
if (tokens[i] !== '.') {
if (tokens[i].token !== '.') {
parsingContext.addError('Expected "." to separate field path');
return;
}
i++;
if (tokens[i] === 'content') {
if (tokens[i].token === 'content') {
i += 1;
const operator = tokens[i];
const operator = tokens[i].token;
if (!isOperator(operator)) {
parsingContext.addError(`After content expected operator, but got "${tokens[i]}"`);
parsingContext.addError(`After content expected operator, but got "${tokens[i].token}"`);
return;
}
i++;
return new OrExp([
new NoteContentUnprotectedFulltextExp(operator, [tokens[i]]),
new NoteContentProtectedFulltextExp(operator, [tokens[i]])
new NoteContentUnprotectedFulltextExp(operator, [tokens[i].token]),
new NoteContentProtectedFulltextExp(operator, [tokens[i].token])
]);
}
if (tokens[i] === 'parents') {
if (tokens[i].token === 'parents') {
i += 1;
return new ChildOfExp(parseNoteProperty());
}
if (tokens[i] === 'children') {
if (tokens[i].token === 'children') {
i += 1;
return new ParentOfExp(parseNoteProperty());
}
if (tokens[i] === 'ancestors') {
if (tokens[i].token === 'ancestors') {
i += 1;
return new DescendantOfExp(parseNoteProperty());
}
if (tokens[i] === 'labels') {
if (tokens[i + 1] !== '.') {
parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`);
if (tokens[i].token === 'labels') {
if (tokens[i + 1].token !== '.') {
parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
return;
}
i += 2;
return parseLabel(tokens[i]);
return parseLabel(tokens[i].token);
}
if (tokens[i] === 'relations') {
if (tokens[i + 1] !== '.') {
parsingContext.addError(`Expected "." to separate field path, god "${tokens[i + 1]}"`);
if (tokens[i].token === 'relations') {
if (tokens[i + 1].token !== '.') {
parsingContext.addError(`Expected "." to separate field path, got "${tokens[i + 1].token}"`);
return;
}
i += 2;
return parseRelation(tokens[i]);
return parseRelation(tokens[i].token);
}
if (PropertyComparisonExp.isProperty(tokens[i])) {
const propertyName = tokens[i];
const operator = tokens[i + 1];
const comparedValue = tokens[i + 2];
if (PropertyComparisonExp.isProperty(tokens[i].token)) {
const propertyName = tokens[i].token;
const operator = tokens[i + 1].token;
const comparedValue = tokens[i + 2].token;
const comparator = comparatorBuilder(operator, comparedValue);
if (!comparator) {
@@ -131,7 +133,7 @@ function getExpression(tokens, parsingContext, level = 0) {
return new PropertyComparisonExp(propertyName, comparator);
}
parsingContext.addError(`Unrecognized note property "${tokens[i]}"`);
parsingContext.addError(`Unrecognized note property "${tokens[i].token}"`);
}
function parseAttribute(name) {
@@ -153,9 +155,9 @@ function getExpression(tokens, parsingContext, level = 0) {
function parseLabel(labelName) {
parsingContext.highlightedTokens.push(labelName);
if (i < tokens.length - 2 && isOperator(tokens[i + 1])) {
let operator = tokens[i + 1];
const comparedValue = tokens[i + 2];
if (i < tokens.length - 2 && isOperator(tokens[i + 1].token)) {
let operator = tokens[i + 1].token;
const comparedValue = tokens[i + 2].token;
parsingContext.highlightedTokens.push(comparedValue);
@@ -180,7 +182,7 @@ function getExpression(tokens, parsingContext, level = 0) {
function parseRelation(relationName) {
parsingContext.highlightedTokens.push(relationName);
if (i < tokens.length - 2 && tokens[i + 1] === '.') {
if (i < tokens.length - 2 && tokens[i + 1].token === '.') {
i += 1;
return new RelationWhereExp(relationName, parseNoteProperty());
@@ -193,7 +195,7 @@ function getExpression(tokens, parsingContext, level = 0) {
const orderDefinitions = [];
let limit;
if (tokens[i] === 'orderby') {
if (tokens[i].token === 'orderby') {
do {
const propertyPath = [];
let direction = "asc";
@@ -201,13 +203,13 @@ function getExpression(tokens, parsingContext, level = 0) {
do {
i++;
propertyPath.push(tokens[i]);
propertyPath.push(tokens[i].token);
i++;
} while (tokens[i] === '.');
} while (i < tokens.length && tokens[i].token === '.');
if (["asc", "desc"].includes(tokens[i])) {
direction = tokens[i];
if (i < tokens.length && ["asc", "desc"].includes(tokens[i].token)) {
direction = tokens[i].token;
i++;
}
@@ -221,11 +223,11 @@ function getExpression(tokens, parsingContext, level = 0) {
valueExtractor,
direction
});
} while (tokens[i] === ',');
} while (i < tokens.length && tokens[i].token === ',');
}
if (tokens[i] === 'limit') {
limit = parseInt(tokens[i + 1]);
if (i < tokens.length && tokens[i].token === 'limit') {
limit = parseInt(tokens[i + 1].token);
}
return new OrderByAndLimitExp(orderDefinitions, limit);
@@ -241,16 +243,18 @@ function getExpression(tokens, parsingContext, level = 0) {
}
for (i = 0; i < tokens.length; i++) {
const token = tokens[i];
if (Array.isArray(tokens[i])) {
expressions.push(getExpression(tokens[i], parsingContext, level++));
continue;
}
const token = tokens[i].token;
if (token === '#' || token === '~') {
continue;
}
if (Array.isArray(token)) {
expressions.push(getExpression(token, parsingContext, level++));
}
else if (token.startsWith('#') || token.startsWith('~')) {
if (token.startsWith('#') || token.startsWith('~')) {
expressions.push(parseAttribute(token));
}
else if (['orderby', 'limit'].includes(token)) {
@@ -273,7 +277,7 @@ function getExpression(tokens, parsingContext, level = 0) {
i += 1;
if (!Array.isArray(tokens[i])) {
parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i]} instead`);
parsingContext.addError(`not keyword should be followed by sub-expression in parenthesis, got ${tokens[i].token} instead`);
continue;
}
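Two details of the loop above are worth spelling out: the Array.isArray check now has to run before reading .token, because a parenthesised group coming out of parens() is a plain nested array with no .token of its own, and the added i < tokens.length guards keep the orderby/limit parsing from reading past the last token. A rough illustration of the first point:

// after parens(), a sub-expression arrives as a nested array of records:
const group = [{token: "#second"}, {token: "="}, {token: "text"}];

Array.isArray(group);   // true  -> handled by the recursive getExpression() call
group.token;            // undefined -> reading it before the isArray check would misfire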

View File

@@ -12,7 +12,6 @@ class ParsingContext {
// we record only the first error, subsequent ones are usually consequence of the first
if (!this.error) {
this.error = error;
console.log(this.error);
}
}
}