about summary refs log tree commit diff stats
path: root/pkg/query
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/query')
-rw-r--r--  pkg/query/lang.md        12
-rw-r--r--  pkg/query/lexer.go       27
-rw-r--r--  pkg/query/lexer_test.go  24
-rw-r--r--  pkg/query/query.go       34
4 files changed, 68 insertions, 29 deletions
diff --git a/pkg/query/lang.md b/pkg/query/lang.md
deleted file mode 100644
index a399cb8..0000000
--- a/pkg/query/lang.md
+++ /dev/null
@@ -1,12 +0,0 @@
-# Query Language Spec
-
-```
-<expr_list> := <expr> | <expr> <expr_list>
-
-<expr> := <statment> <bin_op> <statment>
-<statment> := <statement_start> {strings} <statment_end>
-<statment_start :=
-<statment_end> :=
-
-<bin_op> := "and" | "or" | "not" | "similar"
-```
diff --git a/pkg/query/lexer.go b/pkg/query/lexer.go
index db7ea28..a53f0b4 100644
--- a/pkg/query/lexer.go
+++ b/pkg/query/lexer.go
@@ -9,7 +9,8 @@ import (
type queryTokenType int
-var lexRegex, oldLexRegex *regexp.Regexp
+var LexRegex *regexp.Regexp
+var LexRegexPattern string
const (
TOK_UNKNOWN queryTokenType = iota
@@ -148,20 +149,20 @@ func Lex(query string) []Token {
CLAUSE_END
)
- matches := lexRegex.FindAllStringSubmatch(query, -1)
+ matches := LexRegex.FindAllStringSubmatch(query, -1)
tokens := make([]Token, 0, 4*len(matches))
tokens = append(tokens, Token{Type: TOK_CLAUSE_START})
tokens = append(tokens, Token{TOK_CLAUSE_AND, "and"}) // default to and'ing all args
clauseLevel := 1
- for i, match := range matches {
+ for _, match := range matches {
if match[CLAUSE_START] != "" {
tokens = append(tokens, Token{Type: TOK_CLAUSE_START})
// TODO: set maximum nest level
clauseLevel += 1
}
if match[CLAUSE_OPERATOR] != "" {
- if i == 0 || tokens[i-1].Type != TOK_CLAUSE_START {
+ if len(tokens) == 0 || tokens[len(tokens)-1].Type != TOK_CLAUSE_START {
tokens = append(tokens, Token{Type: TOK_CLAUSE_START})
clauseLevel += 1
}
@@ -283,15 +284,10 @@ func tokenizeValue(s string, catType queryTokenType) Token {
return t
}
-func treeStringify(tokens []Token) string {
+func TokensStringify(tokens []Token) string {
b := strings.Builder{}
indentLvl := 0
- writeIndent := func(level int) {
- for range level {
- b.WriteString("\t")
- }
- }
writeToken := func(t Token) {
b.WriteByte('`')
b.WriteString(t.String())
@@ -301,11 +297,11 @@ func treeStringify(tokens []Token) string {
for i, token := range tokens {
switch token.Type {
case TOK_CLAUSE_START:
- writeIndent(indentLvl)
+ writeIndent(&b, indentLvl)
b.WriteByte('(')
case TOK_CLAUSE_END:
indentLvl -= 1
- writeIndent(indentLvl)
+ writeIndent(&b, indentLvl)
b.WriteString(")\n")
case TOK_CLAUSE_OR:
b.WriteString("or\n")
@@ -315,7 +311,7 @@ func treeStringify(tokens []Token) string {
indentLvl += 1
case TOK_CAT_TITLE, TOK_CAT_AUTHOR, TOK_CAT_DATE, TOK_CAT_FILETIME, TOK_CAT_TAGS, TOK_CAT_LINKS, TOK_CAT_META, TOK_OP_NEG:
if i == 0 || tokens[i-1].Type != TOK_OP_NEG {
- writeIndent(indentLvl)
+ writeIndent(&b, indentLvl)
}
writeToken(token)
case TOK_VAL_STR, TOK_VAL_DATETIME, TOK_UNKNOWN:
@@ -340,7 +336,8 @@ func init() {
clauseOpPattern := `(?<clause_operator>(?i)and|or)?`
clauseStart := `(?<clause_start>\()?`
clauseEnd := `(?<clause_end>\))?`
- clausePattern := clauseStart + `\s*` + clauseOpPattern + `\s*(?:` + statementPattern + `|` + unknownPattern + `)\s*` + clauseEnd
+ clausePattern := clauseStart + `\s*` + clauseOpPattern + `\s*(?:` + statementPattern + `|` + unknownPattern + `)\s*` + clauseEnd + `\s*`
+ LexRegexPattern = clausePattern
- lexRegex = regexp.MustCompile(clausePattern)
+ LexRegex = regexp.MustCompile(LexRegexPattern)
}
diff --git a/pkg/query/lexer_test.go b/pkg/query/lexer_test.go
index 761055e..0cfc1de 100644
--- a/pkg/query/lexer_test.go
+++ b/pkg/query/lexer_test.go
@@ -28,6 +28,26 @@ func TestLex(t *testing.T) {
{TOK_CAT_DATE, "d"}, {TOK_OP_AP, ":"}, {TOK_VAL_DATETIME, "01010001"},
{Type: TOK_CLAUSE_END},
}},
+ {"leading subclause", "(or a:a a:b)", []Token{
+ {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_AND, "and"},
+ {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_OR, "or"},
+ {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "a"},
+ {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "b"},
+ {Type: TOK_CLAUSE_END},
+ {Type: TOK_CLAUSE_END},
+ }},
+ {"clause after clause", "(or a:a a:b) (or a:c a:d)", []Token{
+ {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_AND, "and"},
+ {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_OR, "or"},
+ {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "a"},
+ {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "b"},
+ {Type: TOK_CLAUSE_END},
+ {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_OR, "or"},
+ {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "c"},
+ {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "d"},
+ {Type: TOK_CLAUSE_END},
+ {Type: TOK_CLAUSE_END},
+ }},
{"nested clauses", "a:a (or t:b t!=c) or d<=01010001 and -T~foo", []Token{
{Type: TOK_CLAUSE_START}, {TOK_CLAUSE_AND, "and"},
{TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "a"},
@@ -63,8 +83,8 @@ func TestLex(t *testing.T) {
}
if t.Failed() {
- t.Log("Got\n", treeStringify(got))
- t.Log("Want\n", treeStringify(tt.want))
+ t.Log("Got\n", TokensStringify(got))
+ t.Log("Want\n", TokensStringify(tt.want))
}
})
}
diff --git a/pkg/query/query.go b/pkg/query/query.go
new file mode 100644
index 0000000..5ddc724
--- /dev/null
+++ b/pkg/query/query.go
@@ -0,0 +1,34 @@
+package query
+
+import "strings"
+
+func Generate(ir *QueryIR) (any, error) {
+ // TODO: implement
+ return nil, nil
+}
+
+func Compile(query string) (any, error) {
+ // TODO: logging
+ clause, err := Parse(Lex(query))
+ if err != nil {
+ return nil, err
+ }
+
+ ir, err := NewIR(*clause)
+ if err != nil {
+ return nil, err
+ }
+
+ ir, err = Optimize(ir)
+ if err != nil {
+ return nil, err
+ }
+
+ return Generate(ir)
+}
+
+func writeIndent(b *strings.Builder, level int) {
+ for range level {
+ b.WriteByte('\t')
+ }
+}