aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
Diffstat (limited to 'pkg')
-rw-r--r--pkg/query/compiler.go94
-rw-r--r--pkg/query/optimizer.go38
-rw-r--r--pkg/query/parser.go31
-rw-r--r--pkg/shell/interpreter.go38
-rw-r--r--pkg/shell/state.go91
5 files changed, 141 insertions, 151 deletions
diff --git a/pkg/query/compiler.go b/pkg/query/compiler.go
index f0df4b4..e6418d5 100644
--- a/pkg/query/compiler.go
+++ b/pkg/query/compiler.go
@@ -9,77 +9,27 @@ import (
const MAX_CLAUSE_DEPTH int = 16
-func (stmt Statement) Compile(b *strings.Builder) (*string, error) {
- if stmt.Negated {
- b.WriteString("NOT ")
- }
+type CompilationArtifact struct {
+ Query string
+ Args []string
+}
- switch stmt.Category {
- case CAT_TITLE:
- b.WriteString("title ")
- case CAT_AUTHOR:
- b.WriteString("author ")
- case CAT_DATE:
- b.WriteString("date ")
- case CAT_FILETIME:
- b.WriteString("fileTime ")
- case CAT_TAGS:
- b.WriteString("tags ")
- case CAT_LINKS:
- b.WriteString("links ")
- default:
- return nil, &CompileError{
- fmt.Sprint("unknown or invalid category ", stmt.Category.String()),
- }
- }
- switch stmt.Operator {
- case OP_EQ:
- if stmt.Category.IsSet() {
- b.WriteString("IN ")
+func (art CompilationArtifact) String() string {
+ b := strings.Builder{}
+ fmt.Fprintln(&b, art.Query)
+ b.WriteByte('[')
+ for i, arg := range art.Args {
+ if i != len(art.Args)-1 {
+ fmt.Fprintf(&b, "`%s`, ", arg)
} else {
- b.WriteString("= ")
- }
- case OP_AP:
- b.WriteString("LIKE ")
- case OP_NE:
- b.WriteString("!= ")
- case OP_LT:
- b.WriteString("< ")
- case OP_LE:
- b.WriteString("<= ")
- case OP_GE:
- b.WriteString(">= ")
- case OP_GT:
- b.WriteString("> ")
- default:
- return nil, &CompileError{
- fmt.Sprint("unknown or invalid operand ", stmt.Operator.String()),
+ fmt.Fprintf(&b, "`%s`", arg)
}
}
-
- switch stmt.Value.Type() {
- case VAL_STR:
- s, ok := stmt.Value.(StringValue)
- if !ok {
- panic(CompileError{"type corruption in string value"})
- }
- b.WriteString("(?) ")
- return &s.S, nil
- case VAL_DATETIME:
- dt, ok := stmt.Value.(DatetimeValue)
- if !ok {
- panic(CompileError{"type corruption in datetime value"})
- }
- fmt.Fprint(b, dt.D.Unix(), " ")
- default:
- return nil, &CompileError{
- fmt.Sprint("unknown or invalid value type ", stmt.Value.Type()),
- }
- }
- return nil, nil
+ b.WriteByte(']')
+ return b.String()
}
-func (s Statements) Compile(b *strings.Builder, delim string) ([]string, error) {
+func (s Statements) buildCompile(b *strings.Builder, delim string) ([]string, error) {
var args []string
sCount := 0
@@ -185,6 +135,7 @@ func (s Statements) Compile(b *strings.Builder, delim string) ([]string, error)
start, end := util.FuzzDatetime(d.D)
+ b.WriteString("NOT ")
b.WriteString(opStr)
fmt.Fprint(b, start.Unix(), " ")
b.WriteString("AND ")
@@ -199,6 +150,9 @@ func (s Statements) Compile(b *strings.Builder, delim string) ([]string, error)
} else {
idx := 0
for _, stmt := range opStmts {
+ if stmt.Negated {
+ b.WriteString("NOT ")
+ }
b.WriteString(catStr)
b.WriteString(opStr)
arg, ok := stmt.Value.buildCompile(b)
@@ -229,9 +183,9 @@ func (s Statements) Compile(b *strings.Builder, delim string) ([]string, error)
return args, nil
}
-func (root Clause) Compile() (string, []string, error) {
+func (root Clause) Compile() (CompilationArtifact, error) {
if d := root.Depth(); d > MAX_CLAUSE_DEPTH {
- return "", nil, &CompileError{
+ return CompilationArtifact{}, &CompileError{
fmt.Sprintf("exceeded maximum clause depth: %d > %d", d, MAX_CLAUSE_DEPTH),
}
}
@@ -239,9 +193,9 @@ func (root Clause) Compile() (string, []string, error) {
b := strings.Builder{}
args, err := root.buildCompile(&b)
if err != nil {
- return "", nil, err
+ return CompilationArtifact{}, err
}
- return b.String(), args, nil
+ return CompilationArtifact{b.String(), args}, nil
}
func (c Clause) buildCompile(b *strings.Builder) ([]string, error) {
@@ -257,7 +211,7 @@ func (c Clause) buildCompile(b *strings.Builder) ([]string, error) {
return nil, &CompileError{fmt.Sprint("invalid clause operator ", c.Operator)}
}
- args, err := c.Statements.Compile(b, delim)
+ args, err := c.Statements.buildCompile(b, delim)
if err != nil {
return nil, err
}
diff --git a/pkg/query/optimizer.go b/pkg/query/optimizer.go
index c203b9c..cdb2455 100644
--- a/pkg/query/optimizer.go
+++ b/pkg/query/optimizer.go
@@ -47,6 +47,38 @@ func NewOptimizer(root *Clause, workers uint) Optimizer {
}
}
+// Optimize clause according to level.
+// level 0 is automatic and levels < 0 do nothing.
+func (o Optimizer) Optimize(level int) {
+ o.Simplify()
+ if level < 0 {
+ return
+ } else if level == 0 {
+ // TODO: determine smarter level determination strategy
+ level = o.root.Depth()
+ }
+
+ oldDepth := o.root.Depth()
+ for range level {
+ // clause level parallel
+ o.Compact()
+ o.StrictEquality()
+ o.Tighten()
+ o.Contradictions()
+ // parallel + serial
+ o.Tidy()
+ // purely serial
+ o.Flatten()
+
+ depth := o.root.Depth()
+ if depth == oldDepth {
+ break
+ } else {
+ oldDepth = depth
+ }
+ }
+}
+
// Perform optimizations in parallel. They should **NOT** mutate the tree
func (o Optimizer) parallel(optimize func(*Clause)) {
jobs := make(chan *Clause, o.workers)
@@ -131,6 +163,12 @@ func (o *Optimizer) Flatten() {
})
}
+// Remove multiples of equivalent statements within the same clause
+//
+// Examples
+//
+// (and a="Fred Flinstone" a="Fred Flinstone") --> (and a="Fred Flinstone")
+// (or a=Shaggy -a!=Shaggy) --> (or a=Shaggy)
func (o *Optimizer) Compact() {
o.parallel(func(c *Clause) {
c.Statements = slices.CompactFunc(c.Statements, StatementEq)
diff --git a/pkg/query/parser.go b/pkg/query/parser.go
index 178665d..c52530e 100644
--- a/pkg/query/parser.go
+++ b/pkg/query/parser.go
@@ -322,6 +322,35 @@ func (s Statements) OperatorPartition() iter.Seq2[opType, Statements] {
}
}
+// Partition statements by their negated status without copying, similar to
+// CategoryPartition and OperatorPartition
+func (s Statements) NegatedPartition() iter.Seq2[bool, Statements] {
+ if !slices.IsSortedFunc(s, StatementCmp) {
+ slices.SortFunc(s, StatementCmp)
+ }
+
+ return func(yield func(bool, Statements) bool) {
+ firstNegated := -1
+ for i, stmt := range s {
+ if stmt.Negated {
+ firstNegated = i
+ }
+ }
+
+ if firstNegated > 0 {
+ if !yield(false, s[:firstNegated]) {
+ return
+ } else if !yield(true, s[firstNegated:]) {
+ return
+ }
+ } else {
+ if !yield(false, s) {
+ return
+ }
+ }
+ }
+}
+
func (c Clause) String() string {
b := &strings.Builder{}
c.buildString(b, 0)
@@ -459,7 +488,7 @@ func Parse(tokens []Token) (*Clause, error) {
}
clause.Operator = COP_OR
case TOK_OP_NEG:
- if !prevToken.Type.Any(TOK_CLAUSE_OR, TOK_CLAUSE_AND, TOK_VAL_STR, TOK_VAL_DATETIME) {
+ if !prevToken.Type.Any(TOK_CLAUSE_OR, TOK_CLAUSE_AND, TOK_VAL_STR, TOK_VAL_DATETIME, TOK_CLAUSE_END) {
return nil, &TokenError{
got: token,
gotPrev: prevToken,
diff --git a/pkg/shell/interpreter.go b/pkg/shell/interpreter.go
index 6c97619..cf1454a 100644
--- a/pkg/shell/interpreter.go
+++ b/pkg/shell/interpreter.go
@@ -55,6 +55,7 @@ const (
ITOK_CMD_SLICE
ITOK_CMD_REMATCH
ITOK_CMD_REPATTERN
+ ITOK_CMD_LVL_OPTIMIZE
ITOK_CMD_OPTIMIZE
ITOK_CMD_TOKENIZE
ITOK_CMD_PARSE
@@ -88,6 +89,7 @@ var commands = []string{
"tokenize",
"parse",
"compile",
+ "optimize_",
}
func NewInterpreter(initialState State, env map[string]string, workers uint) *Interpreter {
@@ -207,7 +209,10 @@ out:
}
return false, fmt.Errorf("No variable %s%s", tokens[1].Text, suggestionText)
}
+ fmt.Fprintln(w, carryValue)
+ carryValue.Type = VAL_INVALID
}
+ break out
case ITOK_CMD_REMATCH:
if carryValue.Type != VAL_STRING {
return false, fmt.Errorf("Unable to match against argument of type %s", carryValue.Type)
@@ -261,6 +266,25 @@ out:
}
carryValue.Type = VAL_CLAUSE
carryValue.Val = clause
+ case ITOK_CMD_LVL_OPTIMIZE:
+ if carryValue.Type != VAL_CLAUSE {
+ return false, fmt.Errorf("Unable to optimize argument of type: %s", carryValue)
+ }
+
+ level, err := strconv.Atoi(t.Text)
+ if err != nil {
+ return false, fmt.Errorf("Cannot parse optimization level %s", t.Text)
+ }
+
+ clause, ok := carryValue.Val.(*query.Clause)
+ if !ok {
+ return true, errors.New("Type corruption during optimization, expected *query.Clause")
+ }
+ o := query.NewOptimizer(clause, inter.Workers)
+ o.Optimize(level)
+
+ carryValue.Type = VAL_CLAUSE
+ carryValue.Val = clause
case ITOK_CMD_OPTIMIZE:
if carryValue.Type != VAL_CLAUSE {
return false, fmt.Errorf("Unable to optimize argument of type: %s", carryValue)
@@ -315,14 +339,13 @@ out:
return true, errors.New("Type corruption during compilation, expected *query.Clause")
}
- query, params, err := clause.Compile()
+ artifact, err := clause.Compile()
if err != nil {
return false, err
}
- fmt.Fprintf(w, "query:\n%s\n--------\nparams:\n%s\n", query, params)
- carryValue.Type = VAL_INVALID
- break out
+ carryValue.Val = artifact
+ carryValue.Type = VAL_ARTIFACT
case ITOK_VAR_NAME:
// NOTE: very brittle, only allows expansion of a single variable
if i == len(tokens)-1 {
@@ -437,6 +460,8 @@ func (inter Interpreter) Tokenize(line string) []IToken {
tokens = append(tokens, IToken{Type: ITOK_CMD_TOKENIZE})
} else if trimmedWord == "parse" {
tokens = append(tokens, IToken{Type: ITOK_CMD_PARSE})
+ } else if l := len("optimize_"); len(trimmedWord) > l && trimmedWord[:l] == "optimize_" {
+ tokens = append(tokens, IToken{ITOK_CMD_LVL_OPTIMIZE, trimmedWord[l:]})
} else if l := len("env_"); len(trimmedWord) > l && trimmedWord[:l] == "env_" {
tokens = append(tokens, IToken{ITOK_CMD_ENV, trimmedWord[l:]})
} else if l := len("opt_"); len(trimmedWord) > l && trimmedWord[:l] == "opt_" {
@@ -463,7 +488,7 @@ func (inter Interpreter) Tokenize(line string) []IToken {
} else {
tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
}
- } else if prevType == ITOK_CMD_PARSE || prevType == ITOK_CMD_OPTIMIZE || prevType == ITOK_CMD_COMPILE {
+ } else if prevType == ITOK_CMD_PARSE || prevType == ITOK_CMD_OPTIMIZE || prevType == ITOK_CMD_LVL_OPTIMIZE || prevType == ITOK_CMD_COMPILE {
tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
} else if prevType == ITOK_VAR_NAME && trimmedWord[0] == '`' {
_, strLiteral, _ := strings.Cut(word, "`")
@@ -497,7 +522,8 @@ func printHelp(w io.Writer) {
fmt.Fprintln(w, "tokenize (string|name) - tokenize a string")
fmt.Fprintln(w, " ex. tokenize `author:me")
fmt.Fprintln(w, "parse (tokens|name) - parse tokens into a clause")
- fmt.Fprintln(w, "opt_<subcommand> (clause|name) - optimize clause tree")
+ fmt.Fprintln(w, "optimize_<level> (clause|name) - optimize clause tree to <level>")
+ fmt.Fprintln(w, "opt_<subcommand> (clause|name) - apply specific optimization to clause tree")
fmt.Fprintln(w, " sort - sort statements")
fmt.Fprintln(w, " flatten - flatten clauses")
fmt.Fprintln(w, " compact - compact equivalent statements")
diff --git a/pkg/shell/state.go b/pkg/shell/state.go
index bd7ecc8..5acdaf1 100644
--- a/pkg/shell/state.go
+++ b/pkg/shell/state.go
@@ -1,9 +1,7 @@
package shell
import (
- "errors"
"fmt"
- "os"
"strings"
"github.com/jpappel/atlas/pkg/query"
@@ -17,6 +15,7 @@ const (
VAL_STRING
VAL_TOKENS
VAL_CLAUSE
+ VAL_ARTIFACT
)
type Value struct {
@@ -38,6 +37,8 @@ func (t ValueType) String() string {
return "Tokens"
case VAL_CLAUSE:
return "Clause"
+ case VAL_ARTIFACT:
+ return "Compilation Artifact"
default:
return "Unkown"
}
@@ -48,27 +49,33 @@ func (v Value) String() string {
case VAL_INT:
i, ok := v.Val.(int)
if !ok {
- return "Corrupted Type (expected int)"
+ panic("Corrupted Type (expected int)")
}
return fmt.Sprint(i)
case VAL_STRING:
s, ok := v.Val.(string)
if !ok {
- return "Corrupted Type (expected string)"
+ panic("Corrupted Type (expected string)")
}
return s
case VAL_TOKENS:
ts, ok := v.Val.([]query.Token)
if !ok {
- return "Corrupted Type (expected []query.Token)"
+ panic("Corrupted Type (expected []query.Token)")
}
return query.TokensStringify(ts)
case VAL_CLAUSE:
- rootClause, ok := v.Val.(*query.Clause)
+ clause, ok := v.Val.(*query.Clause)
if !ok {
- return "Corrupted Type (expected *query.Clause)"
+ panic("Corrupted Type (expected *query.Clause)")
}
- return rootClause.String()
+ return clause.String()
+ case VAL_ARTIFACT:
+ artifact, ok := v.Val.(query.CompilationArtifact)
+ if !ok {
+ panic("Corrupted Type (expected query.CompilationArtifact)")
+ }
+ return artifact.String()
case VAL_INVALID:
return "Invalid"
}
@@ -92,6 +99,8 @@ func (s State) String() string {
b.WriteString(" Tokens")
case VAL_CLAUSE:
b.WriteString(" Clause")
+ case VAL_ARTIFACT:
+ b.WriteString(" Artifact")
default:
fmt.Fprintf(&b, " Unknown (%d)", v.Val)
}
@@ -100,69 +109,3 @@ func (s State) String() string {
return b.String()
}
-
-func (s State) CmdTokenize(input string) (Value, bool) {
- if len(input) == 0 {
- return Value{}, false
- }
-
- var rawQuery string
- if input[0] == '`' {
- rawQuery = input[1:]
- } else {
- variable, ok := s[input]
- if !ok {
- fmt.Fprintln(os.Stderr, "Cannot tokenize: no variable with name", input)
- return Value{}, false
- } else if variable.Type != VAL_STRING {
- fmt.Fprintln(os.Stderr, "Cannot tokenize: variable is not a string")
- return Value{}, false
- }
-
- rawQuery, ok = variable.Val.(string)
- if !ok {
- fmt.Fprintln(os.Stderr, "Cannot tokenize: type corruption")
- fmt.Fprintln(os.Stderr, "Type corruption, expected string")
- panic("Type corruption")
- }
- }
- tokens := query.Lex(rawQuery)
- return Value{VAL_TOKENS, tokens}, true
-}
-
-func (s State) CmdParse(args string) (Value, error) {
- if len(args) == 0 {
- return Value{}, errors.New("no arguments for parse")
- }
-
- var tokens []query.Token
- if tokenizeArgs, found := strings.CutPrefix(args, "tokenize "); found {
- val, ok := s.CmdTokenize(tokenizeArgs)
- if !ok {
- return Value{}, errors.New("error occured during tokenization")
- }
- tokens = val.Val.([]query.Token)
- } else {
- variable, ok := s[args]
- if !ok {
- fmt.Fprintln(os.Stderr, "Cannot parse: no variable with name", args)
- return Value{}, errors.New("variable does not exist")
- } else if variable.Type != VAL_TOKENS {
- fmt.Fprintln(os.Stderr, "Cannot parse: variable is not []query.Tokens")
- return Value{}, errors.New("bad variable type")
- }
-
- tokens, ok = variable.Val.([]query.Token)
- if !ok {
- fmt.Fprintln(os.Stderr, "Cannot parse: type corruption")
- fmt.Fprintln(os.Stderr, "Type corruption, expected []query.Tokens")
- panic("Type corruption")
- }
- }
-
- clause, err := query.Parse(tokens)
- if err != nil {
- return Value{}, err
- }
- return Value{VAL_CLAUSE, *clause}, err
-}