From 5d30265a850ab647c89e0cae0ffce44ae3e1ce48 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Thu, 26 Jun 2025 01:10:13 -0400 Subject: Move category and operator iterators --- cmd/atlas.go | 4 ++-- pkg/query/optimizer.go | 30 +++-------------------- pkg/query/parser.go | 65 ++++++++++++++++++++++++++++++++++++++++++++++---- 3 files changed, 66 insertions(+), 33 deletions(-) diff --git a/cmd/atlas.go b/cmd/atlas.go index 916e459..b0ec3af 100644 --- a/cmd/atlas.go +++ b/cmd/atlas.go @@ -211,8 +211,8 @@ func main() { env := make(map[string]string) env["workers"] = fmt.Sprint(globalFlags.NumWorkers) - env["db path"] = globalFlags.DBPath - env["index root"] = globalFlags.IndexRoot + env["db_path"] = globalFlags.DBPath + env["index_root"] = globalFlags.IndexRoot env["version"] = "0.0.1" interpreter := shell.NewInterpreter(state, env, globalFlags.NumWorkers) diff --git a/pkg/query/optimizer.go b/pkg/query/optimizer.go index 4e8caa5..c203b9c 100644 --- a/pkg/query/optimizer.go +++ b/pkg/query/optimizer.go @@ -2,7 +2,6 @@ package query import ( "fmt" - "iter" "os" "slices" "strings" @@ -90,29 +89,6 @@ func (o *Optimizer) serial(optimize func(*Clause)) { } } -// Partition statements by their category without copying (slices clause.Statements) -func (o *Optimizer) partitionStatemements(clause *Clause) iter.Seq2[catType, Statements] { - return func(yield func(catType, Statements) bool) { - var category, lastCategory catType - var lastCategoryStart int - for i, stmt := range clause.Statements { - category = stmt.Category - if category != lastCategory { - if !yield(lastCategory, clause.Statements[lastCategoryStart:i]) { - return - } - lastCategoryStart = i - } - lastCategory = category - } - - // handle leftover - if !yield(category, clause.Statements[lastCategoryStart:]) { - return - } - } -} - func (o *Optimizer) SortStatements() { o.parallel(func(c *Clause) { slices.SortFunc(c.Statements, StatementCmp) @@ -231,7 +207,7 @@ func (o *Optimizer) Contradictions() { o.parallel(func(c 
*Clause) { removals := make(map[int]bool, 8) var isContradiction func(s1, s2 Statement) bool - for category, stmts := range o.partitionStatemements(c) { + for category, stmts := range c.Statements.CategoryPartition() { if c.Operator == COP_AND && !category.IsSet() { isContradiction = func(s1, s2 Statement) bool { return (s1.Operator == OP_EQ && s1.Operator == s2.Operator) || inverseEq(s1, s2) @@ -283,7 +259,7 @@ func (o Optimizer) StrictEquality() { } stricts := make([]string, 0) - for category, stmts := range o.partitionStatemements(c) { + for category, stmts := range c.Statements.CategoryPartition() { if category.IsSet() { clear(stricts) for i, s := range stmts { @@ -328,7 +304,7 @@ func (o *Optimizer) Tighten() { } o.parallel(func(c *Clause) { - for category, stmts := range o.partitionStatemements(c) { + for category, stmts := range c.Statements.CategoryPartition() { if len(stmts) < 2 { continue } diff --git a/pkg/query/parser.go b/pkg/query/parser.go index 2bfc014..65d678a 100644 --- a/pkg/query/parser.go +++ b/pkg/query/parser.go @@ -4,6 +4,7 @@ import ( "fmt" "iter" "os" + "slices" "strings" "time" @@ -254,6 +255,61 @@ func (s *Statement) Simplify() { } } +// Partition statements by their category without copying +// +// Needs a sorted slice, so s is sorted in place by StatementCmp when it is not already. +func (s Statements) CategoryPartition() iter.Seq2[catType, Statements] { + if !slices.IsSortedFunc(s, StatementCmp) { + slices.SortFunc(s, StatementCmp) + } + + return func(yield func(catType, Statements) bool) { + var category, lastCategory catType + var lastCategoryStart int + for i, stmt := range s { + category = stmt.Category + if category != lastCategory { + if !yield(lastCategory, s[lastCategoryStart:i]) { + return + } + lastCategoryStart = i + } + lastCategory = category + } + + // handle leftover + if !yield(category, s[lastCategoryStart:]) { + return + } + } +} + +// Partition statements by their operator without copying, similar to +// CategoryPartition.
+func (s Statements) OperatorPartition() iter.Seq2[opType, Statements] { + if !slices.IsSortedFunc(s, StatementCmp) { + slices.SortFunc(s, StatementCmp) + } + + return func(yield func(opType, Statements) bool) { + var op, lastOp opType + var lastOpStart int + for i, stmt := range s { + op = stmt.Operator + if op != lastOp { + if !yield(lastOp, s[lastOpStart:i]) { + return + } + lastOp, lastOpStart = op, i // next partition begins at i + } + } + + if !yield(op, s[lastOpStart:]) { + return + } + } +} + func (c Clause) String() string { b := &strings.Builder{} c.buildString(b, 0) @@ -263,18 +319,19 @@ func (c Clause) String() string { func (c Clause) buildString(b *strings.Builder, level int) { writeIndent(b, level) b.WriteByte('(') - if c.Operator == COP_AND { + switch c.Operator { + case COP_AND: b.WriteString("and") - } else if c.Operator == COP_OR { + case COP_OR: b.WriteString("or") - } else { + default: b.WriteString("unknown_op") } b.WriteByte('\n') for _, stmt := range c.Statements { writeIndent(b, level+1) - b.WriteString(fmt.Sprintf("%+v", stmt)) + fmt.Fprintf(b, "%+v", stmt) b.WriteByte('\n') } -- cgit v1.2.3