aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/query
diff options
context:
space:
mode:
authorJP Appel <jeanpierre.appel01@gmail.com>2025-06-24 17:24:32 -0400
committerJP Appel <jeanpierre.appel01@gmail.com>2025-06-24 17:24:32 -0400
commite477518bd00b238808ea41b3f2fe829dbcac3b0c (patch)
tree413846bbf1bb732a9c5864b8293b4d86d133dfba /pkg/query
parent21d72b2f67fb065d9071907c5c3307434aad3795 (diff)
Add tests and fix minor bugs for optimizier
Diffstat (limited to 'pkg/query')
-rw-r--r--pkg/query/optimizer.go13
-rw-r--r--pkg/query/optimizer_test.go404
2 files changed, 392 insertions, 25 deletions
diff --git a/pkg/query/optimizer.go b/pkg/query/optimizer.go
index 8b99a64..4e8caa5 100644
--- a/pkg/query/optimizer.go
+++ b/pkg/query/optimizer.go
@@ -1,7 +1,9 @@
package query
import (
+ "fmt"
"iter"
+ "os"
"slices"
"strings"
"sync"
@@ -286,9 +288,10 @@ func (o Optimizer) StrictEquality() {
clear(stricts)
for i, s := range stmts {
val := strings.ToLower(s.Value.(StringValue).S)
- if s.Operator == OP_EQ {
+ switch s.Operator {
+ case OP_EQ:
stricts = append(stricts, val)
- } else if s.Operator == OP_AP {
+ case OP_AP:
if slices.ContainsFunc(stricts, func(strictStr string) bool {
return strings.Contains(strictStr, val) || strings.Contains(val, strictStr)
}) {
@@ -420,6 +423,7 @@ func (o *Optimizer) Tighten() {
}
}
if minIdx != -1 {
+ o.isSorted = false
start, stop := minIdx, maxIdx
if minS := stmts[minIdx]; minS.Operator == OP_GE || minS.Operator == OP_GT {
start++
@@ -440,8 +444,11 @@ func (o *Optimizer) Tighten() {
for j, s2 := range util.FilterIter(stmts[i+1:], func(s Statement) bool { return s.Operator == OP_AP }) {
val2 := strings.ToLower(s2.Value.(StringValue).S)
if strings.Contains(val2, val1) {
- removals[j] = true
+ fmt.Fprintf(os.Stderr, "%s > %s\nRemoving %s\n", val2, val1, val2)
+ // NOTE: slicing stmts offsets the all indices by 1, hence the correction
+ removals[j+1] = true
} else if strings.Contains(val1, val2) {
+ fmt.Fprintf(os.Stderr, "%s > %s\nRemoving %s\n", val1, val2, val1)
removals[i] = true
}
}
diff --git a/pkg/query/optimizer_test.go b/pkg/query/optimizer_test.go
index 00710f6..1826c81 100644
--- a/pkg/query/optimizer_test.go
+++ b/pkg/query/optimizer_test.go
@@ -4,10 +4,42 @@ import (
"runtime"
"slices"
"testing"
+ "time"
"github.com/jpappel/atlas/pkg/query"
)
+var WORKERS = uint(runtime.NumCPU())
+
+func clauseEqTest(t *testing.T, gotClause *query.Clause, wantClause *query.Clause) {
+ t.Helper()
+ o1 := query.NewOptimizer(gotClause, WORKERS)
+ o1.SortStatements()
+ o2 := query.NewOptimizer(wantClause, WORKERS)
+ o2.SortStatements()
+
+ got := slices.Collect(gotClause.DFS())
+ want := slices.Collect(wantClause.DFS())
+ gotL, wantL := len(got), len(want)
+ if gotL != wantL {
+ // only happens if written test case incorrectly
+ t.Errorf("Different number of clauses: got %d want %d", gotL, wantL)
+ }
+ for i := range min(gotL, wantL) {
+ gotClause, wantClause := got[i], want[i]
+
+ if gOp, wOp := gotClause.Operator, wantClause.Operator; gOp != wOp {
+ t.Errorf("Different operator for clause %d: want %v, got %v", i, gOp, wOp)
+ }
+
+ if !slices.Equal(gotClause.Statements, wantClause.Statements) {
+ t.Errorf("Different statements for clause %d", i)
+ t.Log("Got", gotClause.Statements)
+ t.Log("Want", wantClause.Statements)
+ }
+ }
+}
+
func TestClause_Flatten(t *testing.T) {
tests := []struct {
name string
@@ -164,9 +196,8 @@ func TestClause_Flatten(t *testing.T) {
},
}
for _, tt := range tests {
- workers := uint(runtime.NumCPU())
t.Run(tt.name, func(t *testing.T) {
- o := query.NewOptimizer(tt.root, workers)
+ o := query.NewOptimizer(tt.root, WORKERS)
o.Flatten()
slices.SortFunc(tt.root.Statements, query.StatementCmp)
@@ -258,32 +289,361 @@ func TestOptimizer_Compact(t *testing.T) {
},
}
for _, tt := range tests {
- workers := uint(runtime.NumCPU())
t.Run(tt.name, func(t *testing.T) {
- o := query.NewOptimizer(tt.c, workers)
+ o := query.NewOptimizer(tt.c, WORKERS)
o.Compact()
- got := slices.Collect(tt.c.DFS())
- want := slices.Collect(tt.want.DFS())
- gotL, wantL := len(got), len(want)
- if gotL != wantL {
- // only happens if written test case incorrectly
- t.Errorf("Different number of clauses: got %d want %d", gotL, wantL)
- }
+ clauseEqTest(t, tt.c, &tt.want)
+ })
+ }
+}
- for i := range min(gotL, wantL) {
- gotClause, wantClause := got[i], want[i]
+func TestOptimizer_Tidy(t *testing.T) {
+ tests := []struct {
+ name string
+ c *query.Clause
+ want query.Clause
+ }{
+ {
+ "already tidy",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}},
+ },
+ Clauses: []*query.Clause{
+ {Operator: query.COP_OR, Statements: []query.Statement{
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}},
+ }},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}},
+ },
+ Clauses: []*query.Clause{
+ {Operator: query.COP_OR, Statements: []query.Statement{
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}},
+ }},
+ },
+ },
+ },
+ {
+ "top level tidy",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}},
+ },
+ Clauses: []*query.Clause{
+ {Operator: query.COP_OR, Statements: []query.Statement{
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}},
+ {},
+ {Category: 2 << 16},
+ }},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}},
+ },
+ Clauses: []*query.Clause{
+ {Operator: query.COP_OR, Statements: []query.Statement{
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}},
+ }},
+ },
+ },
+ },
+ {
+ "nested tidy",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"industry"}},
+ {true, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Alan Dersowitz"}},
+ },
+ Clauses: []*query.Clause{
+ {Operator: query.COP_OR, Statements: []query.Statement{
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Finkelstein, Norman"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norman Finkelstein"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norm Finkelstein"}},
+ {},
+ {Category: CAT_META + 1},
+ }},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"industry"}},
+ {true, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Alan Dersowitz"}},
+ },
+ Clauses: []*query.Clause{
+ {Operator: query.COP_OR, Statements: []query.Statement{
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Finkelstein, Norman"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norman Finkelstein"}},
+ {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norm Finkelstein"}},
+ }},
+ },
+ },
+ },
+ }
- if gOp, wOp := gotClause.Operator, wantClause.Operator; gOp != wOp {
- t.Errorf("Different operator for clause %d: want %v, got %v", i, gOp, wOp)
- }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ o := query.NewOptimizer(tt.c, WORKERS)
+ o.Tidy()
+ clauseEqTest(t, tt.c, &tt.want)
+ })
+ }
+}
- if !slices.Equal(gotClause.Statements, wantClause.Statements) {
- t.Errorf("Different statements for clause %d", i)
- t.Log("Got", gotClause.Statements)
- t.Log("Want", wantClause.Statements)
- }
- }
+func TestOptimizer_Contradictions(t *testing.T) {
+ tests := []struct {
+ name string
+ c *query.Clause
+ want query.Clause
+ }{
+ {
+ "two equals",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"carnival"}},
+ {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"carnivale"}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ },
+ },
+ {
+ "equal and not equal",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"apple"}},
+ {Negated: true, Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"apple"}},
+ {Category: CAT_TITLE, Operator: OP_NE, Value: query.StringValue{"apple"}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ },
+ },
+ {
+ "set contradiction",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"topology"}},
+ {Category: CAT_TAGS, Operator: OP_NE, Value: query.StringValue{"topology"}},
+ },
+ Clauses: []*query.Clause{{
+ Operator: query.COP_OR,
+ Statements: []query.Statement{
+ {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"algebra"}},
+ {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"differential"}},
+ {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"geometric"}},
+ },
+ }},
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{},
+ Clauses: []*query.Clause{{
+ Operator: query.COP_OR,
+ Statements: []query.Statement{
+ {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"algebra"}},
+ {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"differential"}},
+ {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"geometric"}},
+ },
+ }},
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ o := query.NewOptimizer(tt.c, WORKERS)
+ o.Contradictions()
+ o.Tidy()
+
+ clauseEqTest(t, tt.c, &tt.want)
+ })
+ }
+}
+
+func TestOptimizer_StrictEquality(t *testing.T) {
+ tests := []struct {
+ name string
+ c *query.Clause
+ want query.Clause
+ }{
+ {
+ "non-range, non-set",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"notes"}},
+ {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"monday standup"}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"notes"}},
+ },
+ },
+ },
+ {
+ "set",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}},
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}},
+ {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"turing"}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}},
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}},
+ },
+ },
+ },
+ {
+ "set, no strict eq",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}},
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}},
+ {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"djikstra"}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}},
+ {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}},
+ {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"djikstra"}},
+ },
+ },
+ },
+ {
+ "dates",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_DATE, Operator: OP_EQ, Value: query.DatetimeValue{time.Date(1886, time.May, 1, 0, 0, 0, 0, time.UTC)}},
+ {Category: CAT_DATE, Operator: OP_GE, Value: query.DatetimeValue{time.Date(1880, time.January, 1, 0, 0, 0, 0, time.UTC)}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_DATE, Operator: OP_EQ, Value: query.DatetimeValue{time.Date(1886, time.May, 1, 0, 0, 0, 0, time.UTC)}},
+ },
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ o := query.NewOptimizer(tt.c, WORKERS)
+ o.StrictEquality()
+ o.Tidy()
+
+ clauseEqTest(t, tt.c, &tt.want)
+ })
+ }
+}
+
+func TestOptimizer_Tighten(t *testing.T) {
+ tests := []struct {
+ name string
+ c *query.Clause
+ want query.Clause
+ }{
+ {
+ "dates or",
+ &query.Clause{
+ Operator: query.COP_OR,
+ Statements: []query.Statement{
+ {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)}},
+ {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 2, 2, 0, 0, 0, 0, time.UTC)}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_OR,
+ Statements: []query.Statement{
+ {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)}},
+ },
+ },
+ },
+ {
+ "dates and",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)}},
+ {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 2, 2, 0, 0, 0, 0, time.UTC)}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 2, 2, 0, 0, 0, 0, time.UTC)}},
+ },
+ },
+ },
+ {
+ "nonordered or",
+ &query.Clause{
+ Operator: query.COP_OR,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital I"}},
+ {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital"}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_OR,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital"}},
+ },
+ },
+ },
+ {
+ "nonordered and",
+ &query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital I"}},
+ {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital"}},
+ },
+ },
+ query.Clause{
+ Operator: query.COP_AND,
+ Statements: []query.Statement{
+ {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital I"}},
+ },
+ },
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ o := query.NewOptimizer(tt.c, WORKERS)
+ o.Tighten()
+ o.Tidy()
+
+ clauseEqTest(t, tt.c, &tt.want)
})
}
}