From e477518bd00b238808ea41b3f2fe829dbcac3b0c Mon Sep 17 00:00:00 2001 From: JP Appel Date: Tue, 24 Jun 2025 17:24:32 -0400 Subject: Add tests and fix minor bugs for optimizier --- pkg/query/optimizer.go | 13 +- pkg/query/optimizer_test.go | 404 +++++++++++++++++++++++++++++++++++++++++--- pkg/util/util.go | 1 - 3 files changed, 392 insertions(+), 26 deletions(-) (limited to 'pkg') diff --git a/pkg/query/optimizer.go b/pkg/query/optimizer.go index 8b99a64..4e8caa5 100644 --- a/pkg/query/optimizer.go +++ b/pkg/query/optimizer.go @@ -1,7 +1,9 @@ package query import ( + "fmt" "iter" + "os" "slices" "strings" "sync" @@ -286,9 +288,10 @@ func (o Optimizer) StrictEquality() { clear(stricts) for i, s := range stmts { val := strings.ToLower(s.Value.(StringValue).S) - if s.Operator == OP_EQ { + switch s.Operator { + case OP_EQ: stricts = append(stricts, val) - } else if s.Operator == OP_AP { + case OP_AP: if slices.ContainsFunc(stricts, func(strictStr string) bool { return strings.Contains(strictStr, val) || strings.Contains(val, strictStr) }) { @@ -420,6 +423,7 @@ func (o *Optimizer) Tighten() { } } if minIdx != -1 { + o.isSorted = false start, stop := minIdx, maxIdx if minS := stmts[minIdx]; minS.Operator == OP_GE || minS.Operator == OP_GT { start++ @@ -440,8 +444,11 @@ func (o *Optimizer) Tighten() { for j, s2 := range util.FilterIter(stmts[i+1:], func(s Statement) bool { return s.Operator == OP_AP }) { val2 := strings.ToLower(s2.Value.(StringValue).S) if strings.Contains(val2, val1) { - removals[j] = true + fmt.Fprintf(os.Stderr, "%s > %s\nRemoving %s\n", val2, val1, val2) + // NOTE: slicing stmts offsets the all indices by 1, hence the correction + removals[j+1] = true } else if strings.Contains(val1, val2) { + fmt.Fprintf(os.Stderr, "%s > %s\nRemoving %s\n", val1, val2, val1) removals[i] = true } } diff --git a/pkg/query/optimizer_test.go b/pkg/query/optimizer_test.go index 00710f6..1826c81 100644 --- a/pkg/query/optimizer_test.go +++ b/pkg/query/optimizer_test.go @@ -4,10 +4,42 @@ import ( "runtime" "slices" "testing" + "time" "github.com/jpappel/atlas/pkg/query" ) +var WORKERS = uint(runtime.NumCPU()) + +func clauseEqTest(t *testing.T, gotClause *query.Clause, wantClause *query.Clause) { + t.Helper() + o1 := query.NewOptimizer(gotClause, WORKERS) + o1.SortStatements() + o2 := query.NewOptimizer(wantClause, WORKERS) + o2.SortStatements() + + got := slices.Collect(gotClause.DFS()) + want := slices.Collect(wantClause.DFS()) + gotL, wantL := len(got), len(want) + if gotL != wantL { + // only happens if written test case incorrectly + t.Errorf("Different number of clauses: got %d want %d", gotL, wantL) + } + for i := range min(gotL, wantL) { + gotClause, wantClause := got[i], want[i] + + if gOp, wOp := gotClause.Operator, wantClause.Operator; gOp != wOp { + t.Errorf("Different operator for clause %d: want %v, got %v", i, gOp, wOp) + } + + if !slices.Equal(gotClause.Statements, wantClause.Statements) { + t.Errorf("Different statements for clause %d", i) + t.Log("Got", gotClause.Statements) + t.Log("Want", wantClause.Statements) + } + } +} + func TestClause_Flatten(t *testing.T) { tests := []struct { name string @@ -164,9 +196,8 @@ func TestClause_Flatten(t *testing.T) { }, } for _, tt := range tests { - workers := uint(runtime.NumCPU()) t.Run(tt.name, func(t *testing.T) { - o := query.NewOptimizer(tt.root, workers) + o := query.NewOptimizer(tt.root, WORKERS) o.Flatten() slices.SortFunc(tt.root.Statements, query.StatementCmp) @@ -258,32 +289,361 @@ func TestOptimizer_Compact(t *testing.T) { }, } for _, tt := range tests { - workers := uint(runtime.NumCPU()) t.Run(tt.name, func(t *testing.T) { - o := query.NewOptimizer(tt.c, workers) + o := query.NewOptimizer(tt.c, WORKERS) o.Compact() - got := slices.Collect(tt.c.DFS()) - want := slices.Collect(tt.want.DFS()) - gotL, wantL := len(got), len(want) - if gotL != wantL { - // only happens if written test case incorrectly - t.Errorf("Different number of clauses: got %d want %d", gotL, wantL) - } + clauseEqTest(t, tt.c, &tt.want) + }) + } +} - for i := range min(gotL, wantL) { - gotClause, wantClause := got[i], want[i] +func TestOptimizer_Tidy(t *testing.T) { + tests := []struct { + name string + c *query.Clause + want query.Clause + }{ + { + "already tidy", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}}, + }, + Clauses: []*query.Clause{ + {Operator: query.COP_OR, Statements: []query.Statement{ + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}}, + }}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}}, + }, + Clauses: []*query.Clause{ + {Operator: query.COP_OR, Statements: []query.Statement{ + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}}, + }}, + }, + }, + }, + { + "top level tidy", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}}, + }, + Clauses: []*query.Clause{ + {Operator: query.COP_OR, Statements: []query.Statement{ + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}}, + {}, + {Category: 2 << 16}, + }}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"manufacturing"}}, + }, + Clauses: []*query.Clause{ + {Operator: query.COP_OR, Statements: []query.Statement{ + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Chomsky, Noam"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Noam Chomsky"}}, + }}, + }, + }, + }, + { + "nested tidy", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"industry"}}, + {true, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Alan Dersowitz"}}, + }, + Clauses: []*query.Clause{ + {Operator: query.COP_OR, Statements: []query.Statement{ + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Finkelstein, Norman"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norman Finkelstein"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norm Finkelstein"}}, + {}, + {Category: CAT_META + 1}, + }}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_TITLE, Operator: query.OP_AP, Value: query.StringValue{"industry"}}, + {true, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Alan Dersowitz"}}, + }, + Clauses: []*query.Clause{ + {Operator: query.COP_OR, Statements: []query.Statement{ + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Finkelstein, Norman"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norman Finkelstein"}}, + {false, query.CAT_AUTHOR, query.OP_EQ, query.StringValue{"Norm Finkelstein"}}, + }}, + }, + }, + }, + } - if gOp, wOp := gotClause.Operator, wantClause.Operator; gOp != wOp { - t.Errorf("Different operator for clause %d: want %v, got %v", i, gOp, wOp) - } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + o := query.NewOptimizer(tt.c, WORKERS) + o.Tidy() + clauseEqTest(t, tt.c, &tt.want) + }) + } +} - if !slices.Equal(gotClause.Statements, wantClause.Statements) { - t.Errorf("Different statements for clause %d", i) - t.Log("Got", gotClause.Statements) - t.Log("Want", wantClause.Statements) - } - } +func TestOptimizer_Contradictions(t *testing.T) { + tests := []struct { + name string + c *query.Clause + want query.Clause + }{ + { + "two equals", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"carnival"}}, + {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"carnivale"}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + }, + }, + { + "equal and not equal", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"apple"}}, + {Negated: true, Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"apple"}}, + {Category: CAT_TITLE, Operator: OP_NE, Value: query.StringValue{"apple"}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + }, + }, + { + "set contradiction", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"topology"}}, + {Category: CAT_TAGS, Operator: OP_NE, Value: query.StringValue{"topology"}}, + }, + Clauses: []*query.Clause{{ + Operator: query.COP_OR, + Statements: []query.Statement{ + {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"algebra"}}, + {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"differential"}}, + {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"geometric"}}, + }, + }}, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{}, + Clauses: []*query.Clause{{ + Operator: query.COP_OR, + Statements: []query.Statement{ + {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"algebra"}}, + {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"differential"}}, + {Category: CAT_TAGS, Operator: OP_EQ, Value: query.StringValue{"geometric"}}, + }, + }}, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + o := query.NewOptimizer(tt.c, WORKERS) + o.Contradictions() + o.Tidy() + + clauseEqTest(t, tt.c, &tt.want) + }) + } +} + +func TestOptimizer_StrictEquality(t *testing.T) { + tests := []struct { + name string + c *query.Clause + want query.Clause + }{ + { + "non-range, non-set", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"notes"}}, + {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"monday standup"}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_EQ, Value: query.StringValue{"notes"}}, + }, + }, + }, + { + "set", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}}, + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}}, + {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"turing"}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}}, + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}}, + }, + }, + }, + { + "set, no strict eq", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}}, + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}}, + {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"djikstra"}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alonzo Church"}}, + {Category: CAT_AUTHOR, Operator: OP_EQ, Value: query.StringValue{"Alan Turing"}}, + {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"djikstra"}}, + }, + }, + }, + { + "dates", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_DATE, Operator: OP_EQ, Value: query.DatetimeValue{time.Date(1886, time.May, 1, 0, 0, 0, 0, time.UTC)}}, + {Category: CAT_DATE, Operator: OP_GE, Value: query.DatetimeValue{time.Date(1880, time.January, 1, 0, 0, 0, 0, time.UTC)}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_DATE, Operator: OP_EQ, Value: query.DatetimeValue{time.Date(1886, time.May, 1, 0, 0, 0, 0, time.UTC)}}, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + o := query.NewOptimizer(tt.c, WORKERS) + o.StrictEquality() + o.Tidy() + + clauseEqTest(t, tt.c, &tt.want) + }) + } +} + +func TestOptimizer_Tighten(t *testing.T) { + tests := []struct { + name string + c *query.Clause + want query.Clause + }{ + { + "dates or", + &query.Clause{ + Operator: query.COP_OR, + Statements: []query.Statement{ + {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)}}, + {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 2, 2, 0, 0, 0, 0, time.UTC)}}, + }, + }, + query.Clause{ + Operator: query.COP_OR, + Statements: []query.Statement{ + {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)}}, + }, + }, + }, + { + "dates and", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)}}, + {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 2, 2, 0, 0, 0, 0, time.UTC)}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: query.CAT_DATE, Operator: query.OP_GT, Value: query.DatetimeValue{time.Date(2025, 2, 2, 0, 0, 0, 0, time.UTC)}}, + }, + }, + }, + { + "nonordered or", + &query.Clause{ + Operator: query.COP_OR, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital I"}}, + {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital"}}, + }, + }, + query.Clause{ + Operator: query.COP_OR, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital"}}, + }, + }, + }, + { + "nonordered and", + &query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital I"}}, + {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital"}}, + }, + }, + query.Clause{ + Operator: query.COP_AND, + Statements: []query.Statement{ + {Category: CAT_TITLE, Operator: OP_AP, Value: query.StringValue{"Das Kapital I"}}, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + o := query.NewOptimizer(tt.c, WORKERS) + o.Tighten() + o.Tidy() + + clauseEqTest(t, tt.c, &tt.want) }) } } diff --git a/pkg/util/util.go b/pkg/util/util.go index a525bca..0ed64b8 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -2,7 +2,6 @@ package util import ( "iter" - "slices" "time" ) -- cgit v1.2.3