aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--pkg/query/compiler.go16
-rw-r--r--pkg/query/optimizer.go72
-rw-r--r--pkg/query/parser.go6
-rw-r--r--pkg/query/parser_test.go4
-rw-r--r--pkg/shell/interpreter.go68
-rw-r--r--pkg/util/util.go8
6 files changed, 121 insertions, 53 deletions
diff --git a/pkg/query/compiler.go b/pkg/query/compiler.go
index efdc3b4..dc6f93a 100644
--- a/pkg/query/compiler.go
+++ b/pkg/query/compiler.go
@@ -73,7 +73,7 @@ func (s Statements) buildCompile(b *strings.Builder, delim string) ([]any, error
if cat.IsOrdered() {
opStr = "BETWEEN "
} else {
- opStr = "LIKE "
+ opStr = "MATCH "
}
case OP_EQ:
if cat.IsSet() {
@@ -82,16 +82,12 @@ func (s Statements) buildCompile(b *strings.Builder, delim string) ([]any, error
opStr = "= "
}
case OP_GE:
- // NOTE: doesn't raise compiler error if operator used on invalid category
opStr = ">= "
case OP_GT:
- // NOTE: doesn't raise compiler error if operator used on invalid category
opStr = "> "
case OP_LE:
- // NOTE: doesn't raise compiler error if operator used on invalid category
opStr = "<= "
case OP_LT:
- // NOTE: doesn't raise compiler error if operator used on invalid category
opStr = "< "
case OP_RE:
opStr = "REGEXP "
@@ -156,10 +152,10 @@ func (s Statements) buildCompile(b *strings.Builder, delim string) ([]any, error
b.WriteString(opStr)
arg, ok := stmt.Value.buildCompile(b)
if ok {
- args = append(args, "%"+arg+"%")
+ args = append(args, arg)
}
if idx != len(opStmts)-1 {
- b.WriteString(" OR ")
+ b.WriteString(" " + delim + " ")
}
sCount++
idx++
@@ -200,11 +196,7 @@ func (s Statements) buildCompile(b *strings.Builder, delim string) ([]any, error
b.WriteString(opStr)
arg, ok := stmt.Value.buildCompile(b)
if ok {
- if op == OP_AP {
- args = append(args, "%"+arg+"%")
- } else {
- args = append(args, arg)
- }
+ args = append(args, arg)
}
b.WriteByte(' ')
if idx != len(opStmts)-1 {
diff --git a/pkg/query/optimizer.go b/pkg/query/optimizer.go
index 2cc610a..337ee35 100644
--- a/pkg/query/optimizer.go
+++ b/pkg/query/optimizer.go
@@ -9,6 +9,9 @@ import (
"github.com/jpappel/atlas/pkg/util"
)
+// FIXME: any substring checks on unorderd approximate statements will fail
+// this is because quotes are added to all approximate string values
+
type Optimizer struct {
workers uint
root *Clause
@@ -64,6 +67,7 @@ func (o Optimizer) Optimize(level int) {
o.Tighten()
o.Contradictions()
o.MergeRegex()
+ o.MergeApproximateMatches()
// parallel + serial
o.Tidy()
// purely serial
@@ -175,6 +179,7 @@ func (o *Optimizer) Compact() {
o.isSorted = false
}
+// Remove noop statements and clauses
func (o *Optimizer) Tidy() {
// ensure ordering
if !o.isSorted {
@@ -306,7 +311,7 @@ func (o Optimizer) StrictEquality() {
stricts = append(stricts, val)
case OP_AP:
if slices.ContainsFunc(stricts, func(strictStr string) bool {
- return strings.Contains(strictStr, val) || strings.Contains(val, strictStr)
+ return util.ContainsSliced(strictStr, val, 1, len(val)-1) || util.ContainsSliced(val, strictStr, 1, len(strictStr)-1)
}) {
stmts[i] = Statement{}
o.isSorted = false
@@ -327,7 +332,7 @@ func (o Optimizer) StrictEquality() {
})
}
-// Merge regular within a clause
+// Merge regular expressions within a clause
func (o *Optimizer) MergeRegex() {
if !o.isSorted {
o.SortStatements()
@@ -354,7 +359,7 @@ func (o *Optimizer) MergeRegex() {
}
for _, stmts := range opStmts.NegatedPartition() {
- if len(stmts) <= 1 {
+ if len(stmts) < 2 {
continue
}
sortChanged = true
@@ -387,6 +392,59 @@ func (o *Optimizer) MergeRegex() {
})
}
+func (o *Optimizer) MergeApproximateMatches() {
+ if !o.isSorted {
+ o.SortStatements()
+ }
+
+ pool := &sync.Pool{}
+ pool.New = func() any {
+ return &strings.Builder{}
+ }
+
+ o.parallel(func(c *Clause) {
+ var delim string
+ switch c.Operator {
+ case COP_AND:
+ delim = " AND "
+ case COP_OR:
+ delim = " OR "
+ }
+
+ b := pool.Get().(*strings.Builder)
+ defer pool.Put(b)
+ defer b.Reset()
+
+ changeSort := false
+ for category, catStmts := range c.Statements.CategoryPartition() {
+ if len(catStmts) < 2 || category.IsOrdered() {
+ continue
+ }
+ for op, opStmts := range catStmts.OperatorPartition() {
+ if op != OP_AP || len(opStmts) < 2 {
+ continue
+ }
+ changeSort = true
+ for i, stmt := range opStmts {
+ b.WriteString(stmt.Value.(StringValue).S)
+ if i != len(opStmts)-1 {
+ b.WriteString(delim)
+ }
+
+ if i != 0 {
+ opStmts[i] = Statement{}
+ }
+ }
+ opStmts[0].Value = StringValue{S: b.String()}
+ b.Reset()
+ }
+ }
+ if changeSort {
+ o.isSorted = false
+ }
+ })
+}
+
// Shrink approximate statements and ranges
//
// Examples:
@@ -468,9 +526,9 @@ func (o *Optimizer) Tighten() {
val1 := strings.ToLower(s1.Value.(StringValue).S)
for j, s2 := range util.FilterIter(stmts[i+1:], func(s Statement) bool { return s.Operator == OP_AP }) {
val2 := strings.ToLower(s2.Value.(StringValue).S)
- if strings.Contains(val2, val1) {
+ if util.ContainsSliced(val2, val1, 1, len(val1)-1) {
removals[i] = true
- } else if strings.Contains(val1, val2) {
+ } else if util.ContainsSliced(val1, val2, 1, len(val2)-1) {
removals[j] = true
}
}
@@ -516,10 +574,10 @@ func (o *Optimizer) Tighten() {
val1 := strings.ToLower(s1.Value.(StringValue).S)
for j, s2 := range util.FilterIter(stmts[i+1:], func(s Statement) bool { return s.Operator == OP_AP }) {
val2 := strings.ToLower(s2.Value.(StringValue).S)
- if strings.Contains(val2, val1) {
+ if util.ContainsSliced(val2, val1, 1, len(val1)-1) {
// NOTE: slicing stmts offsets the all indices by 1, hence the correction
removals[j+1] = true
- } else if strings.Contains(val1, val2) {
+ } else if util.ContainsSliced(val1, val2, 1, len(val2)-1) {
removals[i] = true
}
}
diff --git a/pkg/query/parser.go b/pkg/query/parser.go
index 3406bc6..019deac 100644
--- a/pkg/query/parser.go
+++ b/pkg/query/parser.go
@@ -527,7 +527,11 @@ func Parse(tokens []Token) (*Clause, error) {
}
}
- clause.Statements[len(clause.Statements)-1].Value = StringValue{token.Value}
+ if prevToken.Type == TOK_OP_AP {
+ clause.Statements[len(clause.Statements)-1].Value = StringValue{"\"" + token.Value + "\""}
+ } else {
+ clause.Statements[len(clause.Statements)-1].Value = StringValue{token.Value}
+ }
case TOK_VAL_DATETIME:
if !prevToken.Type.isDateOperation() {
return nil, &TokenError{
diff --git a/pkg/query/parser_test.go b/pkg/query/parser_test.go
index 5c837c0..b1a98f1 100644
--- a/pkg/query/parser_test.go
+++ b/pkg/query/parser_test.go
@@ -53,7 +53,7 @@ func TestParse(t *testing.T) {
&query.Clause{
Operator: query.COP_AND,
Statements: []query.Statement{
- {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"ken thompson"}},
+ {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"\"ken thompson\""}},
},
},
nil,
@@ -70,7 +70,7 @@ func TestParse(t *testing.T) {
&query.Clause{
Operator: query.COP_AND,
Statements: []query.Statement{
- {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"Alonzo Church"}},
+ {Category: CAT_AUTHOR, Operator: OP_AP, Value: query.StringValue{"\"Alonzo Church\""}},
},
Clauses: []*query.Clause{
{
diff --git a/pkg/shell/interpreter.go b/pkg/shell/interpreter.go
index 287c28f..4c6bb03 100644
--- a/pkg/shell/interpreter.go
+++ b/pkg/shell/interpreter.go
@@ -88,6 +88,7 @@ var optimizations = []string{
"compact",
"strictEq",
"mergeregex",
+ "mergeap",
}
var commands = map[string]ITokType{
@@ -444,37 +445,41 @@ out:
}
o := query.NewOptimizer(clause, inter.Workers)
- switch optName {
- case "simplify":
- o.Simplify()
- case "tighten":
- o.Tighten()
- case "flatten":
- o.Flatten()
- case "sort":
- o.SortStatements()
- case "tidy":
- o.Tidy()
- case "contradictions":
- o.Contradictions()
- case "compact":
- o.Compact()
- case "strictEq":
- o.StrictEquality()
- case "mergeregex":
- o.MergeRegex()
- default:
- suggestion, ok := util.Nearest(
- optName,
- inter.keywords.optimizations,
- util.LevensteinDistance,
- min(len(optName), 4),
- )
- suggestionTxt := ""
- if ok {
- suggestionTxt = fmt.Sprintf(": Did you mean '%s'?", suggestion)
+ for curOpt := range strings.SplitSeq(optName, ",") {
+ switch curOpt {
+ case "simplify":
+ o.Simplify()
+ case "tighten":
+ o.Tighten()
+ case "flatten":
+ o.Flatten()
+ case "sort":
+ o.SortStatements()
+ case "tidy":
+ o.Tidy()
+ case "contradictions":
+ o.Contradictions()
+ case "compact":
+ o.Compact()
+ case "strictEq":
+ o.StrictEquality()
+ case "mergeregex":
+ o.MergeRegex()
+ case "mergeap":
+ o.MergeApproximateMatches()
+ default:
+ suggestion, ok := util.Nearest(
+ optName,
+ inter.keywords.optimizations,
+ util.LevensteinDistance,
+ min(len(optName), 4),
+ )
+ suggestionTxt := ""
+ if ok {
+ suggestionTxt = fmt.Sprintf(": Did you mean '%s'?", suggestion)
+ }
+ return false, fmt.Errorf("Unrecognized optimization %s%s", t.Text, suggestionTxt)
}
- return false, fmt.Errorf("Unrecognized optimization %s%s", t.Text, suggestionTxt)
}
stack = append(stack, Value{VAL_CLAUSE, clause})
@@ -777,7 +782,7 @@ func PrintHelp(w io.Writer) {
fmt.Fprintln(w, " ex. tokenize `author:me")
fmt.Fprintln(w, "parse (tokens) - parse tokens into a clause")
fmt.Fprintln(w, "optimize <level> (clause) - optimize clause tree to <level>")
- fmt.Fprintln(w, "opt <subcommand> (clause) - apply specific optimization to clause tree")
+ fmt.Fprintln(w, "opt <subcommand1>,... (clause) - apply specific optimization(s) to clause tree")
fmt.Fprintln(w, " sort - sort statements")
fmt.Fprintln(w, " flatten - flatten clauses")
fmt.Fprintln(w, " compact - compact equivalent statements")
@@ -786,6 +791,7 @@ func PrintHelp(w io.Writer) {
fmt.Fprintln(w, " strictEq - zero fuzzy/range statements when an eq is present")
fmt.Fprintln(w, " tighten - zero redundant fuzzy/range statements when another mathes the same values")
fmt.Fprintln(w, " mergeregex - merge regexes")
+ fmt.Fprintln(w, " mergeap - merge unordered approximate statements")
fmt.Fprintln(w, "compile (clause) - compile clause into query")
fmt.Fprintln(w, "execute (artifact) - excute the compiled query against the connected database")
fmt.Fprintln(w, "\nBare commands which return a value assign to an implicit variable _")
diff --git a/pkg/util/util.go b/pkg/util/util.go
index b26b46a..7963cac 100644
--- a/pkg/util/util.go
+++ b/pkg/util/util.go
@@ -3,6 +3,7 @@ package util
import (
"iter"
"math"
+ "strings"
"time"
)
@@ -162,3 +163,10 @@ func Nearest[E any](candidate E, valid []E, cmp func(E, E) int, ceil int) (E, bo
}
return valid[minIdx], minDistance < ceil
}
+
+// Check if substr[left:right] is a substring of S.
+// If left > len(substr) use 0
+// If right < 0 use 0
+func ContainsSliced(s, substr string, left, right int) bool {
+ return strings.Contains(s, substr[min(left, len(substr)):max(right, 0)])
+}