diff options
| author | JP Appel <jeanpierre.appel01@gmail.com> | 2025-06-26 23:31:09 -0400 |
|---|---|---|
| committer | JP Appel <jeanpierre.appel01@gmail.com> | 2025-06-26 23:37:30 -0400 |
| commit | 916430e5d3f33a24b13e188428d5335862472411 (patch) | |
| tree | 9618a1ee28eb6cb6985fddbc507efdef525c0a5d | |
| parent | dd431b36c21b4a909d3ae0a12a4e607d326ac3cc (diff) | |
Improve compilation of clauses to sqlite3 SQL
| -rw-r--r-- | pkg/query/compiler.go | 159 | ||||
| -rw-r--r-- | pkg/query/lexer.go | 2 | ||||
| -rw-r--r-- | pkg/query/parser.go | 30 | ||||
| -rw-r--r-- | pkg/util/util.go | 26 |
4 files changed, 192 insertions, 25 deletions
diff --git a/pkg/query/compiler.go b/pkg/query/compiler.go index d566b05..f0df4b4 100644 --- a/pkg/query/compiler.go +++ b/pkg/query/compiler.go @@ -3,6 +3,8 @@ package query import ( "fmt" "strings" + + "github.com/jpappel/atlas/pkg/util" ) const MAX_CLAUSE_DEPTH int = 16 @@ -77,21 +79,150 @@ func (stmt Statement) Compile(b *strings.Builder) (*string, error) { return nil, nil } -func (stmts Statements) Compile(b *strings.Builder, delim string) ([]string, error) { +func (s Statements) Compile(b *strings.Builder, delim string) ([]string, error) { var args []string - // TODO: handle meta category - for i, stmt := range stmts { - if i != 0 { - b.WriteString(delim) + sCount := 0 + for cat, catStmts := range s.CategoryPartition() { + // TODO: make sure sorted + // TODO: loop over partitions + if len(catStmts) == 0 { + continue + } + var catStr string + switch cat { + case CAT_AUTHOR: + catStr = "author " + case CAT_DATE: + catStr = "date " + case CAT_FILETIME: + catStr = "fileTime " + case CAT_LINKS: + catStr = "link " + case CAT_META: + catStr = "meta " + case CAT_TAGS: + catStr = "tag " + case CAT_TITLE: + catStr = "title " + default: + return nil, &CompileError{ + fmt.Sprintf("unexpected query.catType %#v", cat), + } } - b.WriteByte(' ') - arg, err := stmt.Compile(b) - if err != nil { - return nil, err - } else if arg != nil { - args = append(args, *arg) + for op, opStmts := range catStmts.OperatorPartition() { + if len(opStmts) == 0 { + continue + } + var opStr string + switch op { + case OP_AP: + if cat.IsOrdered() { + opStr = "BETWEEN " + } else { + opStr = "LIKE " + } + case OP_EQ: + if cat.IsSet() { + opStr = "IN " + } else { + opStr = "= " + } + case OP_GE: + // NOTE: doesn't raise compiler error if operator used on invalid category + opStr = ">= " + case OP_GT: + // NOTE: doesn't raise compiler error if operator used on invalid category + opStr = "> " + case OP_LE: + // NOTE: doesn't raise compiler error if operator used on invalid category + opStr = "<= " + case OP_LT: + // NOTE: doesn't raise compiler error if operator used on invalid category + opStr = "< " + case OP_NE: + if cat.IsSet() { + opStr = "NOT IN " + } else { + opStr = "!= " + } + case OP_PIPE: + opStr = "?op_pipe " + case OP_ARG: + opStr = "?op_arg " + default: + return nil, &CompileError{ + fmt.Sprintf("unexpected query.opType %#v", op), + } + } + + if cat.IsSet() && op != OP_AP { + b.WriteString(catStr) + b.WriteString(opStr) + b.WriteByte('(') + idx := 0 + for _, stmt := range opStmts { + arg, ok := stmt.Value.buildCompile(b) + if ok { + args = append(args, arg) + } + if idx != len(opStmts)-1 { + b.WriteByte(',') + } + sCount++ + idx++ + } + b.WriteString(") ") + } else if cat.IsOrdered() && op == OP_AP { + idx := 0 + for _, stmt := range opStmts { + b.WriteString(catStr) + d, ok := stmt.Value.(DatetimeValue) + if !ok { + panic("type corruption, expected DatetimeValue") + } + + start, end := util.FuzzDatetime(d.D) + + b.WriteString(opStr) + fmt.Fprint(b, start.Unix(), " ") + b.WriteString("AND ") + fmt.Fprint(b, end.Unix(), " ") + if idx != len(opStmts)-1 { + b.WriteString(delim) + b.WriteByte(' ') + } + idx++ + sCount++ + } + } else { + idx := 0 + for _, stmt := range opStmts { + b.WriteString(catStr) + b.WriteString(opStr) + arg, ok := stmt.Value.buildCompile(b) + if ok { + if op == OP_AP { + args = append(args, "%"+arg+"%") + } else { + args = append(args, arg) + } + } + b.WriteByte(' ') + if idx != len(opStmts)-1 { + b.WriteString(delim) + b.WriteByte(' ') + } + idx++ + sCount++ + } + } + + if sCount != len(s) { + b.WriteString(delim) + b.WriteByte(' ') + } } } @@ -101,7 +232,7 @@ func (stmts Statements) Compile(b *strings.Builder, delim string) ([]string, err func (root Clause) Compile() (string, []string, error) { if d := root.Depth(); d > MAX_CLAUSE_DEPTH { return "", nil, &CompileError{ - fmt.Sprint("exceeded maximum clause depth of 8: ", d), + fmt.Sprintf("exceeded maximum clause depth: %d > %d", d, MAX_CLAUSE_DEPTH), } } @@ -117,13 +248,13 @@ func (c Clause) buildCompile(b *strings.Builder) ([]string, error) { b.WriteString("( ") var delim string - switch cop := c.Operator; cop { + switch c.Operator { case COP_AND: delim = "AND" case COP_OR: delim = "OR" default: - return nil, &CompileError{fmt.Sprint("invalid clause operator ", cop)} + return nil, &CompileError{fmt.Sprint("invalid clause operator ", c.Operator)} } args, err := c.Statements.Compile(b, delim) diff --git a/pkg/query/lexer.go b/pkg/query/lexer.go index 759b5bc..9e64b82 100644 --- a/pkg/query/lexer.go +++ b/pkg/query/lexer.go @@ -158,7 +158,6 @@ func Lex(query string) []Token { for _, match := range matches { if match[CLAUSE_START] != "" { tokens = append(tokens, Token{Type: TOK_CLAUSE_START}) - // TODO: set maximum nest level clauseLevel += 1 } if match[CLAUSE_OPERATOR] != "" { @@ -189,7 +188,6 @@ func Lex(query string) []Token { if match[CLAUSE_END] != "" { tokens = append(tokens, Token{Type: TOK_CLAUSE_END}) - // TODO: raise err if clauseLevel is 0 clauseLevel -= 1 } } diff --git a/pkg/query/parser.go b/pkg/query/parser.go index 65d678a..178665d 100644 --- a/pkg/query/parser.go +++ b/pkg/query/parser.go @@ -75,6 +75,7 @@ type Valuer interface { // TODO: define Type() valuerType Compare(Valuer) int + buildCompile(*strings.Builder) (string, bool) } var _ Valuer = StringValue{} @@ -103,6 +104,11 @@ func (v StringValue) Compare(other Valuer) int { } } +func (v StringValue) buildCompile(b *strings.Builder) (string, bool) { + b.WriteByte('?') + return v.S, true +} + type DatetimeValue struct { D time.Time } @@ -120,6 +126,11 @@ func (v DatetimeValue) Compare(other Valuer) int { return v.D.Compare(o.D) } +func (v DatetimeValue) buildCompile(b *strings.Builder) (string, bool) { + fmt.Fprint(b, v.D.Unix(), " ") + return "", false +} + var _ Valuer = StringValue{} var _ Valuer = DatetimeValue{} @@ -135,19 +146,19 @@ func (t catType) IsOrdered() bool { func (t catType) String() string { switch t { case CAT_TITLE: - return "Title" + return "title" case CAT_AUTHOR: - return "Author" + return "author" case CAT_DATE: - return "Date" + return "date" case CAT_FILETIME: - return "Filetime" + return "fileTime" case CAT_TAGS: - return "Tags" + return "tag" case CAT_LINKS: - return "Links" + return "links" case CAT_META: - return "Metadata" + return "meta" default: return "Invalid" } @@ -284,7 +295,7 @@ func (s Statements) CategoryPartition() iter.Seq2[catType, Statements] { } } -// Partition statemetns by their operator without copying, similar to +// Partition statements by their operator without copying, similar to // CategoryPartition. func (s Statements) OperatorPartition() iter.Seq2[opType, Statements] { if !slices.IsSortedFunc(s, StatementCmp) { @@ -300,6 +311,7 @@ func (s Statements) OperatorPartition() iter.Seq2[opType, Statements] { if !yield(lastOp, s[lastOpStart:i]) { return } + lastOpStart = i } lastOp = op } @@ -388,7 +400,7 @@ func (root *Clause) BFS() iter.Seq[*Clause] { write := 1 size := 1 - // FIXME: can potentially discard values if queue is too small + // WARN: can potentially discard values if queue is too small for size != 0 { node := queue[read] diff --git a/pkg/util/util.go b/pkg/util/util.go index 0ed64b8..577401e 100644 --- a/pkg/util/util.go +++ b/pkg/util/util.go @@ -37,6 +37,32 @@ func ParseDateTime(s string) (time.Time, error) { return time.Time{}, err } +// Estimate an interval around a time which is still "meaningful" +// +// Ex: 2025-06-14 -> [2025-06-10, 2025-06-18] +// Ex: 2025-06-14T12:00 -> [2025-06-14T8:00, 2025-06-14T16:00] +func FuzzDatetime(t time.Time) (start time.Time, stop time.Time) { + hour, minute, sec := t.Clock() + _, month, day := t.Date() + + var d time.Duration + if sec != 0 { + d = 5 * time.Minute + } else if minute != 0 { + d = 30 * time.Minute + } else if hour != 0 { + d = 4 * time.Hour + } else if day != 1 { + d = 84 * time.Hour // +- 3.5 days + } else if month != time.January { + d = 336 * time.Hour // +- .5 months + } else { + d = 4380 * time.Hour // search +- 6months + } + + return t.Add(-d), t.Add(d) +} + // Create a copy of a slice with all values that satisfy cond func Fitler[E any](s []E, cond func(e E) bool) []E { filtered := make([]E, 0, len(s)) |
