diff options
| -rw-r--r-- | pkg/index/index.go | 28 | ||||
| -rw-r--r-- | pkg/query/lang.md | 12 | ||||
| -rw-r--r-- | pkg/query/lexer.go | 27 | ||||
| -rw-r--r-- | pkg/query/lexer_test.go | 24 | ||||
| -rw-r--r-- | pkg/query/query.go | 34 | ||||
| -rw-r--r-- | pkg/util/util.go | 32 |
6 files changed, 104 insertions, 53 deletions
diff --git a/pkg/index/index.go b/pkg/index/index.go index 2467dcf..15a5e86 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -13,6 +13,7 @@ import ( "github.com/goccy/go-yaml" "github.com/goccy/go-yaml/ast" + "github.com/jpappel/atlas/pkg/util" ) var ErrHeaderParse error = errors.New("Unable to parse YAML header") @@ -119,33 +120,12 @@ func (doc *Document) parseDateNode(node ast.Node) error { return nil } - dateFormats := []string{ - "Jan _2, 2006", - "January 2, 2006", - time.DateOnly, - time.DateTime, - time.Layout, - time.ANSIC, - time.UnixDate, - time.RubyDate, - time.RFC822, - time.RFC822Z, - time.RFC850, - time.RFC1123, - time.RFC1123Z, - time.RFC3339, - } - - var t time.Time var err error - for _, layout := range dateFormats { - if t, err = time.Parse(layout, dateStr); err == nil { - doc.Date = t - return nil - } + if doc.Date, err = util.ParseDateTime(dateStr); err != nil { + return fmt.Errorf("Unable to parse date: %s", dateNode.Value) } - return fmt.Errorf("Unable to parse date: %s", dateNode.Value) + return nil } func (doc *Document) parseAuthor(node ast.Node) error { diff --git a/pkg/query/lang.md b/pkg/query/lang.md deleted file mode 100644 index a399cb8..0000000 --- a/pkg/query/lang.md +++ /dev/null @@ -1,12 +0,0 @@ -# Query Language Spec - -``` -<expr_list> := <expr> | <expr> <expr_list> - -<expr> := <statment> <bin_op> <statment> -<statment> := <statement_start> {strings} <statment_end> -<statment_start := -<statment_end> := - -<bin_op> := "and" | "or" | "not" | "similar" -``` diff --git a/pkg/query/lexer.go b/pkg/query/lexer.go index db7ea28..a53f0b4 100644 --- a/pkg/query/lexer.go +++ b/pkg/query/lexer.go @@ -9,7 +9,8 @@ import ( type queryTokenType int -var lexRegex, oldLexRegex *regexp.Regexp +var LexRegex *regexp.Regexp +var LexRegexPattern string const ( TOK_UNKNOWN queryTokenType = iota @@ -148,20 +149,20 @@ func Lex(query string) []Token { CLAUSE_END ) - matches := lexRegex.FindAllStringSubmatch(query, -1) + matches := LexRegex.FindAllStringSubmatch(query, -1) tokens := make([]Token, 0, 4*len(matches)) tokens = append(tokens, Token{Type: TOK_CLAUSE_START}) tokens = append(tokens, Token{TOK_CLAUSE_AND, "and"}) // default to and'ing all args clauseLevel := 1 - for i, match := range matches { + for _, match := range matches { if match[CLAUSE_START] != "" { tokens = append(tokens, Token{Type: TOK_CLAUSE_START}) // TODO: set maximum nest level clauseLevel += 1 } if match[CLAUSE_OPERATOR] != "" { - if i == 0 || tokens[i-1].Type != TOK_CLAUSE_START { + if len(tokens) == 0 || tokens[len(tokens)-1].Type != TOK_CLAUSE_START { tokens = append(tokens, Token{Type: TOK_CLAUSE_START}) clauseLevel += 1 } @@ -283,15 +284,10 @@ func tokenizeValue(s string, catType queryTokenType) Token { return t } -func treeStringify(tokens []Token) string { +func TokensStringify(tokens []Token) string { b := strings.Builder{} indentLvl := 0 - writeIndent := func(level int) { - for range level { - b.WriteString("\t") - } - } writeToken := func(t Token) { b.WriteByte('`') b.WriteString(t.String()) @@ -301,11 +297,11 @@ func treeStringify(tokens []Token) string { for i, token := range tokens { switch token.Type { case TOK_CLAUSE_START: - writeIndent(indentLvl) + writeIndent(&b, indentLvl) b.WriteByte('(') case TOK_CLAUSE_END: indentLvl -= 1 - writeIndent(indentLvl) + writeIndent(&b, indentLvl) b.WriteString(")\n") case TOK_CLAUSE_OR: b.WriteString("or\n") @@ -315,7 +311,7 @@ func treeStringify(tokens []Token) string { indentLvl += 1 case TOK_CAT_TITLE, TOK_CAT_AUTHOR, TOK_CAT_DATE, TOK_CAT_FILETIME, TOK_CAT_TAGS, TOK_CAT_LINKS, TOK_CAT_META, TOK_OP_NEG: if i == 0 || tokens[i-1].Type != TOK_OP_NEG { - writeIndent(indentLvl) + writeIndent(&b, indentLvl) } writeToken(token) case TOK_VAL_STR, TOK_VAL_DATETIME, TOK_UNKNOWN: @@ -340,7 +336,8 @@ func init() { clauseOpPattern := `(?<clause_operator>(?i)and|or)?` clauseStart := `(?<clause_start>\()?` clauseEnd := `(?<clause_end>\))?` - clausePattern := clauseStart + `\s*` + clauseOpPattern + `\s*(?:` + statementPattern + `|` + unknownPattern + `)\s*` + clauseEnd + clausePattern := clauseStart + `\s*` + clauseOpPattern + `\s*(?:` + statementPattern + `|` + unknownPattern + `)\s*` + clauseEnd + `\s*` + LexRegexPattern = clausePattern - lexRegex = regexp.MustCompile(clausePattern) + LexRegex = regexp.MustCompile(LexRegexPattern) } diff --git a/pkg/query/lexer_test.go b/pkg/query/lexer_test.go index 761055e..0cfc1de 100644 --- a/pkg/query/lexer_test.go +++ b/pkg/query/lexer_test.go @@ -28,6 +28,26 @@ func TestLex(t *testing.T) { {TOK_CAT_DATE, "d"}, {TOK_OP_AP, ":"}, {TOK_VAL_DATETIME, "01010001"}, {Type: TOK_CLAUSE_END}, }}, + {"leading subclause", "(or a:a a:b)", []Token{ + {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_AND, "and"}, + {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_OR, "or"}, + {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "a"}, + {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "b"}, + {Type: TOK_CLAUSE_END}, + {Type: TOK_CLAUSE_END}, + }}, + {"clause after clause", "(or a:a a:b) (or a:c a:d)", []Token{ + {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_AND, "and"}, + {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_OR, "or"}, + {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "a"}, + {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "b"}, + {Type: TOK_CLAUSE_END}, + {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_OR, "or"}, + {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "c"}, + {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "d"}, + {Type: TOK_CLAUSE_END}, + {Type: TOK_CLAUSE_END}, + }}, {"nested clauses", "a:a (or t:b t!=c) or d<=01010001 and -T~foo", []Token{ {Type: TOK_CLAUSE_START}, {TOK_CLAUSE_AND, "and"}, {TOK_CAT_AUTHOR, "a"}, {TOK_OP_AP, ":"}, {TOK_VAL_STR, "a"}, @@ -63,8 +83,8 @@ func TestLex(t *testing.T) { } if t.Failed() { - t.Log("Got\n", treeStringify(got)) - t.Log("Want\n", treeStringify(tt.want)) + t.Log("Got\n", TokensStringify(got)) + t.Log("Want\n", TokensStringify(tt.want)) } }) } diff --git a/pkg/query/query.go b/pkg/query/query.go new file mode 100644 index 0000000..5ddc724 --- /dev/null +++ b/pkg/query/query.go @@ -0,0 +1,34 @@ +package query + +import "strings" + +func Generate(ir *QueryIR) (any, error) { + // TODO: implement + return nil, nil +} + +func Compile(query string) (any, error) { + // TODO: logging + clause, err := Parse(Lex(query)) + if err != nil { + return nil, err + } + + ir, err := NewIR(*clause) + if err != nil { + return nil, err + } + + ir, err = Optimize(ir) + if err != nil { + return nil, err + } + + return Generate(ir) +} + +func writeIndent(b *strings.Builder, level int) { + for range level { + b.WriteByte('\t') + } +} diff --git a/pkg/util/util.go b/pkg/util/util.go new file mode 100644 index 0000000..9466e69 --- /dev/null +++ b/pkg/util/util.go @@ -0,0 +1,32 @@ +package util + +import "time" + +func ParseDateTime(s string) (time.Time, error) { + dateFormats := []string{ + "Jan _2, 2006", + "January 2, 2006", + time.DateOnly, + time.DateTime, + time.Layout, + time.ANSIC, + time.UnixDate, + time.RubyDate, + time.RFC822, + time.RFC822Z, + time.RFC850, + time.RFC1123, + time.RFC1123Z, + time.RFC3339, + } + + var t time.Time + var err error + for _, layout := range dateFormats { + if t, err = time.Parse(layout, s); err == nil { + return t, nil + } + } + + return time.Time{}, err +} |
