From 35ec0f5afb9800b25bd813bccc57a16bc9f837c4 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Mon, 28 Apr 2025 23:34:42 -0400 Subject: Add document output formatting --- .gitignore | 5 ++ atlas | Bin 7327520 -> 0 bytes pkg/query/data_structures.go | 81 -------------------- pkg/query/outputs.go | 176 +++++++++++++++++++++++++++++++++++++++++++ pkg/query/outputs_test.go | 62 +++++++++++++++ pkg/query/parser.go | 19 ----- pkg/query/query.go | 50 ------------ 7 files changed, 243 insertions(+), 150 deletions(-) delete mode 100755 atlas delete mode 100644 pkg/query/data_structures.go create mode 100644 pkg/query/outputs.go create mode 100644 pkg/query/outputs_test.go delete mode 100644 pkg/query/parser.go delete mode 100644 pkg/query/query.go diff --git a/.gitignore b/.gitignore index 564d530..e58c1f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +atlas +*.db +*.db-shm +*.db-wal + # Binaries for programs and plugins *.exe *.exe~ diff --git a/atlas b/atlas deleted file mode 100755 index 3577f05..0000000 Binary files a/atlas and /dev/null differ diff --git a/pkg/query/data_structures.go b/pkg/query/data_structures.go deleted file mode 100644 index c48ecde..0000000 --- a/pkg/query/data_structures.go +++ /dev/null @@ -1,81 +0,0 @@ -package query - -import "errors" - -// not threadsafe implementation of stack -type nodeStack struct { - buf []*Node -} - -func (s nodeStack) Push(n *Node) { - s.buf = append(s.buf, n) -} -func (s nodeStack) Pop() *Node { - last_index := len(s.buf) - 1 - n := s.buf[last_index] - s.buf = s.buf[:last_index] - return n -} -func (s nodeStack) Peek() *Node { - return s.buf[len(s.buf)-1] -} -func (s nodeStack) IsEmpty() bool { - return len(s.buf) == 0 -} - -type nodeQueue struct { - buf []*Node - head int - tail int -} - -func makeNodeQueue(initial *Node, cap int) nodeQueue { - if cap < 1 { - panic("Invalid nodeQueue Capacity") - } - - q := nodeQueue{ - buf: make([]*Node, 0, cap), - head: 0, - tail: 1, - } - q.buf[0] = initial - - return q -} 
-
-func (q nodeQueue) Enqueue(n *Node) error {
-
-	q.buf[q.tail] = n
-	new_tail := (q.tail + 1) % len(q.buf)
-	if new_tail == q.head {
-		return errors.New("Queue out of capacity")
-	}
-
-	q.tail = new_tail
-	return nil
-}
-func (q nodeQueue) Dequeue() (*Node, error) {
-	if q.head == q.tail {
-		return nil, errors.New("Empty Queue")
-	}
-
-	n := q.buf[q.head]
-	q.head = (q.head + 1) % len(q.buf)
-	return n, nil
-}
-func (q nodeQueue) PeekHead() (*Node, error) {
-	if q.head == q.tail {
-		return nil, errors.New("Empty queue")
-	}
-	return q.buf[q.head], nil
-}
-func (q nodeQueue) PeekTail() (*Node, error) {
-	if q.head == q.tail {
-		return nil, errors.New("Empty Queue")
-	}
-	return q.buf[q.tail-1], nil
-}
-func (q nodeQueue) IsEmpty() bool {
-	return q.head == q.tail
-}
diff --git a/pkg/query/outputs.go b/pkg/query/outputs.go
new file mode 100644
index 0000000..a18fd16
--- /dev/null
+++ b/pkg/query/outputs.go
@@ -0,0 +1,176 @@
+package query
+
+import (
+	"errors"
+	"fmt"
+	"strings"
+
+	"github.com/jpappel/atlas/pkg/index"
+)
+
+var ErrUnrecognizedOutputToken = errors.New("Unrecognized output token")
+var ErrExpectedMoreStringTokens = errors.New("Expected more string tokens")
+
+const DefaultOutputFormat string = "%p %T %d authors:%a tags:%t"
+
+type OutputToken uint64
+
+// TODO: support long token names
+const (
+	OUT_TOK_STR      OutputToken = iota
+	OUT_TOK_PATH                 // %p %path
+	OUT_TOK_TITLE                // %T %title
+	OUT_TOK_DATE                 // %d %date
+	OUT_TOK_FILETIME             // %f %filetime
+	OUT_TOK_AUTHORS              // %a %authors
+	OUT_TOK_TAGS                 // %t %tags
+	OUT_TOK_LINKS                // %l %links
+	OUT_TOK_META                 // %m %meta
+)
+
+// TODO: change interface to use byte slices
+type Outputer interface {
+	OutputOne(doc index.Document) (string, error)
+	Output(docs []index.Document) (string, error)
+}
+
+type JsonOutput struct{}
+type CustomOutput struct {
+	stringTokens   []string
+	tokens         []OutputToken
+	datetimeFormat string
+}
+
+func (o JsonOutput) OutputOne(doc index.Document) (string, error) {
+	// TODO: implement
+	return "", nil
+}
+
+func (o JsonOutput) Output(docs []index.Document) (string, error) {
+	// TODO: implement
+	return "", nil
+}
+
+// parseOutputFormat splits formatStr into tokens and the literals for its OUT_TOK_STR tokens (in order).
+// Unknown or unfinished %-directives yield an error wrapping ErrUnrecognizedOutputToken (use errors.Is).
+func parseOutputFormat(formatStr string) ([]OutputToken, []string, error) {
+	toks := make([]OutputToken, 0, 16)
+	curTok := make([]rune, 0, 16) // pending literal text, or a lone '%' awaiting its directive rune
+	strToks := make([]string, 0, 8)
+
+	for _, c := range formatStr {
+		if c == '%' && len(curTok) > 0 && curTok[0] != '%' { // directive starts: flush the pending literal
+			strToks = append(strToks, string(curTok))
+			toks = append(toks, OUT_TOK_STR)
+			curTok = curTok[:0]
+			curTok = append(curTok, c)
+			continue
+		}
+
+		curTok = append(curTok, c)
+		if curTok[0] == '%' && len(curTok) == 2 {
+			switch curTok[1] {
+			case '%': // "%%" is an escaped literal percent
+				strToks = append(strToks, "%")
+				toks = append(toks, OUT_TOK_STR)
+			case 'p':
+				toks = append(toks, OUT_TOK_PATH)
+			case 'T':
+				toks = append(toks, OUT_TOK_TITLE)
+			case 'd':
+				toks = append(toks, OUT_TOK_DATE)
+			case 'f':
+				toks = append(toks, OUT_TOK_FILETIME)
+			case 'a':
+				toks = append(toks, OUT_TOK_AUTHORS)
+			case 't':
+				toks = append(toks, OUT_TOK_TAGS)
+			case 'l':
+				toks = append(toks, OUT_TOK_LINKS)
+			case 'm':
+				toks = append(toks, OUT_TOK_META)
+			default:
+				return nil, nil, fmt.Errorf("%w: %q", ErrUnrecognizedOutputToken, string(curTok))
+			}
+			curTok = curTok[:0]
+		}
+	}
+
+	if len(curTok) != 0 && curTok[len(curTok)-1] == '%' {
+		// the format ended right after a '%', so the directive is incomplete
+		return nil, nil, fmt.Errorf("%w: trailing %q", ErrUnrecognizedOutputToken, "%")
+	} else if len(curTok) != 0 {
+		strToks = append(strToks, string(curTok))
+		toks = append(toks, OUT_TOK_STR)
+	}
+
+	return toks, strToks, nil
+}
+
+func NewCustomOutput(formatStr string, datetimeFormat string) (CustomOutput, error) {
+	outToks, strToks, err := parseOutputFormat(formatStr)
+	if err != nil {
+		return CustomOutput{}, err
+	}
+
+	return CustomOutput{strToks, outToks, datetimeFormat}, nil
+}
+
+func (o CustomOutput) OutputOne(doc index.Document) (string, error) {
+	b := strings.Builder{}
+	// TODO: determine realistic initial capacity
+
+	if err := o.writeDoc(&b, doc); err != nil {
+		return "", err
+	}
+
+	return b.String(), nil
+}
+
+// Output formats every document in docs, ending each one with a newline.
+// An empty or nil docs yields "" and a nil error; the first writeDoc error aborts.
+func (o CustomOutput) Output(docs []index.Document) (string, error) {
+	b := strings.Builder{}
+	// TODO: determine realistic initial capacity
+
+	for _, doc := range docs {
+		if err := o.writeDoc(&b, doc); err != nil {
+			return "", err
+		}
+		b.WriteRune('\n')
+	}
+
+	return b.String(), nil
+}
+
+func (o CustomOutput) writeDoc(b *strings.Builder, doc index.Document) error {
+	curStrTok := 0 // index of the next literal in o.stringTokens
+	for _, token := range o.tokens {
+		if token == OUT_TOK_STR {
+			if curStrTok >= len(o.stringTokens) {
+				return ErrExpectedMoreStringTokens
+			}
+			b.WriteString(o.stringTokens[curStrTok])
+			curStrTok++
+		} else if token == OUT_TOK_PATH {
+			b.WriteString(doc.Path)
+		} else if token == OUT_TOK_TITLE {
+			b.WriteString(doc.Title)
+		} else if token == OUT_TOK_DATE {
+			b.WriteString(doc.Date.Format(o.datetimeFormat))
+		} else if token == OUT_TOK_FILETIME {
+			b.WriteString(doc.FileTime.Format(o.datetimeFormat))
+		} else if token == OUT_TOK_AUTHORS {
+			b.WriteString(strings.Join(doc.Authors, ", "))
+		} else if token == OUT_TOK_TAGS {
+			b.WriteString(strings.Join(doc.Tags, ", "))
+		} else if token == OUT_TOK_LINKS {
+			b.WriteString(strings.Join(doc.Links, ", "))
+		} else if token == OUT_TOK_META {
+			b.WriteString(doc.OtherMeta)
+		} else {
+			return ErrUnrecognizedOutputToken
+		}
+	}
+	return nil
+}
diff --git a/pkg/query/outputs_test.go b/pkg/query/outputs_test.go
new file mode 100644
index 0000000..b5fdba7
--- /dev/null
+++ b/pkg/query/outputs_test.go
@@ -0,0 +1,62 @@
+package query
+
+import (
+	"errors"
+	"slices"
+	"testing"
+)
+
+func Test_parseOutputFormat(t *testing.T) {
+	tests := []struct {
+		name        string
+		formatStr   string
+		wantToks    []OutputToken
+		wantStrToks []string
+		wantErr     error
+	}{
+		{
+			"one big string",
+			"here is a string with no placeholders",
+			[]OutputToken{OUT_TOK_STR},
+			[]string{"here is a string with no placeholders"},
+			nil,
+		},
+		{
+			"default format",
+			"%p %T %d authors:%a tags:%t",
+			[]OutputToken{OUT_TOK_PATH, OUT_TOK_STR, OUT_TOK_TITLE, OUT_TOK_STR, OUT_TOK_DATE, OUT_TOK_STR, OUT_TOK_AUTHORS, OUT_TOK_STR, OUT_TOK_TAGS},
+			[]string{" ", " ", " authors:", " tags:"},
+			nil,
+		},
+		{
+			"literal percents",
+			"%%%p%%%T%%",
+			[]OutputToken{OUT_TOK_STR, OUT_TOK_PATH, OUT_TOK_STR, OUT_TOK_TITLE, OUT_TOK_STR},
+			[]string{"%", "%", "%"},
+			nil,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotToks, gotStrToks, gotErr := parseOutputFormat(tt.formatStr)
+
+			if !errors.Is(gotErr, tt.wantErr) {
+				t.Errorf("Recieved unexpected error: got %v want %v", gotErr, tt.wantErr)
+			} else if gotErr != nil {
+				return
+			}
+
+			if !slices.Equal(gotToks, tt.wantToks) {
+				t.Error("Unequal output tokens")
+				t.Log("Got:", gotToks)
+				t.Log("Want:", tt.wantToks)
+			}
+
+			if !slices.Equal(gotStrToks, tt.wantStrToks) {
+				t.Error("Unequal string tokens")
+				t.Log("Got:", gotStrToks)
+				t.Log("Want:", tt.wantStrToks)
+			}
+		})
+	}
+}
diff --git a/pkg/query/parser.go b/pkg/query/parser.go
deleted file mode 100644
index 355b18c..0000000
--- a/pkg/query/parser.go
+++ /dev/null
@@ -1,19 +0,0 @@
-package query
-
-type TokenType uint64
-
-const (
-	TOKEN_ERROR TokenType = iota
-	TOKEN_EOF
-	TOKEN_AND
-	TOKEN_OR
-	TOKEN_NOT
-	TOKEN_SIMILAR
-	TOKEN_STATEMENT
-	// TODO: consider adding regex token
-)
-
-type Token struct {
-	Type    TokenType
-	Content string
-}
diff --git a/pkg/query/query.go b/pkg/query/query.go
deleted file mode 100644
index b712370..0000000
--- a/pkg/query/query.go
+++ /dev/null
@@ -1,50 +0,0 @@
-package query
-
-
-type Node struct {
-	Parent   *Node
-	Children []*Node
-	Token
-}
-
-type AST struct {
-	root Node
-	size uint64
-}
-
-// Walk an ast depth first
-func (T AST) dfWalk() func() (*Node, bool) {
-	stack := nodeStack{make([]*Node, 0, T.size)}
-	stack.Push(&T.root)
-
-	return func() (*Node, bool) {
-		n := stack.Pop()
-		for _, child := range n.Children {
-			stack.Push(child)
-		}
-
-		if stack.IsEmpty() {
-			return n, false
-		}
-		return n, true
-	}
-}
-
-// Walk an ast breadth first
-func (T AST) bfWalk() func() (*Node, bool) {
-	queue := nodeQueue{}
-	queue.buf = make([]*Node, 0, T.size)
-	queue.Enqueue(&T.root)
-
-	return func() (*Node, bool) {
-		n, err := queue.Dequeue()
-		if err != nil {
-			return nil, false
-		}
-
-		for _, child := range n.Children {
-			queue.Enqueue(child)
-		}
-		return n, true
-	}
-}
-- 
cgit v1.2.3