diff options
Diffstat (limited to 'pkg/index')
| -rw-r--r-- | pkg/index/filters.go | 14 | ||||
| -rw-r--r-- | pkg/index/index.go | 168 | ||||
| -rw-r--r-- | pkg/index/index_test.go | 142 |
3 files changed, 295 insertions, 29 deletions
diff --git a/pkg/index/filters.go b/pkg/index/filters.go index f59a5d6..315d125 100644 --- a/pkg/index/filters.go +++ b/pkg/index/filters.go @@ -5,6 +5,7 @@ import ( "path/filepath" ) +// NOTE: in the future it would be interesting lua filters // TODO: create excluded path filter factory type DocFilter func(infoPath, io.ReadSeeker) bool @@ -21,6 +22,17 @@ func NewMaxFilesizeFilter(size int64) DocFilter { } } +func NewFilenameFilter(excluded []string) DocFilter { + excludedSet := make(map[string]bool, len(excluded)) + for _, filename := range excluded { + excludedSet[filename] = true + } + return func(ip infoPath, _ io.ReadSeeker) bool { + _, ok := excludedSet[filepath.Base(ip.path)] + return ok + } +} + func YamlHeaderFilter(_ infoPath, r io.ReadSeeker) bool { const bufSize = 4096 buf := make([]byte, bufSize) @@ -80,5 +92,5 @@ func YamlHeaderFilter(_ infoPath, r io.ReadSeeker) bool { } func DefaultFilters() []DocFilter { - return []DocFilter{NewExtensionFilter(".md"), NewMaxFilesizeFilter(200 * 1024)} + return []DocFilter{NewExtensionFilter(".md"), NewMaxFilesizeFilter(200 * 1024), YamlHeaderFilter} } diff --git a/pkg/index/index.go b/pkg/index/index.go index 4bab21b..d6839bd 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -4,23 +4,28 @@ import ( "errors" "fmt" "io" + "log/slog" "os" "slices" + "strings" "sync" "time" "github.com/goccy/go-yaml" + "github.com/goccy/go-yaml/ast" ) +var ErrHeaderParse error = errors.New("Unable to parse YAML header") + type Document struct { Path string `yaml:"-" json:"path"` Title string `yaml:"title" json:"title"` - Date time.Time `yaml:"date" json:"date"` + Date time.Time `yaml:"-" json:"date"` FileTime time.Time `yaml:"-" json:"filetime"` - Authors []string `yaml:"authors" json:"authors"` - Tags []string `yaml:"tags" json:"tags"` - Links []string - OtherMeta string // unsure about how to handle this + Authors []string `yaml:"-" json:"authors"` + Tags []string `yaml:"tags,omitempty" json:"tags"` + Links []string `yaml:"-" json:"links"` + OtherMeta string `yaml:"-" json:"meta"` } type infoPath struct { @@ -39,8 +44,125 @@ func (idx Index) String() string { return fmt.Sprintf("%s Documents[%d] Filters[%d]", idx.Root, len(idx.Documents), len(idx.Filters)) } +var _ yaml.NodeUnmarshaler = (*Document)(nil) + +func (doc *Document) UnmarshalYAML(node ast.Node) error { + // parse top level fields + type alias Document + var temp alias + if err := yaml.NodeToValue(node, &temp); err != nil { + return err + } + doc.Title = temp.Title + doc.Tags = temp.Tags + + mapnode, ok := node.(*ast.MappingNode) + if !ok { + return ErrHeaderParse + } + + ignored_keyPaths := map[string]bool{ + "$.title": true, + "$.tags": true, + } + + buf := strings.Builder{} + for _, kv := range mapnode.Values { + k, v := kv.Key, kv.Value + keyPath := k.GetPath() + + if ignored_keyPaths[keyPath] { + continue + } + + if keyPath == "$.date" { + if err := doc.parseDateNode(v); err != nil { + return err + } + } else if keyPath == "$.author" { + if err := doc.parseAuthor(v); err != nil { + return err + } + } else { + field, err := kv.MarshalYAML() + if err != nil { + return err + } + buf.Write(field) + buf.WriteByte('\n') + } + } + + doc.OtherMeta = buf.String() + + return nil +} + +func (doc *Document) parseDateNode(node ast.Node) error { + dateNode, ok := node.(*ast.StringNode) + if !ok { + return ErrHeaderParse + } + dateStr := dateNode.Value + + if dateStr == "" { + return nil + } + + dateFormats := []string{ + "Jan _2, 2006", + "January 2, 2006", + time.DateOnly, + time.DateTime, + time.Layout, + time.ANSIC, + time.UnixDate, + time.RubyDate, + time.RFC822, + time.RFC822Z, + time.RFC850, + time.RFC1123, + time.RFC1123Z, + time.RFC3339, + } + + var t time.Time + var err error + for _, layout := range dateFormats { + if t, err = time.Parse(layout, dateStr); err == nil { + doc.Date = t + return nil + } + } + + return fmt.Errorf("Unable to parse date: %s", dateNode.Value) +} + +func (doc *Document) parseAuthor(node ast.Node) error { + authorsNode, ok := node.(*ast.SequenceNode) + if ok { + doc.Authors = make([]string, 0, len(authorsNode.Values)) + for _, authorNode := range authorsNode.Values { + authorStrNode, ok := authorNode.(*ast.StringNode) + if !ok { + return ErrHeaderParse + } + doc.Authors = append(doc.Authors, authorStrNode.Value) + } + } else { + authorNode, ok := node.(*ast.StringNode) + if ok { + doc.Authors = []string{authorNode.Value} + } else { + return ErrHeaderParse + } + } + + return nil +} + func (doc Document) Equal(other Document) bool { - if len(doc.Authors) != len(other.Authors) || len(doc.Tags) != len(other.Tags) || len(doc.Links) != len(other.Links) || doc.Path != other.Path || doc.Title != other.Title || doc.OtherMeta != other.OtherMeta || !doc.Date.Equal(other.Date) || !doc.FileTime.Equal(other.FileTime) { + if len(doc.Authors) != len(other.Authors) || len(doc.Tags) != len(other.Tags) || len(doc.Links) != len(other.Links) || doc.Path != other.Path || doc.Title != other.Title || doc.OtherMeta != other.OtherMeta || !doc.Date.Equal(other.Date) { return false } @@ -126,7 +248,7 @@ func (idx Index) Traverse(numWorkers uint) []string { activeJobs.Add(1) jobs <- infoPath{path: idx.Root, info: rootInfo} - // TODO: close jobs queue + // close jobs queue go func() { activeJobs.Wait() close(jobs) @@ -201,6 +323,7 @@ func (idx Index) Filter(paths []string, numWorkers uint) []string { return fPaths } +// TODO: extract from struct func (idx Index) ParseOne(path string) (*Document, error) { doc := &Document{} doc.Path = path @@ -209,6 +332,7 @@ func (idx Index) ParseOne(path string) (*Document, error) { if err != nil { return nil, err } + defer f.Close() info, err := f.Stat() if err != nil { @@ -216,31 +340,16 @@ func (idx Index) ParseOne(path string) (*Document, error) { } doc.FileTime = info.ModTime() - buf := make([]byte, 4, 1024) - n, err := f.Read(buf) - if err != nil { - return nil, err - } else if n != 4 { - return nil, errors.New("Short read") + if err := yaml.NewDecoder(f).Decode(doc); err != nil { + return nil, errors.Join(ErrHeaderParse, err) } - // FIXME: unmarshalling is **VERY** borked up rn - if err := yaml.Unmarshal(buf, &doc); err != nil { - return nil, err - } - // TODO: implement custom unmarshaller, for singular `Author` - // dec := yaml.NewDecoder(f) - // TODO: handle no yaml header error - // if err := dec.Decode(&doc); err != nil { - // panic(err) - // } - - // TODO: body parsing - + // TODO: read the rest of the file to find links return doc, nil } -func (idx Index) Parse(paths []string, numWorkers uint) { +// TODO: separate method from struct +func (idx *Index) Parse(paths []string, numWorkers uint) { jobs := make(chan string, numWorkers) results := make(chan Document, numWorkers) idx.Documents = make(map[string]*Document, len(paths)) @@ -253,7 +362,10 @@ func (idx Index) Parse(paths []string, numWorkers uint) { doc, err := idx.ParseOne(path) if err != nil { // TODO: propagate error - panic(err) + slog.Error("Error occured while parsing file", + slog.String("path", path), slog.String("err", err.Error()), + ) + continue } results <- *doc diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go index 0b5d2f2..ed7e550 100644 --- a/pkg/index/index_test.go +++ b/pkg/index/index_test.go @@ -1,10 +1,12 @@ package index import ( + "errors" "fmt" "os" "slices" "testing" + "time" ) var indexCases map[string]func(t *testing.T) Index @@ -136,3 +138,143 @@ func TestIndex_Filter(t *testing.T) { }) } } + +func newTestFile(t *testing.T, name string) (*os.File, string) { + dir := t.TempDir() + path := dir + "/" + name + f, err := os.Create(path) + if err != nil { + panic(err) + } + + return f, path +} + +func TestIndex_ParseOne(t *testing.T) { + tests := []struct { + name string + pathMaker func(t *testing.T) string + want *Document + wantErr error + }{ + { + "title only header", + func(t *testing.T) string { + f, path := newTestFile(t, "title") + defer f.Close() + + f.WriteString("---\ntitle: A title\n---\n") + return path + }, + &Document{Title: "A title"}, + nil, + }, + { + "tags", + func(t *testing.T) string { + f, path := newTestFile(t, "tags") + defer f.Close() + + f.WriteString("---\n") + f.WriteString("tags:\n") + f.WriteString("- a\n") + f.WriteString("- b\n") + f.WriteString("- c\n") + f.WriteString("---\n") + + return path + }, + &Document{Tags: []string{"a", "b", "c"}}, + nil, + }, + { + "date", + func(t *testing.T) string { + f, path := newTestFile(t, "date") + defer f.Close() + + f.WriteString("---\ndate: May 1, 2025") + + return path + }, + &Document{Date: time.Date(2025, time.May, 1, 0, 0, 0, 0, time.UTC)}, + nil, + }, + { + "single author", + func(t *testing.T) string { + f, path := newTestFile(t, "author") + defer f.Close() + + f.WriteString("---\nauthor: Rob Pike\n---\n") + + return path + }, + &Document{Authors: []string{"Rob Pike"}}, + nil, + }, + { + "multi author", + func(t *testing.T) string { + f, path := newTestFile(t, "author") + defer f.Close() + + f.WriteString("---\nauthor:\n- Robert Griesemer\n- Rob Pike\n- Ken Thompson\n---\n") + + return path + }, + &Document{Authors: []string{"Robert Griesemer", "Rob Pike", "Ken Thompson"}}, + nil, + }, + { + "meta", + func(t *testing.T) string { + f, path := newTestFile(t, "metadata") + defer f.Close() + + f.WriteString("---\n") + f.WriteString("unknownKey: value\n") + f.WriteString("---\n") + + return path + }, + &Document{OtherMeta: "unknownKey: value\n"}, + nil, + }, + { + "bad tags", + func(t *testing.T) string { + f, path := newTestFile(t, "badtags") + defer f.Close() + + f.WriteString("---\n") + f.WriteString("tags:\n- good tag\n-bad tag\n") + f.WriteString("---\n") + + return path + }, + &Document{}, + ErrHeaderParse, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + path := tt.pathMaker(t) + tt.want.Path = path + + got, gotErr := Index{}.ParseOne(path) + + if !errors.Is(gotErr,tt.wantErr) { + t.Errorf("Recieved unexpected error: want %v got %v", tt.wantErr, gotErr) + } else if gotErr != nil { + return + } + + if !got.Equal(*tt.want) { + t.Error("Recieved document is not equal") + t.Logf("Got = %+v", got) + t.Logf("Want = %+v", tt.want) + } + }) + } +} |
