From 3e533e65f1baf26355675cfe244fd4da64e8807c Mon Sep 17 00:00:00 2001 From: JP Appel Date: Sun, 4 May 2025 18:23:44 -0400 Subject: Add names to doc filters --- pkg/index/filters.go | 53 +++++++++++++++++++++++++++++++++++++---------- pkg/index/filters_test.go | 6 +++--- pkg/index/index.go | 32 ++++++++++++++++++---------- pkg/index/index_test.go | 4 ++-- 4 files changed, 68 insertions(+), 27 deletions(-) (limited to 'pkg') diff --git a/pkg/index/filters.go b/pkg/index/filters.go index 315d125..7766b4c 100644 --- a/pkg/index/filters.go +++ b/pkg/index/filters.go @@ -1,39 +1,70 @@ package index import ( + "fmt" "io" "path/filepath" ) // NOTE: in the future it would be interesting lua filters -// TODO: create excluded path filter factory -type DocFilter func(infoPath, io.ReadSeeker) bool +type DocFilter struct { + Name string + Filter func(infoPath, io.ReadSeeker) bool +} func NewExtensionFilter(ext string) DocFilter { - return func(ip infoPath, _ io.ReadSeeker) bool { - return filepath.Ext(ip.path) == ext + return DocFilter{ + ext + " Filter", + func(ip infoPath, _ io.ReadSeeker) bool { + return filepath.Ext(ip.path) == ext + }, } } func NewMaxFilesizeFilter(size int64) DocFilter { - return func(ip infoPath, _ io.ReadSeeker) bool { - return ip.info.Size() <= size + return DocFilter{ + fmt.Sprintf("Max Size Filter %d", size), + func(ip infoPath, _ io.ReadSeeker) bool { + return ip.info.Size() <= size + }, } } -func NewFilenameFilter(excluded []string) DocFilter { +func NewExcludeFilenameFilter(excluded []string) DocFilter { excludedSet := make(map[string]bool, len(excluded)) for _, filename := range excluded { excludedSet[filename] = true } - return func(ip infoPath, _ io.ReadSeeker) bool { - _, ok := excludedSet[filepath.Base(ip.path)] - return ok + return DocFilter{ + "Excluded Filename filter", + func(ip infoPath, _ io.ReadSeeker) bool { + _, ok := excludedSet[filepath.Base(ip.path)] + return !ok + }, + } +} + +func NewIncludeFilenameFilter(included []string) 
DocFilter { + includedSet := make(map[string]bool, len(included)) + for _, filename := range included { + includedSet[filename] = true } + return DocFilter{ + "Included Filename filter", + func(ip infoPath, _ io.ReadSeeker) bool { + _, ok := includedSet[filepath.Base(ip.path)] + return ok + }, + } +} + +var YamlHeaderFilter = DocFilter{ + "YAML Header Filter", + yamlHeaderFilterFunc, } -func YamlHeaderFilter(_ infoPath, r io.ReadSeeker) bool { +func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool { const bufSize = 4096 buf := make([]byte, bufSize) diff --git a/pkg/index/filters_test.go b/pkg/index/filters_test.go index 897a82f..f1226b9 100644 --- a/pkg/index/filters_test.go +++ b/pkg/index/filters_test.go @@ -78,7 +78,7 @@ func TestYamlHeaderFilter(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got := YamlHeaderFilter(infoPath{}, tt.r) + got := yamlHeaderFilterFunc(infoPath{}, tt.r) if got != tt.want { t.Errorf("YamlHeaderFilter() = %v, want %v", got, tt.want) } @@ -100,9 +100,9 @@ func TestExtensionFilter(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - filter := NewExtensionFilter(tt.ext) + docFilter := NewExtensionFilter(tt.ext) ip := tt.infoGen(t) - got := filter(ip, nil) + got := docFilter.Filter(ip, nil) if got != tt.want { t.Errorf("ExtensionFilter() = %v, want %v", got, tt.want) diff --git a/pkg/index/index.go b/pkg/index/index.go index d6839bd..2467dcf 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -40,8 +40,18 @@ type Index struct { } func (idx Index) String() string { - // TODO: print info about active filters - return fmt.Sprintf("%s Documents[%d] Filters[%d]", idx.Root, len(idx.Documents), len(idx.Filters)) + b := strings.Builder{} + fmt.Fprintf(&b, "%s Documents[%d]\n", idx.Root, len(idx.Documents)) + fmt.Fprintf(&b, "Filters[%d]: ", len(idx.Filters)) + + for i, docFilter := range idx.Filters { + b.WriteString(docFilter.Name) + if i != len(idx.Filters) { + 
b.WriteByte(',') + } + } + + return b.String() } var _ yaml.NodeUnmarshaler = (*Document)(nil) @@ -275,8 +285,8 @@ func (idx Index) FilterOne(path string) bool { } defer f.Close() - for _, filter := range idx.Filters { - if !filter(infoPath{string(path), info}, f) { + for _, docFilter := range idx.Filters { + if !docFilter.Filter(infoPath{string(path), info}, f) { return false } if _, err := f.Seek(0, io.SeekStart); err != nil { @@ -323,8 +333,7 @@ func (idx Index) Filter(paths []string, numWorkers uint) []string { return fPaths } -// TODO: extract from struct -func (idx Index) ParseOne(path string) (*Document, error) { +func ParseDoc(path string) (*Document, error) { doc := &Document{} doc.Path = path @@ -348,18 +357,17 @@ func (idx Index) ParseOne(path string) (*Document, error) { return doc, nil } -// TODO: separate method from struct -func (idx *Index) Parse(paths []string, numWorkers uint) { +func ParseDocs(paths []string, numWorkers uint) map[string]*Document { jobs := make(chan string, numWorkers) results := make(chan Document, numWorkers) - idx.Documents = make(map[string]*Document, len(paths)) + docs := make(map[string]*Document, len(paths)) wg := &sync.WaitGroup{} wg.Add(int(numWorkers)) for range numWorkers { go func(jobs <-chan string, results chan<- Document, wg *sync.WaitGroup) { for path := range jobs { - doc, err := idx.ParseOne(path) + doc, err := ParseDoc(path) if err != nil { // TODO: propagate error slog.Error("Error occured while parsing file", @@ -387,6 +395,8 @@ func (idx *Index) Parse(paths []string, numWorkers uint) { }(results, wg) for doc := range results { - idx.Documents[doc.Path] = &doc + docs[doc.Path] = &doc } + + return docs } diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go index ed7e550..4bf49e3 100644 --- a/pkg/index/index_test.go +++ b/pkg/index/index_test.go @@ -262,9 +262,9 @@ func TestIndex_ParseOne(t *testing.T) { path := tt.pathMaker(t) tt.want.Path = path - got, gotErr := Index{}.ParseOne(path) + got, gotErr 
:= ParseDoc(path) - if !errors.Is(gotErr,tt.wantErr) { + if !errors.Is(gotErr, tt.wantErr) { t.Errorf("Recieved unexpected error: want %v got %v", tt.wantErr, gotErr) } else if gotErr != nil { return -- cgit v1.2.3