aboutsummaryrefslogtreecommitdiffstats
path: root/pkg
diff options
context:
space:
mode:
authorJP Appel <jeanpierre.appel01@gmail.com>2025-05-04 18:23:44 -0400
committerJP Appel <jeanpierre.appel01@gmail.com>2025-05-04 18:23:44 -0400
commit3e533e65f1baf26355675cfe244fd4da64e8807c (patch)
tree3c8a752644ed4da1f0bf64ce3e049cac3775e646 /pkg
parent37a96c43f6df141dc745f239891f4163b8870c02 (diff)
Add names to doc filters
Diffstat (limited to 'pkg')
-rw-r--r--pkg/index/filters.go53
-rw-r--r--pkg/index/filters_test.go6
-rw-r--r--pkg/index/index.go32
-rw-r--r--pkg/index/index_test.go4
4 files changed, 68 insertions, 27 deletions
diff --git a/pkg/index/filters.go b/pkg/index/filters.go
index 315d125..7766b4c 100644
--- a/pkg/index/filters.go
+++ b/pkg/index/filters.go
@@ -1,39 +1,70 @@
package index
import (
+ "fmt"
"io"
"path/filepath"
)
// NOTE: in the future it would be interesting to support Lua filters
-// TODO: create excluded path filter factory
-type DocFilter func(infoPath, io.ReadSeeker) bool
+// DocFilter pairs a document predicate with a display name.
+type DocFilter struct {
+ // Name is the label written out by Index.String when listing filters.
+ Name string
+ // Filter reports whether a candidate document passes; Index.FilterOne
+ // rejects a path as soon as any filter returns false.
+ Filter func(infoPath, io.ReadSeeker) bool
+}
+// NewExtensionFilter returns a DocFilter named "<ext> Filter" that passes a
+// document only when filepath.Ext of its path equals ext exactly.
+// filepath.Ext includes the leading dot, so ext should too (e.g. ".md").
func NewExtensionFilter(ext string) DocFilter {
- return func(ip infoPath, _ io.ReadSeeker) bool {
- return filepath.Ext(ip.path) == ext
+ return DocFilter{
+ ext + " Filter",
+ func(ip infoPath, _ io.ReadSeeker) bool {
+ return filepath.Ext(ip.path) == ext
+ },
}
}
+// NewMaxFilesizeFilter returns a DocFilter named "Max Size Filter <size>"
+// that passes a document when ip.info.Size() is less than or equal to size.
+// NOTE(review): size is presumably in bytes — confirm against the type of
+// infoPath.info.
func NewMaxFilesizeFilter(size int64) DocFilter {
- return func(ip infoPath, _ io.ReadSeeker) bool {
- return ip.info.Size() <= size
+ return DocFilter{
+ fmt.Sprintf("Max Size Filter %d", size),
+ func(ip infoPath, _ io.ReadSeeker) bool {
+ return ip.info.Size() <= size
+ },
}
}
-func NewFilenameFilter(excluded []string) DocFilter {
+// NewExcludeFilenameFilter returns a DocFilter named
+// "Excluded Filename filter" that passes a document unless the base name of
+// its path (filepath.Base) appears in excluded.
+func NewExcludeFilenameFilter(excluded []string) DocFilter {
excludedSet := make(map[string]bool, len(excluded))
for _, filename := range excluded {
excludedSet[filename] = true
}
- return func(ip infoPath, _ io.ReadSeeker) bool {
- _, ok := excludedSet[filepath.Base(ip.path)]
- return ok
+ return DocFilter{
+ "Excluded Filename filter",
+ func(ip infoPath, _ io.ReadSeeker) bool {
+ _, ok := excludedSet[filepath.Base(ip.path)]
+ // Pass only when the base name is absent from the excluded set.
+ return !ok
+ },
+ }
+}
+
+// NewIncludeFilenameFilter returns a DocFilter named
+// "Included Filename filter" that passes a document only when the base name
+// of its path (filepath.Base) appears in included.
+func NewIncludeFilenameFilter(included []string) DocFilter {
+ includedSet := make(map[string]bool, len(included))
+ for _, filename := range included {
+ includedSet[filename] = true
}
+ return DocFilter{
+ "Included Filename filter",
+ func(ip infoPath, _ io.ReadSeeker) bool {
+ _, ok := includedSet[filepath.Base(ip.path)]
+ return ok
+ },
+ }
+}
+
+// YamlHeaderFilter is the DocFilter named "YAML Header Filter" whose
+// predicate is yamlHeaderFilterFunc.
+var YamlHeaderFilter = DocFilter{
+ "YAML Header Filter",
+ yamlHeaderFilterFunc,
}
-func YamlHeaderFilter(_ infoPath, r io.ReadSeeker) bool {
+func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
const bufSize = 4096
buf := make([]byte, bufSize)
diff --git a/pkg/index/filters_test.go b/pkg/index/filters_test.go
index 897a82f..f1226b9 100644
--- a/pkg/index/filters_test.go
+++ b/pkg/index/filters_test.go
@@ -78,7 +78,7 @@ func TestYamlHeaderFilter(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- got := YamlHeaderFilter(infoPath{}, tt.r)
+ got := yamlHeaderFilterFunc(infoPath{}, tt.r)
if got != tt.want {
t.Errorf("YamlHeaderFilter() = %v, want %v", got, tt.want)
}
@@ -100,9 +100,9 @@ func TestExtensionFilter(t *testing.T) {
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
- filter := NewExtensionFilter(tt.ext)
+ docFilter := NewExtensionFilter(tt.ext)
ip := tt.infoGen(t)
- got := filter(ip, nil)
+ got := docFilter.Filter(ip, nil)
if got != tt.want {
t.Errorf("ExtensionFilter() = %v, want %v", got, tt.want)
diff --git a/pkg/index/index.go b/pkg/index/index.go
index d6839bd..2467dcf 100644
--- a/pkg/index/index.go
+++ b/pkg/index/index.go
@@ -40,8 +40,18 @@ type Index struct {
}
+// String renders the index as its root and document count on one line,
+// followed by the filter count and the comma-separated filter names.
func (idx Index) String() string {
- // TODO: print info about active filters
- return fmt.Sprintf("%s Documents[%d] Filters[%d]", idx.Root, len(idx.Documents), len(idx.Filters))
+ b := strings.Builder{}
+ fmt.Fprintf(&b, "%s Documents[%d]\n", idx.Root, len(idx.Documents))
+ fmt.Fprintf(&b, "Filters[%d]: ", len(idx.Filters))
+
+ for i, docFilter := range idx.Filters {
+ b.WriteString(docFilter.Name)
+ // Range indices stop at len-1, so compare against the last index;
+ // comparing against len would append a comma after every name.
+ if i != len(idx.Filters)-1 {
+ b.WriteByte(',')
+ }
+ }
+
+ return b.String()
}
var _ yaml.NodeUnmarshaler = (*Document)(nil)
@@ -275,8 +285,8 @@ func (idx Index) FilterOne(path string) bool {
}
defer f.Close()
- for _, filter := range idx.Filters {
- if !filter(infoPath{string(path), info}, f) {
+ for _, docFilter := range idx.Filters {
+ if !docFilter.Filter(infoPath{string(path), info}, f) {
return false
}
if _, err := f.Seek(0, io.SeekStart); err != nil {
@@ -323,8 +333,7 @@ func (idx Index) Filter(paths []string, numWorkers uint) []string {
return fPaths
}
-// TODO: extract from struct
-func (idx Index) ParseOne(path string) (*Document, error) {
+func ParseDoc(path string) (*Document, error) {
doc := &Document{}
doc.Path = path
@@ -348,18 +357,17 @@ func (idx Index) ParseOne(path string) (*Document, error) {
return doc, nil
}
-// TODO: separate method from struct
-func (idx *Index) Parse(paths []string, numWorkers uint) {
+func ParseDocs(paths []string, numWorkers uint) map[string]*Document {
jobs := make(chan string, numWorkers)
results := make(chan Document, numWorkers)
- idx.Documents = make(map[string]*Document, len(paths))
+ docs := make(map[string]*Document, len(paths))
wg := &sync.WaitGroup{}
wg.Add(int(numWorkers))
for range numWorkers {
go func(jobs <-chan string, results chan<- Document, wg *sync.WaitGroup) {
for path := range jobs {
- doc, err := idx.ParseOne(path)
+ doc, err := ParseDoc(path)
if err != nil {
// TODO: propagate error
slog.Error("Error occured while parsing file",
@@ -387,6 +395,8 @@ func (idx *Index) Parse(paths []string, numWorkers uint) {
}(results, wg)
for doc := range results {
- idx.Documents[doc.Path] = &doc
+ docs[doc.Path] = &doc
}
+
+ return docs
}
diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go
index ed7e550..4bf49e3 100644
--- a/pkg/index/index_test.go
+++ b/pkg/index/index_test.go
@@ -262,9 +262,9 @@ func TestIndex_ParseOne(t *testing.T) {
path := tt.pathMaker(t)
tt.want.Path = path
- got, gotErr := Index{}.ParseOne(path)
+ got, gotErr := ParseDoc(path)
- if !errors.Is(gotErr,tt.wantErr) {
+ if !errors.Is(gotErr, tt.wantErr) {
t.Errorf("Recieved unexpected error: want %v got %v", tt.wantErr, gotErr)
} else if gotErr != nil {
return