aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/index/filters.go
diff options
context:
space:
mode:
author JP Appel <jeanpierre.appel01@gmail.com> 2025-04-27 00:49:27 -0400
committer JP Appel <jeanpierre.appel01@gmail.com> 2025-04-27 00:49:27 -0400
commit 34b8d8ff1f9d65c08a9156d72f08cf548183c6f4 (patch)
tree a00fa0410a7bcde125a37b50b3a4956c838fa569 /pkg/index/filters.go
parent 42527fdb0aca0d30652bb3052b80ab75ab057572 (diff)
Large commit; many features
Diffstat (limited to 'pkg/index/filters.go')
-rw-r--r-- pkg/index/filters.go 84
1 files changed, 84 insertions, 0 deletions
diff --git a/pkg/index/filters.go b/pkg/index/filters.go
new file mode 100644
index 0000000..f59a5d6
--- /dev/null
+++ b/pkg/index/filters.go
@@ -0,0 +1,84 @@
+package index
+
+import (
+ "io"
+ "path/filepath"
+)
+
+// TODO: create excluded path filter factory
+
+// DocFilter is a predicate over a candidate document. It receives the
+// document's infoPath and a seekable reader (presumably over the document's
+// contents; confirm against the caller). The filters defined in this file
+// return true when the document satisfies the filter's criterion.
+type DocFilter func(ip infoPath, r io.ReadSeeker) bool
+
+// NewExtensionFilter builds a DocFilter that returns true only for documents
+// whose path extension, as reported by filepath.Ext, is exactly ext. Because
+// filepath.Ext includes the leading dot, ext must too (e.g. ".md"); the
+// comparison is a plain, case-sensitive string equality. The reader argument
+// is ignored.
+func NewExtensionFilter(ext string) DocFilter {
+	matchesExt := func(doc infoPath, _ io.ReadSeeker) bool {
+		got := filepath.Ext(doc.path)
+		return got == ext
+	}
+	return matchesExt
+}
+
+// NewMaxFilesizeFilter builds a DocFilter that returns true for documents
+// whose info.Size() is at most size (the bound is inclusive). The reader
+// argument is ignored.
+func NewMaxFilesizeFilter(size int64) DocFilter {
+	withinLimit := func(doc infoPath, _ io.ReadSeeker) bool {
+		fileSize := doc.info.Size()
+		return size >= fileSize
+	}
+	return withinLimit
+}
+
+// YamlHeaderFilter reports whether the data read from r starts with a YAML
+// header block: the first four bytes must be the opening line "---\n", and a
+// closing "\n---\n" must appear somewhere after it. The newline that ends the
+// opening line may double as the start of the closing delimiter, so the empty
+// header "---\n---\n" is accepted.
+//
+// The infoPath argument is ignored. r is consumed from its current position
+// and is not rewound; reading stops as soon as the closing delimiter is seen.
+// Any read error other than io.EOF makes the filter return false.
+func YamlHeaderFilter(_ infoPath, r io.ReadSeeker) bool {
+	const (
+		opening = "---\n"
+		closing = "\n---\n"
+		bufSize = 4096
+		// overlap is how many trailing bytes of already-scanned data are kept
+		// in front of the next read, so a closing delimiter split across two
+		// reads is still seen as one contiguous run.
+		overlap = len(closing) - 1
+	)
+
+	// window[:overlap] always holds the carried-over tail; every Read lands
+	// directly behind it in window[overlap:].
+	window := make([]byte, overlap+bufSize)
+
+	// io.ReadFull tolerates short reads, which a bare Read is allowed to return.
+	// len(opening) == overlap, so the opening line doubles as the first carry.
+	if _, err := io.ReadFull(r, window[:len(opening)]); err != nil {
+		return false
+	}
+	if string(window[:len(opening)]) != opening {
+		return false
+	}
+
+	for {
+		n, err := r.Read(window[overlap:])
+		if err != nil && err != io.EOF {
+			return false
+		}
+
+		// Scan carry + fresh bytes together. A Read may return data along with
+		// io.EOF, so the scan happens before the EOF check.
+		scanned := window[:overlap+n]
+		for i := 0; i+len(closing) <= len(scanned); i++ {
+			if scanned[i] == '\n' && string(scanned[i:i+len(closing)]) == closing {
+				return true
+			}
+		}
+
+		if err == io.EOF {
+			return false
+		}
+
+		// Keep the last overlap bytes for the next round. scanned is never
+		// shorter than overlap, so this cannot index out of range even after a
+		// short (or empty) read; copy handles the overlapping ranges.
+		copy(window[:overlap], scanned[len(scanned)-overlap:])
+	}
+}
+
+func DefaultFilters() []DocFilter {
+ return []DocFilter{NewExtensionFilter(".md"), NewMaxFilesizeFilter(200 * 1024)}
+}