From 34b8d8ff1f9d65c08a9156d72f08cf548183c6f4 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Sun, 27 Apr 2025 00:49:27 -0400 Subject: Large commit; many features --- pkg/index/filters.go | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 pkg/index/filters.go (limited to 'pkg/index/filters.go') diff --git a/pkg/index/filters.go b/pkg/index/filters.go new file mode 100644 index 0000000..f59a5d6 --- /dev/null +++ b/pkg/index/filters.go @@ -0,0 +1,84 @@ +package index + +import ( + "io" + "path/filepath" +) + +// TODO: create excluded path filter factory + +type DocFilter func(infoPath, io.ReadSeeker) bool + +func NewExtensionFilter(ext string) DocFilter { + return func(ip infoPath, _ io.ReadSeeker) bool { + return filepath.Ext(ip.path) == ext + } +} + +func NewMaxFilesizeFilter(size int64) DocFilter { + return func(ip infoPath, _ io.ReadSeeker) bool { + return ip.info.Size() <= size + } +} + +func YamlHeaderFilter(_ infoPath, r io.ReadSeeker) bool { + const bufSize = 4096 + buf := make([]byte, bufSize) + + carry := make([]byte, 4) + cmp := make([]byte, 4) + n, err := r.Read(carry) + if err != nil || n < 4 || string(carry) != "---\n" { + return false + } + + headerFound := false + readMore := true + for readMore { + buf = buf[:bufSize] + n, err := r.Read(buf) + if err == io.EOF { + readMore = false + } else if err != nil { + return false + } + buf = buf[:n] + + // PERF: the carry doesn't need to be checked on the first loop iteration + for i := range min(4, n) { + b := carry[i] + for j := range 4 { + if i+j < 4 { + cmp[j] = carry[i+j] + } else { + cmp[j] = buf[(i+j)%4] + } + } + if b == '\n' && string(cmp) == "\n---\n" { + headerFound = true + readMore = false + break + } + } + for i := range n - 4 { + b := buf[i] + if b == '\n' && string(buf[i:i+5]) == "\n---\n" { + headerFound = true + readMore = false + break + } + } + + if readMore { + for i := range 4 { + carry[i] = buf[n-4+i] + } + } + } + + return headerFound +} + +func DefaultFilters() []DocFilter { + return []DocFilter{NewExtensionFilter(".md"), NewMaxFilesizeFilter(200 * 1024)} +} -- cgit v1.2.3