aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/index/filters.go
diff options
context:
space:
mode:
authorJP Appel <jeanpierre.appel01@gmail.com>2025-06-12 02:15:59 -0400
committerJP Appel <jeanpierre.appel01@gmail.com>2025-06-12 02:15:59 -0400
commit6b59c00eb2803eda22261a3347b080244874945f (patch)
tree6dd0ec7241df3732e98b86c89b79a75ee8f20a3e /pkg/index/filters.go
parent96e68148369e4076bcee45f91810669e4898c59e (diff)
Change doc parsing to only read YAML header
Fixes unintended YAML decode errors when a document has a valid header but the rest of its contents are not valid YAML.
Diffstat (limited to 'pkg/index/filters.go')
-rw-r--r--pkg/index/filters.go18
1 files changed, 15 insertions, 3 deletions
diff --git a/pkg/index/filters.go b/pkg/index/filters.go
index 7766b4c..a439185 100644
--- a/pkg/index/filters.go
+++ b/pkg/index/filters.go
@@ -65,6 +65,11 @@ var YamlHeaderFilter = DocFilter{
}
func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
+ return yamlHeaderPos(r) > 0
+}
+
+// Position of the end of a yaml header, negative (-1) if no header is found or a read fails
+func yamlHeaderPos(r io.ReadSeeker) int64 {
const bufSize = 4096
buf := make([]byte, bufSize)
@@ -72,9 +77,10 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
cmp := make([]byte, 4)
n, err := r.Read(carry)
if err != nil || n < 4 || string(carry) != "---\n" {
- return false
+ return -1
}
+ pos := int64(3)
headerFound := false
readMore := true
for readMore {
@@ -83,12 +89,13 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
if err == io.EOF {
readMore = false
} else if err != nil {
- return false
+ return -1
}
buf = buf[:n]
// PERF: the carry doesn't need to be checked on the first loop iteration
for i := range min(4, n) {
+ pos++
b := carry[i]
for j := range 4 {
if i+j < 4 {
@@ -104,6 +111,7 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
}
}
for i := range n - 4 {
+ pos++
b := buf[i]
if b == '\n' && string(buf[i:i+5]) == "\n---\n" {
headerFound = true
@@ -119,7 +127,11 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
}
}
- return headerFound
+ if headerFound {
+ return pos
+ } else {
+ return -1
+ }
}
func DefaultFilters() []DocFilter {