aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJP Appel <jeanpierre.appel01@gmail.com>2025-06-12 02:15:59 -0400
committerJP Appel <jeanpierre.appel01@gmail.com>2025-06-12 02:15:59 -0400
commit6b59c00eb2803eda22261a3347b080244874945f (patch)
tree6dd0ec7241df3732e98b86c89b79a75ee8f20a3e
parent96e68148369e4076bcee45f91810669e4898c59e (diff)
Change doc parsing to only read YAML header
Fixes unintended YAML decode errors when a document has a valid header but the rest of its contents are not valid YAML.
-rw-r--r--pkg/index/filters.go18
-rw-r--r--pkg/index/index.go9
-rw-r--r--pkg/index/index_test.go3
3 files changed, 25 insertions, 5 deletions
diff --git a/pkg/index/filters.go b/pkg/index/filters.go
index 7766b4c..a439185 100644
--- a/pkg/index/filters.go
+++ b/pkg/index/filters.go
@@ -65,6 +65,11 @@ var YamlHeaderFilter = DocFilter{
}
func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
+ return yamlHeaderPos(r) > 0
+}
+
+// yamlHeaderPos returns the position of the end of a yaml header, or a negative value if no header is found
+func yamlHeaderPos(r io.ReadSeeker) int64 {
const bufSize = 4096
buf := make([]byte, bufSize)
@@ -72,9 +77,10 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
cmp := make([]byte, 4)
n, err := r.Read(carry)
if err != nil || n < 4 || string(carry) != "---\n" {
- return false
+ return -1
}
+ pos := int64(3)
headerFound := false
readMore := true
for readMore {
@@ -83,12 +89,13 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
if err == io.EOF {
readMore = false
} else if err != nil {
- return false
+ return -1
}
buf = buf[:n]
// PERF: the carry doesn't need to be checked on the first loop iteration
for i := range min(4, n) {
+ pos++
b := carry[i]
for j := range 4 {
if i+j < 4 {
@@ -104,6 +111,7 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
}
}
for i := range n - 4 {
+ pos++
b := buf[i]
if b == '\n' && string(buf[i:i+5]) == "\n---\n" {
headerFound = true
@@ -119,7 +127,11 @@ func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
}
}
- return headerFound
+ if headerFound {
+ return pos
+ } else {
+ return -1
+ }
}
func DefaultFilters() []DocFilter {
diff --git a/pkg/index/index.go b/pkg/index/index.go
index 15a5e86..5d23e14 100644
--- a/pkg/index/index.go
+++ b/pkg/index/index.go
@@ -329,7 +329,14 @@ func ParseDoc(path string) (*Document, error) {
}
doc.FileTime = info.ModTime()
- if err := yaml.NewDecoder(f).Decode(doc); err != nil {
+ pos := yamlHeaderPos(f)
+ f.Seek(0, io.SeekStart)
+ if pos < 0 {
+ return nil, fmt.Errorf("Can't find YAML header in %s", path)
+ }
+
+ // FIXME: decoder reads past yaml header into document
+ if err := yaml.NewDecoder(io.LimitReader(f, pos)).Decode(doc); err != nil {
return nil, errors.Join(ErrHeaderParse, err)
}
diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go
index 4bf49e3..4e55325 100644
--- a/pkg/index/index_test.go
+++ b/pkg/index/index_test.go
@@ -193,7 +193,7 @@ func TestIndex_ParseOne(t *testing.T) {
f, path := newTestFile(t, "date")
defer f.Close()
- f.WriteString("---\ndate: May 1, 2025")
+ f.WriteString("---\ndate: May 1, 2025\n---\n")
return path
},
@@ -266,6 +266,7 @@ func TestIndex_ParseOne(t *testing.T) {
if !errors.Is(gotErr, tt.wantErr) {
t.Errorf("Recieved unexpected error: want %v got %v", tt.wantErr, gotErr)
+ return
} else if gotErr != nil {
return
}