From ba68130862dc004a7a1b50d99fc70872d39fd065 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Wed, 2 Jul 2025 00:06:10 -0400 Subject: Add link parsing --- pkg/index/index.go | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'pkg/index/index.go') diff --git a/pkg/index/index.go b/pkg/index/index.go index d49636f..13c4f45 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -1,11 +1,13 @@ package index import ( + "bytes" "errors" "fmt" "io" "log/slog" "os" + "regexp" "slices" "strings" "sync" @@ -17,6 +19,7 @@ import ( ) var ErrHeaderParse error = errors.New("Unable to parse YAML header") +var linkRegex *regexp.Regexp type Document struct { Path string `yaml:"-" json:"path"` @@ -32,6 +35,7 @@ type Document struct { type ParseOpts struct { ParseMeta bool + ParseLinks bool IgnoreDateError bool IgnoreMetaError bool } @@ -345,12 +349,31 @@ func ParseDoc(path string, opts ParseOpts) (*Document, error) { if pos < 0 { return nil, fmt.Errorf("Can't find YAML header in %s", path) } + header := io.NewSectionReader(f, 0, pos) - if err := yaml.NewDecoder(io.LimitReader(f, pos)).Decode(doc); err != nil { + if err := yaml.NewDecoder(header).Decode(doc); err != nil { return nil, errors.Join(ErrHeaderParse, err) } - // TODO: read the rest of the file to find links + if opts.ParseLinks { + var buf bytes.Buffer + f.Seek(pos, io.SeekStart) + if _, err := io.Copy(&buf, f); err != nil { + return nil, err + } + + matches := linkRegex.FindAllSubmatch(buf.Bytes(), -1) + for _, match := range matches { + if len(match) != 2 { + panic("Link parsing regex returned unexpected number of matches") + } + link := string(match[1]) + if len(link) > 0 && len(strings.TrimSpace(link)) > 0 { + doc.Links = append(doc.Links, link) + } + } + } + return doc, nil } @@ -396,3 +419,7 @@ func ParseDocs(paths []string, numWorkers uint, opts ParseOpts) map[string]*Docu return docs } + +func init() { + linkRegex = regexp.MustCompile(`\[.*\]\((.*)\)`) +} -- cgit v1.2.3