diff options
| author | JP Appel <jeanpierre.appel01@gmail.com> | 2025-07-02 00:06:10 -0400 |
|---|---|---|
| committer | JP Appel <jeanpierre.appel01@gmail.com> | 2025-07-02 00:06:10 -0400 |
| commit | ba68130862dc004a7a1b50d99fc70872d39fd065 (patch) | |
| tree | ef4f08b1d91ff86f717fff4b30474048f77ae4c1 /pkg/index | |
| parent | 4582265de0c0472755880652dc7b390b342cf3e0 (diff) | |
Add link parsing
Diffstat (limited to 'pkg/index')
| -rw-r--r-- | pkg/index/index.go | 31 | ||||
| -rw-r--r-- | pkg/index/index_test.go | 25 |
2 files changed, 54 insertions, 2 deletions
```diff
diff --git a/pkg/index/index.go b/pkg/index/index.go
index d49636f..13c4f45 100644
--- a/pkg/index/index.go
+++ b/pkg/index/index.go
@@ -1,11 +1,13 @@
 package index
 
 import (
+	"bytes"
 	"errors"
 	"fmt"
 	"io"
 	"log/slog"
 	"os"
+	"regexp"
 	"slices"
 	"strings"
 	"sync"
@@ -17,6 +19,7 @@ import (
 )
 
 var ErrHeaderParse error = errors.New("Unable to parse YAML header")
+var linkRegex *regexp.Regexp
 
 type Document struct {
 	Path string `yaml:"-" json:"path"`
@@ -32,6 +35,7 @@ type Document struct {
 
 type ParseOpts struct {
 	ParseMeta bool
+	ParseLinks bool
 	IgnoreDateError bool
 	IgnoreMetaError bool
 }
@@ -345,12 +349,31 @@ func ParseDoc(path string, opts ParseOpts) (*Document, error) {
 	if pos < 0 {
 		return nil, fmt.Errorf("Can't find YAML header in %s", path)
 	}
+	header := io.NewSectionReader(f, 0, pos)
 
-	if err := yaml.NewDecoder(io.LimitReader(f, pos)).Decode(doc); err != nil {
+	if err := yaml.NewDecoder(header).Decode(doc); err != nil {
 		return nil, errors.Join(ErrHeaderParse, err)
 	}
 
-	// TODO: read the rest of the file to find links
+	if opts.ParseLinks {
+		var buf bytes.Buffer
+		f.Seek(pos, io.SeekStart)
+		if _, err := io.Copy(&buf, f); err != nil {
+			return nil, err
+		}
+
+		matches := linkRegex.FindAllSubmatch(buf.Bytes(), -1)
+		for _, match := range matches {
+			if len(match) != 2 {
+				panic("Link parsing regex returned unexpected number of matches")
+			}
+			link := string(match[1])
+			if len(link) > 0 && len(strings.TrimSpace(link)) > 0 {
+				doc.Links = append(doc.Links, link)
+			}
+		}
+	}
+
 	return doc, nil
 }
 
@@ -396,3 +419,7 @@ func ParseDocs(paths []string, numWorkers uint, opts ParseOpts) map[string]*Docu
 
 	return docs
 }
+
+func init() {
+	linkRegex = regexp.MustCompile(`\[.*\]\((.*)\)`)
+}
diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go
index 0a3239d..69600c2 100644
--- a/pkg/index/index_test.go
+++ b/pkg/index/index_test.go
@@ -251,6 +251,31 @@ func TestIndex_ParseOne(t *testing.T) {
 			nil,
 		},
 		{
+			"links",
+			func(t *testing.T) string {
+				f, path := newTestFile(t, "links")
+				defer f.Close()
+
+				f.WriteString("---\n")
+				f.WriteString("title: Link test\n")
+				f.WriteString("---\n")
+				f.WriteString(`
+				Here are some words in a *markdown* file.
+				In this sentence there is a valid [hyperlink](https://jpappel.xyz).
+				But in this sentence, the [link]() should not get parsed.
+				The same is true for the [link]( ) in this sentence.
+				`)
+
+				return path
+			},
+			index.ParseOpts{ParseLinks: true},
+			&index.Document{
+				Title: "Link test",
+				Links: []string{"https://jpappel.xyz"},
+			},
+			nil,
+		},
+		{
 			"bad tags",
 			func(t *testing.T) string {
 				f, path := newTestFile(t, "badtags")
```
