about summary refs log tree commit diff stats
path: root/pkg/index
diff options
context:
space:
mode:
author JP Appel <jeanpierre.appel01@gmail.com> 2025-07-02 00:06:10 -0400
committer JP Appel <jeanpierre.appel01@gmail.com> 2025-07-02 00:06:10 -0400
commit ba68130862dc004a7a1b50d99fc70872d39fd065 (patch)
tree ef4f08b1d91ff86f717fff4b30474048f77ae4c1 /pkg/index
parent 4582265de0c0472755880652dc7b390b342cf3e0 (diff)
Add link parsing
Diffstat (limited to 'pkg/index')
-rw-r--r-- pkg/index/index.go 31
-rw-r--r-- pkg/index/index_test.go 25
2 files changed, 54 insertions, 2 deletions
diff --git a/pkg/index/index.go b/pkg/index/index.go
index d49636f..13c4f45 100644
--- a/pkg/index/index.go
+++ b/pkg/index/index.go
@@ -1,11 +1,13 @@
package index
import (
+ "bytes"
"errors"
"fmt"
"io"
"log/slog"
"os"
+ "regexp"
"slices"
"strings"
"sync"
@@ -17,6 +19,7 @@ import (
)
var ErrHeaderParse error = errors.New("Unable to parse YAML header") // joined with the decoder's error when the YAML header fails to decode
+var linkRegex *regexp.Regexp // compiled in init(); matches markdown inline links and captures the link target
type Document struct {
Path string `yaml:"-" json:"path"`
@@ -32,6 +35,7 @@ type Document struct {
type ParseOpts struct { // options accepted by ParseDoc and ParseDocs
ParseMeta bool // NOTE(review): usage not visible in this diff; presumably toggles metadata parsing — confirm
+ ParseLinks bool // when true, ParseDoc scans the text after the YAML header for markdown links and appends their targets to Document.Links
IgnoreDateError bool // NOTE(review): usage not visible in this diff; presumably tolerates date parse failures — confirm
IgnoreMetaError bool // NOTE(review): usage not visible in this diff; presumably tolerates metadata read failures — confirm
}
@@ -345,12 +349,31 @@ func ParseDoc(path string, opts ParseOpts) (*Document, error) {
if pos < 0 {
return nil, fmt.Errorf("Can't find YAML header in %s", path)
}
+ header := io.NewSectionReader(f, 0, pos)
- if err := yaml.NewDecoder(io.LimitReader(f, pos)).Decode(doc); err != nil {
+ if err := yaml.NewDecoder(header).Decode(doc); err != nil {
return nil, errors.Join(ErrHeaderParse, err)
}
- // TODO: read the rest of the file to find links
+	if opts.ParseLinks {
+		// Link extraction only looks at the document body, i.e. everything
+		// from pos (the end of the YAML header section) to EOF. A failed
+		// seek would make us scan from the wrong offset, so report it
+		// instead of ignoring it.
+		if _, err := f.Seek(pos, io.SeekStart); err != nil {
+			return nil, fmt.Errorf("seeking to body of %s: %w", path, err)
+		}
+
+		var buf bytes.Buffer
+		if _, err := io.Copy(&buf, f); err != nil {
+			return nil, fmt.Errorf("reading body of %s: %w", path, err)
+		}
+
+		for _, match := range linkRegex.FindAllSubmatch(buf.Bytes(), -1) {
+			// linkRegex has exactly one capture group, so every match is
+			// {whole link, target}; anything else means the pattern was
+			// changed without updating this loop.
+			if len(match) != 2 {
+				panic("Link parsing regex returned unexpected number of matches")
+			}
+			// Skip links whose target is empty or only whitespace.
+			if len(bytes.TrimSpace(match[1])) == 0 {
+				continue
+			}
+			doc.Links = append(doc.Links, string(match[1]))
+		}
+	}
+
return doc, nil
}
@@ -396,3 +419,7 @@ func ParseDocs(paths []string, numWorkers uint, opts ParseOpts) map[string]*Docu
return docs
}
+
+// init compiles linkRegex, the pattern used to find markdown inline links
+// of the form [text](target); capture group 1 is the target.
+//
+// The text and target classes exclude their own closing delimiter (and
+// newlines), so several links on one line are matched individually instead
+// of being swallowed into a single greedy match spanning from the first "["
+// to the last ")". A target that itself contains ")" is cut at the first ")".
+func init() {
+	linkRegex = regexp.MustCompile(`\[[^\]\n]*\]\(([^)\n]*)\)`)
+}
diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go
index 0a3239d..69600c2 100644
--- a/pkg/index/index_test.go
+++ b/pkg/index/index_test.go
@@ -251,6 +251,31 @@ func TestIndex_ParseOne(t *testing.T) {
nil,
},
{
+ "links",
+ func(t *testing.T) string {
+ f, path := newTestFile(t, "links")
+ defer f.Close()
+
+ f.WriteString("---\n")
+ f.WriteString("title: Link test\n")
+ f.WriteString("---\n")
+ f.WriteString(`
+ Here are some words in a *markdown* file.
+ In this sentence there is a valid [hyperlink](https://jpappel.xyz).
+ But in this sentence, the [link]() should not get parsed.
+ The same is true for the [link]( ) in this sentence.
+ `)
+
+ return path
+ },
+ index.ParseOpts{ParseLinks: true},
+ &index.Document{
+ Title: "Link test",
+ Links: []string{"https://jpappel.xyz"},
+ },
+ nil,
+ },
+ {
"bad tags",
func(t *testing.T) string {
f, path := newTestFile(t, "badtags")