diff options
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/data/db.go | 20 | ||||
| -rw-r--r-- | pkg/data/get.go | 18 | ||||
| -rw-r--r-- | pkg/data/get_test.go | 22 | ||||
| -rw-r--r-- | pkg/data/put.go | 30 | ||||
| -rw-r--r-- | pkg/data/put_test.go | 77 | ||||
| -rw-r--r-- | pkg/index/index.go | 31 | ||||
| -rw-r--r-- | pkg/index/index_test.go | 25 |
7 files changed, 170 insertions, 53 deletions
diff --git a/pkg/data/db.go b/pkg/data/db.go index 24a8793..9c8c5b1 100644 --- a/pkg/data/db.go +++ b/pkg/data/db.go @@ -19,7 +19,7 @@ type Query struct { // // output is in the form // -// <query> <start><(n-1)*(<val><delim)>><val><delim><stop> +// <query> <start><(n-1)*(<val><delim)>><val><stop> func BatchQuery[T any](query string, start string, val string, delim string, stop string, n int, baseArgs []T) (string, []any) { args := make([]any, len(baseArgs)) for i, arg := range baseArgs { @@ -139,10 +139,10 @@ func createSchema(db *sql.DB) error { _, err = tx.Exec(` CREATE TABLE IF NOT EXISTS Links( - referencedId INT, - refererId INT, - FOREIGN KEY (referencedId) REFERENCES Documents(id), - FOREIGN KEY (refererId) REFERENCES Documents(id) + docId INT, + link TEXT NOT NULL, + FOREIGN KEY (docId) REFERENCES Documents(id), + UNIQUE(docId, link) )`) if err != nil { tx.Rollback() @@ -198,6 +198,12 @@ func createSchema(db *sql.DB) error { return err } + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_links_link ON Links(link)") + if err != nil { + tx.Rollback() + return err + } + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)") if err != nil { tx.Rollback() @@ -214,13 +220,15 @@ func createSchema(db *sql.DB) error { d.fileTime, d.meta, COALESCE(a.name, al.alias) AS author, - t.name AS tag + t.name AS tag, + l.link FROM Documents d LEFT JOIN DocumentAuthors da ON d.id = da.docId LEFT JOIN Authors a ON da.authorId = a.id LEFT JOIN Aliases al ON a.id = al.authorId LEFT JOIN DocumentTags dt ON d.id = dt.docId LEFT JOIN Tags t ON dt.tagId = t.id + LEFT JOIN Links l ON d.id = l.docId `) if err != nil { tx.Rollback() diff --git a/pkg/data/get.go b/pkg/data/get.go index 09d4587..8dafb24 100644 --- a/pkg/data/get.go +++ b/pkg/data/get.go @@ -108,6 +108,8 @@ func (f *FillMany) documents(ctx context.Context, rows *sql.Rows) error { return err } defer rows.Close() + } else { + // TODO: check if rows.ColumnTypes() matches expected } var id int @@ -273,11 +275,9 @@ func (f FillMany) tags(ctx context.Context) error { func (f Fill) links(ctx context.Context) error { rows, err := f.Db.QueryContext(ctx, ` - SELECT path - FROM Documents - JOIN Links - ON Links.referencedId = Documents.id - WHERE Links.refererId = ? + SELECT link + FROM Links + WHERE Links.docId = ? `, f.id) if err != nil { return err @@ -299,11 +299,9 @@ func (f Fill) links(ctx context.Context) error { func (f FillMany) links(ctx context.Context) error { stmt, err := f.Db.PrepareContext(ctx, ` - SELECT path - FROM Documents - JOIN Links - ON Links.referencedId = Documents.id - WHERE Links.refererId = ? + SELECT link + FROM Links + WHERE Links.docId = ? `) if err != nil { return err diff --git a/pkg/data/get_test.go b/pkg/data/get_test.go index 14d6920..22e5af2 100644 --- a/pkg/data/get_test.go +++ b/pkg/data/get_test.go @@ -1,7 +1,6 @@ package data_test import ( - "context" "database/sql" "errors" "testing" @@ -57,6 +56,13 @@ func singleDoc(t *testing.T) *sql.DB { t.Fatal("err inserting docTags:", err) } + if _, err := db.Exec(` + INSERT INTO Links (docId, link) + VALUES (1, 'link1'), (1, 'link2') + `); err != nil { + t.Fatal("err inserting links:", err) + } + return db } @@ -106,6 +112,13 @@ func multiDoc(t *testing.T) *sql.DB { t.Fatal("err inserting docTags:", err) } + if _, err := db.Exec(` + INSERT INTO Links (docId, link) + VALUES (1, '/home'), (2, 'rsync://rsync.kernel.org/pub/') + `); err != nil { + t.Fatal("err inserting links:", err) + } + return db } @@ -129,6 +142,7 @@ func TestFill_Get(t *testing.T) { FileTime: time.Unix(2, 0), Authors: []string{"jp"}, Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link1", "link2"}, }, nil, }, @@ -136,7 +150,7 @@ func TestFill_Get(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { f := tt.newFill(t) - got, gotErr := f.Get(context.Background()) + got, gotErr := f.Get(t.Context()) if !errors.Is(gotErr, tt.wantErr) { t.Fatalf("Recieved unexpected error: got %v want %v", gotErr, tt.wantErr) @@ -172,6 +186,7 @@ func TestFillMany_Get(t *testing.T) { FileTime: time.Unix(2, 0), Authors: []string{"jp"}, Tags: []string{"foo", "baz"}, + Links: []string{"/home"}, }, "README.md": { Path: "README.md", @@ -180,6 +195,7 @@ func TestFillMany_Get(t *testing.T) { FileTime: time.Unix(4, 0), Authors: []string{"anonymous", "jp"}, Tags: []string{"bar", "oof"}, + Links: []string{"rsync://rsync.kernel.org/pub/"}, }, }, nil, @@ -187,7 +203,7 @@ func TestFillMany_Get(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - ctx := context.Background() + ctx := t.Context() f := tt.newFillMany(t) got, gotErr := f.Get(ctx) diff --git a/pkg/data/put.go b/pkg/data/put.go index e0185ae..0d49e60 100644 --- a/pkg/data/put.go +++ b/pkg/data/put.go @@ -226,14 +226,13 @@ func (p Put) links() error { return nil } - preQuery := fmt.Sprintf(` - INSERT INTO Links (referencedId, refererId) - SELECT id, %d - FROM Documents - WHERE path IN - `, p.Id) - query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(p.Doc.Links), p.Doc.Links) - if _, err := p.tx.Exec(query, args...); err != nil { + preQuery := ` + INSERT INTO Links (docId, link) + VALUES + ` + valueStr := fmt.Sprintf("(%d,?)", p.Id) + query, args := BatchQuery(preQuery, "", valueStr, ",", "", len(p.Doc.Links), p.Doc.Links) + if _, err := p.tx.Exec(query + "\n ON CONFLICT DO NOTHING", args...); err != nil { return err } @@ -251,14 +250,13 @@ func (p PutMany) links(ctx context.Context) error { continue } - preQuery := fmt.Sprintf(` - INSERT INTO Links (referencedId, refererId) - SELECT id, %d - FROM Documents - WHERE path IN - `, id) - query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(doc.Links), doc.Links) - if _, err := tx.Exec(query, args...); err != nil { + preQuery := ` + INSERT INTO Links (docId, link) + VALUES + ` + valueStr := fmt.Sprintf("(%d,?)", id) + query, args := BatchQuery(preQuery, "", valueStr, ",", "", len(doc.Links), doc.Links) + if _, err := tx.Exec(query +"\n ON CONFLICT DO NOTHING", args...); err != nil { tx.Rollback() return err } diff --git a/pkg/data/put_test.go b/pkg/data/put_test.go index 7e5ad38..a97187b 100644 --- a/pkg/data/put_test.go +++ b/pkg/data/put_test.go @@ -1,7 +1,6 @@ package data_test import ( - "context" "database/sql" "errors" "testing" @@ -31,13 +30,14 @@ func TestPut_Insert(t *testing.T) { FileTime: time.Unix(2, 0), Authors: []string{"jp"}, Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, }, nil, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - ctx := context.Background() + ctx := t.Context() db := tt.newDb(t) defer db.Close() @@ -68,31 +68,76 @@ func TestPut_Insert(t *testing.T) { func TestPutMany_Insert(t *testing.T) { tests := []struct { - name string // description of this test case - // Named input parameters for receiver constructor. - db *sql.DB + name string + newDb func(t *testing.T) *sql.DB documents map[string]*index.Document - wantErr bool + wantErr error }{ - // TODO: Add test cases. + { + name: "insert on empty", + newDb: func(t *testing.T) *sql.DB { + t.Helper() + return data.NewMemDB() + }, + documents: map[string]*index.Document{ + "/file": { + Path: "/file", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }, + "/file2": { + Path: "/file2", + Title: "A different file", + Date: time.Unix(3, 0), + FileTime: time.Unix(4, 0), + Authors: []string{"pj"}, + Tags: []string{"apple", "pear", "peach"}, + Links: []string{"a very useful link"}, + }, + }, + wantErr: nil, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - p, err := data.NewPutMany(tt.db, tt.documents) + db := tt.newDb(t) + p, err := data.NewPutMany(db, tt.documents) if err != nil { t.Fatalf("could not construct receiver type: %v", err) } - gotErr := p.Insert(context.Background()) - if gotErr != nil { - if !tt.wantErr { - t.Errorf("Insert() failed: %v", gotErr) - } + + gotErr := p.Insert(t.Context()) + if !errors.Is(gotErr, tt.wantErr) { + t.Fatalf("Recieved unexpected error, got %v want %v", gotErr, tt.wantErr) + } else if err != nil { return } - if tt.wantErr { - t.Fatal("Insert() succeeded unexpectedly") + + f := data.FillMany{Db: db} + gotDocs, err := f.Get(t.Context()) + if err != nil { + t.Fatal("Error while retrieving documents for comparison:", err) + } + + wantLen, gotLen := len(tt.documents), len(gotDocs) + if wantLen != gotLen { + t.Fatalf("Recieved differnt number of documents than expected: got %d, want %d", gotLen, wantLen) + } + + for path, wantDoc := range tt.documents { + gotDoc, ok := gotDocs[path] + if !ok { + t.Errorf("Wanted doc with path %s but did not recieve it", path) + } + + if !wantDoc.Equal(*gotDoc) { + t.Errorf("Difference betwen docs!\ngot: %+v\nwant: %+v", gotDoc, wantDoc) + } } }) } } - diff --git a/pkg/index/index.go b/pkg/index/index.go index d49636f..13c4f45 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -1,11 +1,13 @@ package index import ( + "bytes" "errors" "fmt" "io" "log/slog" "os" + "regexp" "slices" "strings" "sync" @@ -17,6 +19,7 @@ import ( ) var ErrHeaderParse error = errors.New("Unable to parse YAML header") +var linkRegex *regexp.Regexp type Document struct { Path string `yaml:"-" json:"path"` @@ -32,6 +35,7 @@ type Document struct { type ParseOpts struct { ParseMeta bool + ParseLinks bool IgnoreDateError bool IgnoreMetaError bool } @@ -345,12 +349,31 @@ func ParseDoc(path string, opts ParseOpts) (*Document, error) { if pos < 0 { return nil, fmt.Errorf("Can't find YAML header in %s", path) } + header := io.NewSectionReader(f, 0, pos) - if err := yaml.NewDecoder(io.LimitReader(f, pos)).Decode(doc); err != nil { + if err := yaml.NewDecoder(header).Decode(doc); err != nil { return nil, errors.Join(ErrHeaderParse, err) } - // TODO: read the rest of the file to find links + if opts.ParseLinks { + var buf bytes.Buffer + f.Seek(pos, io.SeekStart) + if _, err := io.Copy(&buf, f); err != nil { + return nil, err + } + + matches := linkRegex.FindAllSubmatch(buf.Bytes(), -1) + for _, match := range matches { + if len(match) != 2 { + panic("Link parsing regex returned unexpected number of matches") + } + link := string(match[1]) + if len(link) > 0 && len(strings.TrimSpace(link)) > 0 { + doc.Links = append(doc.Links, link) + } + } + } + return doc, nil } @@ -396,3 +419,7 @@ func ParseDocs(paths []string, numWorkers uint, opts ParseOpts) map[string]*Docu return docs } + +func init() { + linkRegex = regexp.MustCompile(`\[.*\]\((.*)\)`) +} diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go index 0a3239d..69600c2 100644 --- a/pkg/index/index_test.go +++ b/pkg/index/index_test.go @@ -251,6 +251,31 @@ func TestIndex_ParseOne(t *testing.T) { nil, }, { + "links", + func(t *testing.T) string { + f, path := newTestFile(t, "links") + defer f.Close() + + f.WriteString("---\n") + f.WriteString("title: Link test\n") + f.WriteString("---\n") + f.WriteString(` + Here are some words in a *markdown* file. + In this sentence there is a valid [hyperlink](https://jpappel.xyz). + But in this sentence, the [link]() should not get parsed. + The same is true for the [link]( ) in this sentence. + `) + + return path + }, + index.ParseOpts{ParseLinks: true}, + &index.Document{ + Title: "Link test", + Links: []string{"https://jpappel.xyz"}, + }, + nil, + }, + { "bad tags", func(t *testing.T) string { f, path := newTestFile(t, "badtags") |
