From ba68130862dc004a7a1b50d99fc70872d39fd065 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Wed, 2 Jul 2025 00:06:10 -0400 Subject: Add link parsing --- pkg/data/db.go | 20 ++++++++++---- pkg/data/get.go | 18 ++++++------ pkg/data/get_test.go | 22 +++++++++++++-- pkg/data/put.go | 30 ++++++++++---------- pkg/data/put_test.go | 77 +++++++++++++++++++++++++++++++++++++++++----------- 5 files changed, 116 insertions(+), 51 deletions(-) (limited to 'pkg/data') diff --git a/pkg/data/db.go b/pkg/data/db.go index 24a8793..9c8c5b1 100644 --- a/pkg/data/db.go +++ b/pkg/data/db.go @@ -19,7 +19,7 @@ type Query struct { // // output is in the form // -// <(n-1)*(> +// <(n-1)*(> func BatchQuery[T any](query string, start string, val string, delim string, stop string, n int, baseArgs []T) (string, []any) { args := make([]any, len(baseArgs)) for i, arg := range baseArgs { @@ -139,10 +139,10 @@ func createSchema(db *sql.DB) error { _, err = tx.Exec(` CREATE TABLE IF NOT EXISTS Links( - referencedId INT, - refererId INT, - FOREIGN KEY (referencedId) REFERENCES Documents(id), - FOREIGN KEY (refererId) REFERENCES Documents(id) + docId INT, + link TEXT NOT NULL, + FOREIGN KEY (docId) REFERENCES Documents(id), + UNIQUE(docId, link) )`) if err != nil { tx.Rollback() @@ -198,6 +198,12 @@ func createSchema(db *sql.DB) error { return err } + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_links_link ON Links(link)") + if err != nil { + tx.Rollback() + return err + } + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)") if err != nil { tx.Rollback() @@ -214,13 +220,15 @@ func createSchema(db *sql.DB) error { d.fileTime, d.meta, COALESCE(a.name, al.alias) AS author, - t.name AS tag + t.name AS tag, + l.link FROM Documents d LEFT JOIN DocumentAuthors da ON d.id = da.docId LEFT JOIN Authors a ON da.authorId = a.id LEFT JOIN Aliases al ON a.id = al.authorId LEFT JOIN DocumentTags dt ON d.id = dt.docId LEFT JOIN Tags t ON dt.tagId = t.id + LEFT JOIN Links l ON d.id = l.docId `) if err != nil { tx.Rollback() diff --git a/pkg/data/get.go b/pkg/data/get.go index 09d4587..8dafb24 100644 --- a/pkg/data/get.go +++ b/pkg/data/get.go @@ -108,6 +108,8 @@ func (f *FillMany) documents(ctx context.Context, rows *sql.Rows) error { return err } defer rows.Close() + } else { + // TODO: check if rows.ColumnTypes() matches expected } var id int @@ -273,11 +275,9 @@ func (f FillMany) tags(ctx context.Context) error { func (f Fill) links(ctx context.Context) error { rows, err := f.Db.QueryContext(ctx, ` - SELECT path - FROM Documents - JOIN Links - ON Links.referencedId = Documents.id - WHERE Links.refererId = ? + SELECT link + FROM Links + WHERE Links.docId = ? `, f.id) if err != nil { return err @@ -299,11 +299,9 @@ func (f Fill) links(ctx context.Context) error { func (f FillMany) links(ctx context.Context) error { stmt, err := f.Db.PrepareContext(ctx, ` - SELECT path - FROM Documents - JOIN Links - ON Links.referencedId = Documents.id - WHERE Links.refererId = ? + SELECT link + FROM Links + WHERE Links.docId = ? `) if err != nil { return err diff --git a/pkg/data/get_test.go b/pkg/data/get_test.go index 14d6920..22e5af2 100644 --- a/pkg/data/get_test.go +++ b/pkg/data/get_test.go @@ -1,7 +1,6 @@ package data_test import ( - "context" "database/sql" "errors" "testing" @@ -57,6 +56,13 @@ func singleDoc(t *testing.T) *sql.DB { t.Fatal("err inserting docTags:", err) } + if _, err := db.Exec(` + INSERT INTO Links (docId, link) + VALUES (1, 'link1'), (1, 'link2') + `); err != nil { + t.Fatal("err inserting links:", err) + } + return db } @@ -106,6 +112,13 @@ func multiDoc(t *testing.T) *sql.DB { t.Fatal("err inserting docTags:", err) } + if _, err := db.Exec(` + INSERT INTO Links (docId, link) + VALUES (1, '/home'), (2, 'rsync://rsync.kernel.org/pub/') + `); err != nil { + t.Fatal("err inserting links:", err) + } + return db } @@ -129,6 +142,7 @@ func TestFill_Get(t *testing.T) { FileTime: time.Unix(2, 0), Authors: []string{"jp"}, Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link1", "link2"}, }, nil, }, @@ -136,7 +150,7 @@ func TestFill_Get(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { f := tt.newFill(t) - got, gotErr := f.Get(context.Background()) + got, gotErr := f.Get(t.Context()) if !errors.Is(gotErr, tt.wantErr) { t.Fatalf("Recieved unexpected error: got %v want %v", gotErr, tt.wantErr) @@ -172,6 +186,7 @@ func TestFillMany_Get(t *testing.T) { FileTime: time.Unix(2, 0), Authors: []string{"jp"}, Tags: []string{"foo", "baz"}, + Links: []string{"/home"}, }, "README.md": { Path: "README.md", @@ -180,6 +195,7 @@ func TestFillMany_Get(t *testing.T) { FileTime: time.Unix(4, 0), Authors: []string{"anonymous", "jp"}, Tags: []string{"bar", "oof"}, + Links: []string{"rsync://rsync.kernel.org/pub/"}, }, }, nil, @@ -187,7 +203,7 @@ func TestFillMany_Get(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - ctx := context.Background() + ctx := t.Context() f := tt.newFillMany(t) got, gotErr := f.Get(ctx) diff --git a/pkg/data/put.go b/pkg/data/put.go index e0185ae..0d49e60 100644 --- a/pkg/data/put.go +++ b/pkg/data/put.go @@ -226,14 +226,13 @@ func (p Put) links() error { return nil } - preQuery := fmt.Sprintf(` - INSERT INTO Links (referencedId, refererId) - SELECT id, %d - FROM Documents - WHERE path IN - `, p.Id) - query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(p.Doc.Links), p.Doc.Links) - if _, err := p.tx.Exec(query, args...); err != nil { + preQuery := ` + INSERT INTO Links (docId, link) + VALUES + ` + valueStr := fmt.Sprintf("(%d,?)", p.Id) + query, args := BatchQuery(preQuery, "", valueStr, ",", "", len(p.Doc.Links), p.Doc.Links) + if _, err := p.tx.Exec(query + "\n ON CONFLICT DO NOTHING", args...); err != nil { return err } @@ -251,14 +250,13 @@ func (p PutMany) links(ctx context.Context) error { continue } - preQuery := fmt.Sprintf(` - INSERT INTO Links (referencedId, refererId) - SELECT id, %d - FROM Documents - WHERE path IN - `, id) - query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(doc.Links), doc.Links) - if _, err := tx.Exec(query, args...); err != nil { + preQuery := ` + INSERT INTO Links (docId, link) + VALUES + ` + valueStr := fmt.Sprintf("(%d,?)", id) + query, args := BatchQuery(preQuery, "", valueStr, ",", "", len(doc.Links), doc.Links) + if _, err := tx.Exec(query +"\n ON CONFLICT DO NOTHING", args...); err != nil { tx.Rollback() return err } diff --git a/pkg/data/put_test.go b/pkg/data/put_test.go index 7e5ad38..a97187b 100644 --- a/pkg/data/put_test.go +++ b/pkg/data/put_test.go @@ -1,7 +1,6 @@ package data_test import ( - "context" "database/sql" "errors" "testing" @@ -31,13 +30,14 @@ func TestPut_Insert(t *testing.T) { FileTime: time.Unix(2, 0), Authors: []string{"jp"}, Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, }, nil, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - ctx := context.Background() + ctx := t.Context() db := tt.newDb(t) defer db.Close() @@ -68,31 +68,76 @@ func TestPut_Insert(t *testing.T) { func TestPutMany_Insert(t *testing.T) { tests := []struct { - name string // description of this test case - // Named input parameters for receiver constructor. - db *sql.DB + name string + newDb func(t *testing.T) *sql.DB documents map[string]*index.Document - wantErr bool + wantErr error }{ - // TODO: Add test cases. + { + name: "insert on empty", + newDb: func(t *testing.T) *sql.DB { + t.Helper() + return data.NewMemDB() + }, + documents: map[string]*index.Document{ + "/file": { + Path: "/file", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }, + "/file2": { + Path: "/file2", + Title: "A different file", + Date: time.Unix(3, 0), + FileTime: time.Unix(4, 0), + Authors: []string{"pj"}, + Tags: []string{"apple", "pear", "peach"}, + Links: []string{"a very useful link"}, + }, + }, + wantErr: nil, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - p, err := data.NewPutMany(tt.db, tt.documents) + db := tt.newDb(t) + p, err := data.NewPutMany(db, tt.documents) if err != nil { t.Fatalf("could not construct receiver type: %v", err) } - gotErr := p.Insert(context.Background()) - if gotErr != nil { - if !tt.wantErr { - t.Errorf("Insert() failed: %v", gotErr) - } + + gotErr := p.Insert(t.Context()) + if !errors.Is(gotErr, tt.wantErr) { + t.Fatalf("Recieved unexpected error, got %v want %v", gotErr, tt.wantErr) + } else if err != nil { return } - if tt.wantErr { - t.Fatal("Insert() succeeded unexpectedly") + + f := data.FillMany{Db: db} + gotDocs, err := f.Get(t.Context()) + if err != nil { + t.Fatal("Error while retrieving documents for comparison:", err) + } + + wantLen, gotLen := len(tt.documents), len(gotDocs) + if wantLen != gotLen { + t.Fatalf("Recieved differnt number of documents than expected: got %d, want %d", gotLen, wantLen) + } + + for path, wantDoc := range tt.documents { + gotDoc, ok := gotDocs[path] + if !ok { + t.Errorf("Wanted doc with path %s but did not recieve it", path) + } + + if !wantDoc.Equal(*gotDoc) { + t.Errorf("Difference betwen docs!\ngot: %+v\nwant: %+v", gotDoc, wantDoc) + } } }) } } - -- cgit v1.2.3