diff options
| author | JP Appel <jeanpierre.appel01@gmail.com> | 2025-04-27 00:49:27 -0400 |
|---|---|---|
| committer | JP Appel <jeanpierre.appel01@gmail.com> | 2025-04-27 00:49:27 -0400 |
| commit | 34b8d8ff1f9d65c08a9156d72f08cf548183c6f4 (patch) | |
| tree | a00fa0410a7bcde125a37b50b3a4956c838fa569 /pkg/data | |
| parent | 42527fdb0aca0d30652bb3052b80ab75ab057572 (diff) | |
Large commit; many features
Diffstat (limited to 'pkg/data')
| -rw-r--r-- | pkg/data/db.go | 259 | ||||
| -rw-r--r-- | pkg/data/db_test.go | 65 | ||||
| -rw-r--r-- | pkg/data/get.go | 330 | ||||
| -rw-r--r-- | pkg/data/get_test.go | 217 | ||||
| -rw-r--r-- | pkg/data/put.go | 384 | ||||
| -rw-r--r-- | pkg/data/put_test.go | 98 |
6 files changed, 1353 insertions, 0 deletions
diff --git a/pkg/data/db.go b/pkg/data/db.go new file mode 100644 index 0000000..fcf56f9 --- /dev/null +++ b/pkg/data/db.go @@ -0,0 +1,259 @@ +package data + +import ( + "context" + "database/sql" + "strings" + + "github.com/jpappel/atlas/pkg/index" + _ "github.com/mattn/go-sqlite3" +) + +type Query struct { + db *sql.DB +} + +// Append n copies of val to query +// +// output is in the form +// +// <query> <start><(n-1)*(<val><delim)>><val><delim><stop> +func BatchQuery[T any](query string, start string, val string, delim string, stop string, n int, baseArgs []T) (string, []any) { + args := make([]any, len(baseArgs)) + for i, arg := range baseArgs { + args[i] = arg + } + + b := strings.Builder{} + b.Grow(len(query) + 1 + len(start) + n*len(val) + ((n - 1) * len(delim)) + len(stop)) + + b.WriteString(query) + b.WriteRune(' ') + b.WriteString(start) + for range n - 1 { + b.WriteString(val) + b.WriteString(delim) + } + b.WriteString(val) + b.WriteString(stop) + + return b.String(), args +} + +func NewQuery(filename string) *Query { + query := &Query{NewDB(filename)} + return query +} + +func NewDB(filename string) *sql.DB { + connStr := "file:" + filename + "?_fk=true&_journal=WAL" + db, err := sql.Open("sqlite3", connStr) + if err != nil { + panic(err) + } + + if err := createSchema(db); err != nil { + panic(err) + } + + return db +} + +func NewMemDB() *sql.DB { + db, err := sql.Open("sqlite3", ":memory:?_fk=true") + if err != nil { + panic(err) + } + + if err := createSchema(db); err != nil { + panic(err) + } + + return db +} + +func createSchema(db *sql.DB) error { + ctx := context.TODO() + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return err + } + defer tx.Commit() + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS Indexes( + root TEXT NOT NULL, + followSym DATE + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS Documents( + id INTEGER PRIMARY KEY, + path TEXT UNIQUE NOT NULL, + title TEXT, + date INT, + fileTime INT, + meta BLOB + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS Authors( + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS Aliases( + authorId INT NOT NULL, + alias TEXT UNIQUE NOT NULL, + FOREIGN KEY (authorId) REFERENCES Authors(id) + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS Tags( + id INTEGER PRIMARY KEY, + name TEXT UNIQUE NOT NULL + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS Links( + referencedId INT, + refererId INT, + FOREIGN KEY (referencedId) REFERENCES Documents(id), + FOREIGN KEY (refererId) REFERENCES Documents(id) + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS DocumentAuthors( + docId INT NOT NULL, + authorId INT NOT NULL, + FOREIGN KEY (docId) REFERENCES Documents(id), + FOREIGN KEY (authorId) REFERENCES Authors(id) + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TABLE IF NOT EXISTS DocumentTags( + docId INT NOT NULL, + tagId INT NOT NULL, + FOREIGN KEY (docId) REFERENCES Documents(id), + FOREIGN KEY (tagId) REFERENCES Tags(id), + UNIQUE(docId, tagId) + )`) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doc_dates ON Documents (date)") + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doc_titles ON Documents (title)") + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_alias ON Aliases(alias)") + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_authorId ON Aliases(authorId)") + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)") + if err != nil { + tx.Rollback() + return err + } + + return nil +} + +func (q Query) Close() error { + return q.db.Close() +} + +// Create an index +func (q Query) Get(indexRoot string) (*index.Index, error) { + ctx := context.TODO() + + docs, err := FillMany{Db: q.db}.Get(ctx) + if err != nil { + return nil, err + } + + idx := &index.Index{ + Root: indexRoot, + Documents: docs, + Filters: index.DefaultFilters(), + } + + return idx, nil +} + +// Write from index to database +func (q Query) Put(idx index.Index) error { + ctx := context.TODO() + + insertCtx, cancel := context.WithCancelCause(ctx) + defer cancel(nil) + + p, err := NewPutMany(q.db, idx.Documents) + if err != nil { + return err + } + + if err := p.Insert(insertCtx); err != nil { + return err + } + + return nil +} + +// Update database with values from index +func (q Query) Update(idx index.Index) error { + // TODO: implement + return nil +} + +func (q Query) GetDocument(path string) (*index.Document, error) { + ctx := context.TODO() + f := Fill{Path: path, Db: q.db} + return f.Get(ctx) +} diff --git a/pkg/data/db_test.go b/pkg/data/db_test.go new file mode 100644 index 0000000..b90a943 --- /dev/null +++ b/pkg/data/db_test.go @@ -0,0 +1,65 @@ +package data_test + +import ( + "slices" + "testing" + + "github.com/jpappel/atlas/pkg/data" +) + +func TestBatchQuery(t *testing.T) { + tests := []struct { + name string + query string + start string + val string + delim string + stop string + n int + args []int + wantQuery string + wantArgs []any + }{ + { + "1 val group", + "INSERT INTO Foo VALUES", + "(", + "?", + ",", + ")", + 5, + []int{1, 2, 3, 4, 5}, + "INSERT INTO Foo VALUES (?,?,?,?,?)", + []any{1, 2, 3, 4, 5}, + }, + { + "multiple val groups", + "INSERT INTO Bar VALUES", + "", + "(?,?)", + ",", + "", + 2, + []int{1, 2, 3, 4}, + "INSERT INTO Bar VALUES (?,?),(?,?)", + []any{1, 2, 3, 4}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + gotQuery, gotArgs := data.BatchQuery(tt.query, tt.start, tt.val, tt.delim, tt.stop, tt.n, tt.args) + if gotQuery != tt.wantQuery { + t.Error("Got different query than wanted") + t.Log("Wanted:\n" + tt.wantQuery) + t.Log("Got:\n" + gotQuery) + } + + if !slices.Equal(tt.wantArgs, gotArgs) { + t.Error("Got different args than wanted") + t.Logf("Wanted:\t%v", tt.wantArgs) + t.Logf("Got:\t%v", gotArgs) + } + }) + } +} diff --git a/pkg/data/get.go b/pkg/data/get.go new file mode 100644 index 0000000..7cae03a --- /dev/null +++ b/pkg/data/get.go @@ -0,0 +1,330 @@ +package data + +import ( + "context" + "database/sql" + "time" + + "github.com/jpappel/atlas/pkg/index" +) + +// TODO: rename struct +// +// Use to build a document from a database connection +type Fill struct { + Path string + Db *sql.DB + id int + doc *index.Document +} + +// TODO: rename struct +// +// Use to build documents and aliases from a database connection +type FillMany struct { + docs map[string]*index.Document + ids map[string]int + Db *sql.DB +} + +func (f Fill) Get(ctx context.Context) (*index.Document, error) { + f.doc = &index.Document{Path: f.Path} + if err := f.document(ctx); err != nil { + return nil, err + } + if err := f.tags(ctx); err != nil { + return nil, err + } + if err := f.authors(ctx); err != nil { + return nil, err + } + if err := f.links(ctx); err != nil { + return nil, err + } + + return f.doc, nil +} + +func (f FillMany) Get(ctx context.Context) (map[string]*index.Document, error) { + f.docs = make(map[string]*index.Document) + f.ids = make(map[string]int) + + if err := f.documents(ctx); err != nil { + return nil, err + } + if err := f.tags(ctx); err != nil { + return nil, err + } + if err := f.links(ctx); err != nil { + return nil, err + } + if err := f.authors(ctx); err != nil { + return nil, err + } + + return f.docs, nil +} + +func (f *Fill) document(ctx context.Context) error { + var title sql.NullString + var dateEpoch sql.NullInt64 + var fileTimeEpoch sql.NullInt64 + var meta sql.NullString + + row := f.Db.QueryRowContext(ctx, ` + SELECT id, title, date, fileTime, meta + FROM Documents + WHERE path = ? + `, f.Path) + if err := row.Scan(&f.id, &title, &dateEpoch, &fileTimeEpoch, &meta); err != nil { + return err + } + + if title.Valid { + f.doc.Title = title.String + } + if dateEpoch.Valid { + f.doc.Date = time.Unix(dateEpoch.Int64, 0) + } + if fileTimeEpoch.Valid { + f.doc.FileTime = time.Unix(fileTimeEpoch.Int64, 0) + } + if meta.Valid { + f.doc.OtherMeta = meta.String + } + return nil +} + +func (f *FillMany) documents(ctx context.Context) error { + rows, err := f.Db.QueryContext(ctx, ` + SELECT id, path, title, date, fileTime, meta + FROM Documents + `) + if err != nil { + return err + } + defer rows.Close() + + var id int + var docPath string + var title, meta sql.NullString + var dateEpoch, filetimeEpoch sql.NullInt64 + + for rows.Next() { + if err := rows.Scan(&id, &docPath, &title, &dateEpoch, &filetimeEpoch, &meta); err != nil { + return err + } + + doc := &index.Document{ + Path: docPath, + } + + if title.Valid { + doc.Title = title.String + } + if dateEpoch.Valid { + doc.Date = time.Unix(dateEpoch.Int64, 0) + } + if filetimeEpoch.Valid { + doc.FileTime = time.Unix(filetimeEpoch.Int64, 0) + } + if meta.Valid { + doc.OtherMeta = meta.String + } + + f.docs[docPath] = doc + f.ids[docPath] = id + } + + return nil +} + +func (f Fill) authors(ctx context.Context) error { + rows, err := f.Db.QueryContext(ctx, ` + SELECT name + FROM Authors + JOIN DocumentAuthors + ON Authors.id = DocumentAuthors.authorId + WHERE docId = ? + `, f.id) + if err != nil { + return err + } + defer rows.Close() + + var name string + authors := make([]string, 0, 4) + for rows.Next() { + if err := rows.Scan(&name); err != nil { + return err + } + authors = append(authors, name) + } + + f.doc.Authors = authors + + return nil +} + +func (f FillMany) authors(ctx context.Context) error { + stmt, err := f.Db.PrepareContext(ctx, ` + SELECT name + FROM Authors + JOIN DocumentAuthors + ON Authors.id = DocumentAuthors.authorId + WHERE docId = ? + `) + if err != nil { + return err + } + defer stmt.Close() + + // PERF: parallelize + var name string + for path, id := range f.ids { + rows, err := stmt.QueryContext(ctx, id) + if err != nil { + return err + } + + doc := f.docs[path] + for rows.Next() { + if err := rows.Scan(&name); err != nil { + rows.Close() + return err + } + + doc.Authors = append(doc.Authors, name) + } + + rows.Close() + } + + return nil +} + +func (f Fill) tags(ctx context.Context) error { + rows, err := f.Db.QueryContext(ctx, ` + SELECT name + FROM Tags + JOIN DocumentTags + ON Tags.id = DocumentTags.tagId + WHERE docId = ? + `, f.id) + if err != nil { + panic(err) + } + defer rows.Close() + + var tag string + tags := make([]string, 0, 2) + for rows.Next() { + if err := rows.Scan(&tag); err != nil { + return err + } + tags = append(tags, tag) + } + + f.doc.Tags = tags + + return nil +} + +func (f FillMany) tags(ctx context.Context) error { + stmt, err := f.Db.PrepareContext(ctx, ` + SELECT name + FROM Tags + JOIN DocumentTags + ON Tags.id = DocumentTags.tagId + WHERE docId = ? + `) + if err != nil { + return err + } + defer stmt.Close() + + // PERF: parallelize + var tag string + for docPath, id := range f.ids { + rows, err := stmt.QueryContext(ctx, id) + if err != nil { + return err + } + + doc := f.docs[docPath] + for rows.Next() { + if err := rows.Scan(&tag); err != nil { + rows.Close() + return err + } + + doc.Tags = append(doc.Tags, tag) + } + + rows.Close() + } + + return nil +} + +func (f Fill) links(ctx context.Context) error { + rows, err := f.Db.QueryContext(ctx, ` + SELECT path + FROM Documents + JOIN Links + ON Links.referencedId = Documents.id + WHERE Links.refererId = ? + `, f.id) + if err != nil { + return err + } + defer rows.Close() + + var link string + links := make([]string, 0) + for rows.Next() { + if err := rows.Scan(&link); err != nil { + return err + } + links = append(links, link) + } + f.doc.Links = links + + return nil +} + +func (f FillMany) links(ctx context.Context) error { + stmt, err := f.Db.PrepareContext(ctx, ` + SELECT path + FROM Documents + JOIN Links + ON Links.referencedId = Documents.id + WHERE Links.refererId = ? + `) + if err != nil { + return err + } + defer stmt.Close() + + // PERF: parallelize + var linkPath string + for path, id := range f.ids { + rows, err := stmt.QueryContext(ctx, id) + if err != nil { + return err + } + + doc := f.docs[path] + for rows.Next() { + if err := rows.Scan(&linkPath); err != nil { + rows.Close() + return err + } + doc.Links = append(doc.Links, linkPath) + } + + rows.Close() + } + + return nil +} diff --git a/pkg/data/get_test.go b/pkg/data/get_test.go new file mode 100644 index 0000000..14d6920 --- /dev/null +++ b/pkg/data/get_test.go @@ -0,0 +1,217 @@ +package data_test + +import ( + "context" + "database/sql" + "errors" + "testing" + "time" + + "github.com/jpappel/atlas/pkg/data" + "github.com/jpappel/atlas/pkg/index" +) + +func singleDoc(t *testing.T) *sql.DB { + t.Helper() + db := data.NewMemDB() + + if _, err := db.Exec(` + INSERT INTO Documents (path, title, date, fileTime) + VALUES ("/file", "A file", 1, 2) + `); err != nil { + t.Fatal("err inserting doc:", err) + } + + if _, err := db.Exec(` + INSERT INTO Authors (name) + VALUES ("jp") + `); err != nil { + t.Fatal("err inserting author:", err) + } + + if _, err := db.Exec(` + INSERT INTO Aliases (authorId, alias) + VALUES (1,"pj"), (1,"JP") + `); err != nil { + t.Fatal("err inserting aliases:", err) + } + + if _, err := db.Exec(` + INSERT INTO Tags (name) + VALUES ("foo"), ("bar"), ("baz"), ("oof") + `); err != nil { + t.Fatal("err inserting tags:", err) + } + + if _, err := db.Exec(` + INSERT INTO DocumentAuthors (docId, authorId) + VALUES (1, 1) + `); err != nil { + t.Fatal("err inserting docAuthors:", err) + } + + if _, err := db.Exec(` + INSERT INTO DocumentTags (docId, tagId) + VALUES (1,1), (1,2), (1,3), (1,4) + `); err != nil { + t.Fatal("err inserting docTags:", err) + } + + return db +} + +func multiDoc(t *testing.T) *sql.DB { + t.Helper() + db := data.NewMemDB() + + if _, err := db.Exec(` + INSERT INTO Documents (path, title, date, fileTime) + VALUES ("/notes/anote.md", "A note", 1, 2), ("README.md", "read this file!", 3, 4) + `); err != nil { + t.Fatal("err inserting doc:", err) + } + + if _, err := db.Exec(` + INSERT INTO Authors (name) + VALUES ("jp"), ("anonymous") + `); err != nil { + t.Fatal("err inserting author:", err) + } + + if _, err := db.Exec(` + INSERT INTO Aliases (authorId, alias) + VALUES (1,"pj"), (1,"JP") + `); err != nil { + t.Fatal("err inserting aliases:", err) + } + + if _, err := db.Exec(` + INSERT INTO Tags (name) + VALUES ("foo"), ("bar"), ("baz"), ("oof") + `); err != nil { + t.Fatal("err inserting tags:", err) + } + + if _, err := db.Exec(` + INSERT INTO DocumentAuthors (docId, authorId) + VALUES (1, 1), (2, 2), (2, 1) + `); err != nil { + t.Fatal("err inserting docAuthors:", err) + } + + if _, err := db.Exec(` + INSERT INTO DocumentTags (docId, tagId) + VALUES (1,1), (2,2), (1,3), (2,4) + `); err != nil { + t.Fatal("err inserting docTags:", err) + } + + return db +} + +func TestFill_Get(t *testing.T) { + tests := []struct { + name string + newFill func(t *testing.T) data.Fill + want index.Document + wantErr error + }{ + { + "single doc", + func(t *testing.T) data.Fill { + t.Helper() + return data.Fill{Path: "/file", Db: singleDoc(t)} + }, + index.Document{ + Path: "/file", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + }, + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + f := tt.newFill(t) + got, gotErr := f.Get(context.Background()) + + if !errors.Is(gotErr, tt.wantErr) { + t.Fatalf("Recieved unexpected error: got %v want %v", gotErr, tt.wantErr) + } else if gotErr != nil { + return + } + + if !got.Equal(tt.want) { + t.Errorf("Get() = %+v\nWant %+v", got, tt.want) + } + }) + } +} + +func TestFillMany_Get(t *testing.T) { + tests := []struct { + name string + newFillMany func(t *testing.T) data.FillMany + want map[string]*index.Document + wantErr error + }{ + { + "multi doc", + func(t *testing.T) data.FillMany { + t.Helper() + return data.FillMany{Db: multiDoc(t)} + }, + map[string]*index.Document{ + "/notes/anote.md": { + Path: "/notes/anote.md", + Title: "A note", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "baz"}, + }, + "README.md": { + Path: "README.md", + Title: "read this file!", + Date: time.Unix(3, 0), + FileTime: time.Unix(4, 0), + Authors: []string{"anonymous", "jp"}, + Tags: []string{"bar", "oof"}, + }, + }, + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + f := tt.newFillMany(t) + + got, gotErr := f.Get(ctx) + if !errors.Is(gotErr, tt.wantErr) { + t.Fatalf("Recieved unexpected error: got %v want %v", gotErr, tt.wantErr) + } else if gotErr != nil { + return + } + + if len(tt.want) != len(got) { + t.Errorf("Recieved incorrect amount of documents: got %d want %d", len(got), len(tt.want)) + } + + for path, wantDoc := range tt.want { + gotDoc, ok := got[path] + if !ok { + t.Errorf("Can't find %s in recieved docs", path) + continue + } + + if !gotDoc.Equal(*wantDoc) { + t.Errorf("%s not equal %+v\nWant %+v", path, gotDoc, wantDoc) + } + } + }) + } +} diff --git a/pkg/data/put.go b/pkg/data/put.go new file mode 100644 index 0000000..f3fe4b6 --- /dev/null +++ b/pkg/data/put.go @@ -0,0 +1,384 @@ +package data + +import ( + "context" + "database/sql" + "fmt" + + "github.com/jpappel/atlas/pkg/index" +) + +// TODO: rename struct +type Put struct { + Id int64 + Doc index.Document + tx *sql.Tx +} + +// TODO: rename struct +type PutMany struct { + Docs map[int64]*index.Document + pathDocs map[string]*index.Document + db *sql.DB +} + +func NewPut(ctx context.Context, db *sql.DB, doc index.Document) (Put, error) { + tx, err := db.BeginTx(ctx, nil) + if err != nil { + return Put{}, nil + } + p := Put{Doc: doc, tx: tx} + return p, nil +} + +func NewPutMany(db *sql.DB, documents map[string]*index.Document) (PutMany, error) { + docs := make(map[int64]*index.Document, len(documents)) + p := PutMany{ + Docs: docs, + pathDocs: documents, + db: db, + } + return p, nil +} + +func (p Put) Insert() error { + if err := p.document(); err != nil { + p.tx.Rollback() + return err + } + + if err := p.tags(); err != nil { + p.tx.Rollback() + return err + } + + if err := p.links(); err != nil { + p.tx.Rollback() + return err + } + + if err := p.authors(); err != nil { + p.tx.Rollback() + return err + } + + return p.tx.Commit() +} + +func (p PutMany) Insert(ctx context.Context) error { + if err := p.documents(ctx); err != nil { + return err + } + + if err := p.tags(ctx); err != nil { + return err + } + + if err := p.links(ctx); err != nil { + return err + } + + if err := p.authors(ctx); err != nil { + return err + } + + return nil +} + +func (p *Put) document() error { + title := sql.NullString{String: p.Doc.Title, Valid: p.Doc.Title != ""} + + dateUnix := p.Doc.Date.Unix() + date := sql.NullInt64{Int64: dateUnix, Valid: dateUnix != 0} + + filetimeUnix := p.Doc.FileTime.Unix() + filetime := sql.NullInt64{Int64: filetimeUnix, Valid: filetimeUnix != 0} + + meta := sql.NullString{String: p.Doc.OtherMeta, Valid: p.Doc.OtherMeta != ""} + + result, err := p.tx.Exec(` + INSERT INTO Documents(path, title, date, fileTime, meta) + VALUES (?,?,?,?,?) + `, p.Doc.Path, title, date, filetime, meta) + if err != nil { + return err + } + + p.Id, err = result.LastInsertId() + if err != nil { + return err + } + + return nil +} + +func (p *PutMany) documents(ctx context.Context) error { + stmt, err := p.db.PrepareContext(ctx, ` + INSERT INTO Documents(path, title, date, fileTime, meta) + VALUES (?,?,?,?,?) + `) + if err != nil { + return err + } + defer stmt.Close() + + tx, err := p.db.BeginTx(ctx, nil) + if err != nil { + return err + } + + txStmt := tx.StmtContext(ctx, stmt) + + // PERF: profile this, grabbing the docId here might save time by simpliyfying + // future inserts + for _, doc := range p.pathDocs { + title := sql.NullString{String: doc.Title, Valid: doc.Title != ""} + dateUnix := doc.Date.Unix() + date := sql.NullInt64{Int64: dateUnix, Valid: dateUnix != 0} + + filetimeUnix := doc.FileTime.Unix() + filetime := sql.NullInt64{Int64: filetimeUnix, Valid: filetimeUnix != 0} + + meta := sql.NullString{String: doc.OtherMeta, Valid: doc.OtherMeta != ""} + + res, err := txStmt.ExecContext(ctx, doc.Path, title, date, filetime, meta) + if err != nil { + tx.Rollback() + return err + } + + id, err := res.LastInsertId() + if err != nil { + tx.Rollback() + return err + } + + p.Docs[id] = doc + } + + return tx.Commit() +} + +func (p Put) tags() error { + query, args := BatchQuery("INSERT OR IGNORE INTO Tags (name) VALUES", "", "(?)", ",", "", len(p.Doc.Tags), p.Doc.Tags) + if _, err := p.tx.Exec(query, args...); err != nil { + return err + } + + preQuery := fmt.Sprintf(` + INSERT INTO DocumentTags + SELECT %d, Tags.id + FROM Tags + WHERE name IN + `, p.Id) + + query, args = BatchQuery(preQuery, "(", "?", ",", ")", len(p.Doc.Tags), p.Doc.Tags) + if _, err := p.tx.Exec(query, args...); err != nil { + return err + } + + return nil +} + +func (p PutMany) tags(ctx context.Context) error { + newTagStmt, err := p.db.PrepareContext(ctx, "INSERT OR IGNORE INTO Tags (name) VALUES (?)") + if err != nil { + return err + } + defer newTagStmt.Close() + + tx, err := p.db.BeginTx(ctx, nil) + if err != nil { + return err + } + + txNewTagStmt := tx.StmtContext(ctx, newTagStmt) + + for id, doc := range p.Docs { + for _, tag := range doc.Tags { + if _, err := txNewTagStmt.ExecContext(ctx, tag); err != nil { + tx.Rollback() + return err + } + } + + preQuery := fmt.Sprintf(` + INSERT INTO DocumentTags (docId, tagId) + SELECT %d, Tags.id + FROM Tags + WHERE name IN + `, id) + query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(doc.Tags), doc.Tags) + if _, err := tx.Exec(query, args); err != nil { + tx.Rollback() + return err + } + } + + return tx.Commit() +} + +func (p Put) links() error { + if len(p.Doc.Links) == 0 { + return nil + } + + preQuery := fmt.Sprintf(` + INSERT INTO Links (referencedId, refererId) + SELECT id, %d + FROM Documents + WHERE path IN + `, p.Id) + query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(p.Doc.Links), p.Doc.Links) + if _, err := p.tx.Exec(query, args...); err != nil { + return err + } + + return nil +} + +func (p PutMany) links(ctx context.Context) error { + tx, err := p.db.BeginTx(ctx, nil) + if err != nil { + return err + } + + for id, doc := range p.Docs { + preQuery := fmt.Sprintf(` + INSERT INTO Links (referencedId, refererId) + SELECT id, %d + FROM Documents + WHERE path IN + `, id) + query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(doc.Links), doc.Links) + if _, err := tx.Exec(query, args...); err != nil { + tx.Rollback() + return err + } + } + + return tx.Commit() +} + +func (p Put) authors() error { + // TODO: consider using temp table instead of cte + namesCTE, args := BatchQuery("WITH names(n) AS", + "( VALUES ", "(?)", ",", "),", len(p.Doc.Authors), p.Doc.Authors) + + newAuthorsQuery := namesCTE + ` + filtered_names AS ( + SELECT n + FROM names + LEFT JOIN Authors on Authors.name = n + LEFT JOIN Aliases on Aliases.alias = n + WHERE Authors.name IS NULL AND Aliases.alias IS NULL + ) + INSERT INTO Authors(name) + SELECT n FROM filtered_names + ` + if _, err := p.tx.Exec(newAuthorsQuery, args...); err != nil { + return err + } + + docAuthorsQuery := namesCTE + fmt.Sprintf(` + matched_authors AS ( + SELECT Authors.id AS author_id + FROM Authors + LEFT JOIN Aliases + ON Authors.id = Aliases.authorId + JOIN names + ON Authors.name = n OR Aliases.alias = n + ) + INSERT INTO DocumentAuthors(docId, authorId) + SELECT %d, author_id FROM matched_authors + `, p.Id) + if _, err := p.tx.Exec(docAuthorsQuery, args...); err != nil { + return err + } + + return nil +} + +func (p PutMany) authors(ctx context.Context) error { + tx, err := p.db.BeginTx(ctx, nil) + if err != nil { + return err + } + + _, err = p.db.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)") + // _, err = tx.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)") + if err != nil { + tx.Rollback() + return err + } + defer p.db.Exec("DROP TABLE IF EXISTS temp.names") + + nameStmt, err := p.db.PrepareContext(ctx, "INSERT OR IGNORE INTO temp.names VALUES (?)") + if err != nil { + return err + } + defer nameStmt.Close() + + txNameStmt := tx.StmtContext(ctx, nameStmt) + for _, doc := range p.Docs { + for _, name := range doc.Authors { + if _, err := txNameStmt.Exec(name); err != nil { + tx.Rollback() + return err + } + } + } + + newAuthorsQuery := ` + WITH new_names AS ( + SELECT name + FROM temp.names + LEFT JOIN Authors on Authors.name = temp.names.name + LEFT JOIN Aliases on Aliases.alias = tmep.names.name + WHERE Authors.name IS NULL AND Aliases.alias IS NULL + ) + INSERT INTO Authors(name) + SELECT name FROM new_names + ` + + if _, err := tx.Exec(newAuthorsQuery); err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TEMPORARY TABLE name_ids AS + SELECT names.name AS name, COALESCE(Authors.id, Aliases.authorId) AS authorId + FROM names + LEFT JOIN Authors ON temp.names.name = Authors.name + LEFT JOIN Aliases ON temp.names.name = Aliases.name + `) + if err != nil { + tx.Rollback() + return err + } + defer p.db.Exec("DROP TABLE IF EXISTS temp.name_ids") + + docAuthorsStmt, err := p.db.Prepare(` + INSERT INTO DocumentAuthors (docId, authorId) + SELECT ?, authorId + FROM temp.name_ids + WHERE name = ? + `) + if err != nil { + tx.Rollback() + return err + } + defer docAuthorsStmt.Close() + + for id, doc := range p.Docs { + for _, name := range doc.Authors { + if _, err := tx.Stmt(docAuthorsStmt).Exec(id, name); err != nil { + tx.Rollback() + return err + } + } + } + + return tx.Commit() +} diff --git a/pkg/data/put_test.go b/pkg/data/put_test.go new file mode 100644 index 0000000..7e5ad38 --- /dev/null +++ b/pkg/data/put_test.go @@ -0,0 +1,98 @@ +package data_test + +import ( + "context" + "database/sql" + "errors" + "testing" + "time" + + "github.com/jpappel/atlas/pkg/data" + "github.com/jpappel/atlas/pkg/index" +) + +func TestPut_Insert(t *testing.T) { + tests := []struct { + name string + newDb func(t *testing.T) *sql.DB + doc index.Document + wantErr error + }{ + { + "insert on empty", + func(t *testing.T) *sql.DB { + t.Helper() + return data.NewMemDB() + }, + index.Document{ + Path: "/file", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + }, + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + ctx := context.Background() + db := tt.newDb(t) + defer db.Close() + + p, err := data.NewPut(ctx, db, tt.doc) + if err != nil { + t.Fatalf("could not construct receiver type: %v", err) + } + + gotErr := p.Insert() + if !errors.Is(gotErr, tt.wantErr) { + t.Fatalf("Unexpected error on Insert():, want %v got %v", tt.wantErr, gotErr) + } else if gotErr != nil { + return + } + + f := data.Fill{Path: tt.doc.Path, Db: db} + gotDoc, err := f.Get(ctx) + if err != nil { + t.Fatal("Error while retrieving document for comparison:", err) + } + + if !gotDoc.Equal(tt.doc) { + t.Errorf("Retrieved doc is not stored doc!\nrecv: %+v\nsent: %+v", gotDoc, tt.doc) + } + }) + } +} + +func TestPutMany_Insert(t *testing.T) { + tests := []struct { + name string // description of this test case + // Named input parameters for receiver constructor. + db *sql.DB + documents map[string]*index.Document + wantErr bool + }{ + // TODO: Add test cases. + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + p, err := data.NewPutMany(tt.db, tt.documents) + if err != nil { + t.Fatalf("could not construct receiver type: %v", err) + } + gotErr := p.Insert(context.Background()) + if gotErr != nil { + if !tt.wantErr { + t.Errorf("Insert() failed: %v", gotErr) + } + return + } + if tt.wantErr { + t.Fatal("Insert() succeeded unexpectedly") + } + }) + } +} + |
