diff options
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/data/db.go | 90 | ||||
| -rw-r--r-- | pkg/data/get.go | 2 | ||||
| -rw-r--r-- | pkg/data/put.go | 86 | ||||
| -rw-r--r-- | pkg/data/put_test.go | 12 | ||||
| -rw-r--r-- | pkg/data/update.go | 478 | ||||
| -rw-r--r-- | pkg/data/update_test.go | 266 |
6 files changed, 871 insertions, 63 deletions
diff --git a/pkg/data/db.go b/pkg/data/db.go index 9c8c5b1..a8d1fe3 100644 --- a/pkg/data/db.go +++ b/pkg/data/db.go @@ -120,7 +120,7 @@ func createSchema(db *sql.DB) error { CREATE TABLE IF NOT EXISTS Aliases( authorId INT NOT NULL, alias TEXT UNIQUE NOT NULL, - FOREIGN KEY (authorId) REFERENCES Authors(id) + FOREIGN KEY (authorId) REFERENCES Authors(id) ON DELETE CASCADE )`) if err != nil { tx.Rollback() @@ -141,7 +141,7 @@ func createSchema(db *sql.DB) error { CREATE TABLE IF NOT EXISTS Links( docId INT, link TEXT NOT NULL, - FOREIGN KEY (docId) REFERENCES Documents(id), + FOREIGN KEY (docId) REFERENCES Documents(id) ON DELETE CASCADE, UNIQUE(docId, link) )`) if err != nil { @@ -153,7 +153,7 @@ func createSchema(db *sql.DB) error { CREATE TABLE IF NOT EXISTS DocumentAuthors( docId INT NOT NULL, authorId INT NOT NULL, - FOREIGN KEY (docId) REFERENCES Documents(id), + FOREIGN KEY (docId) REFERENCES Documents(id) ON DELETE CASCADE, FOREIGN KEY (authorId) REFERENCES Authors(id) )`) if err != nil { @@ -165,7 +165,7 @@ func createSchema(db *sql.DB) error { CREATE TABLE IF NOT EXISTS DocumentTags( docId INT NOT NULL, tagId INT NOT NULL, - FOREIGN KEY (docId) REFERENCES Documents(id), + FOREIGN KEY (docId) REFERENCES Documents(id) ON DELETE CASCADE, FOREIGN KEY (tagId) REFERENCES Tags(id), UNIQUE(docId, tagId) )`) @@ -174,6 +174,12 @@ func createSchema(db *sql.DB) error { return err } + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doc_paths ON Documents (path)") + if err != nil { + tx.Rollback() + return err + } + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doc_dates ON Documents (date)") if err != nil { tx.Rollback() @@ -211,6 +217,34 @@ func createSchema(db *sql.DB) error { } _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_new_author + BEFORE INSERT ON Authors + BEGIN + SELECT CASE WHEN NEW.name IN (SELECT alias FROM Aliases) THEN + RAISE(IGNORE) + END; + END + `) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_new_alias + BEFORE INSERT ON Aliases + BEGIN + SELECT CASE WHEN NEW.alias IN (SELECT name FROM Authors) THEN + RAISE(IGNORE) + END; + END + `) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` CREATE VIEW IF NOT EXISTS Search AS SELECT d.id AS docId, @@ -235,6 +269,10 @@ func createSchema(db *sql.DB) error { return err } + if _, err = tx.Exec("PRAGMA OPTIMIZE"); err != nil { + return err + } + return nil } @@ -246,7 +284,8 @@ func (q Query) Close() error { func (q Query) Get(indexRoot string) (*index.Index, error) { ctx := context.TODO() - docs, err := FillMany{Db: q.db}.Get(ctx) + f := FillMany{Db: q.db} + docs, err := f.Get(ctx) if err != nil { return nil, err } @@ -264,22 +303,19 @@ func (q Query) Get(indexRoot string) (*index.Index, error) { func (q Query) Put(idx index.Index) error { ctx := context.TODO() - p, err := NewPutMany(q.db, idx.Documents) + p, err := NewPutMany(ctx, q.db, idx.Documents) if err != nil { return err } - if err := p.Insert(ctx); err != nil { - return err - } - - return nil + return p.Insert() } -// Update database with values from index +// Update database with values from index, removes entries for deleted files func (q Query) Update(idx index.Index) error { - // TODO: implement - return nil + ctx := context.TODO() + u := NewUpdateMany(q.db, idx.Documents) + return u.Update(ctx) } func (q Query) GetDocument(path string) (*index.Document, error) { @@ -288,6 +324,32 @@ func (q Query) GetDocument(path string) (*index.Document, error) { return f.Get(ctx) } +// Shrink database by removing unused authors, aliases, tags and VACUUM-ing +func (q Query) Tidy() error { + _, err := q.db.Exec(` + DELETE FROM Authors + WHERE id NOT IN ( + SELECT authorId FROM DocumentAuthors + ) + `) + if err != nil { + return err + } + + _, err = q.db.Exec(` + DELETE FROM Tags + WHERE id NOT IN ( + SELECT tagId FROM DocumentTags + ) + `) + if err != nil { + return err + } + + _, err = q.db.Exec("VACUUM") + return err +} + func (q Query) Execute(artifact query.CompilationArtifact) (map[string]*index.Document, error) { ctx := context.TODO() f := FillMany{ diff --git a/pkg/data/get.go b/pkg/data/get.go index 8dafb24..840ca53 100644 --- a/pkg/data/get.go +++ b/pkg/data/get.go @@ -45,7 +45,7 @@ func (f Fill) Get(ctx context.Context) (*index.Document, error) { return f.doc, nil } -func (f FillMany) Get(ctx context.Context) (map[string]*index.Document, error) { +func (f *FillMany) Get(ctx context.Context) (map[string]*index.Document, error) { f.docs = make(map[string]*index.Document) f.ids = make(map[string]int) diff --git a/pkg/data/put.go b/pkg/data/put.go index 0d49e60..98cb6ba 100644 --- a/pkg/data/put.go +++ b/pkg/data/put.go @@ -13,6 +13,7 @@ type Put struct { Id int64 Doc index.Document tx *sql.Tx + db *sql.DB } // TODO: rename struct @@ -20,28 +21,31 @@ type PutMany struct { Docs map[int64]*index.Document pathDocs map[string]*index.Document db *sql.DB + ctx context.Context } -func NewPut(ctx context.Context, db *sql.DB, doc index.Document) (Put, error) { - tx, err := db.BeginTx(ctx, nil) - if err != nil { - return Put{}, nil - } - p := Put{Doc: doc, tx: tx} - return p, nil +func NewPut(db *sql.DB, doc index.Document) Put { + return Put{Doc: doc, db: db} } -func NewPutMany(db *sql.DB, documents map[string]*index.Document) (PutMany, error) { +func NewPutMany(ctx context.Context, db *sql.DB, documents map[string]*index.Document) (PutMany, error) { docs := make(map[int64]*index.Document, len(documents)) p := PutMany{ Docs: docs, pathDocs: documents, db: db, + ctx: ctx, } return p, nil } -func (p Put) Insert() error { +func (p *Put) Insert(ctx context.Context) error { + var err error + p.tx, err = p.db.BeginTx(ctx, nil) + if err != nil { + return nil + } + if err := p.document(); err != nil { p.tx.Rollback() return err @@ -65,20 +69,20 @@ func (p Put) Insert() error { return p.tx.Commit() } -func (p PutMany) Insert(ctx context.Context) error { - if err := p.documents(ctx); err != nil { +func (p PutMany) Insert() error { + if err := p.documents(p.ctx); err != nil { return fmt.Errorf("failed to insert documents: %v", err) } - if err := p.tags(ctx); err != nil { + if err := p.tags(p.ctx); err != nil { return fmt.Errorf("failed to insert tags: %v", err) } - if err := p.links(ctx); err != nil { + if err := p.links(p.ctx); err != nil { return fmt.Errorf("failed to insert links: %v", err) } - if err := p.authors(ctx); err != nil { + if err := p.authors(p.ctx); err != nil { return fmt.Errorf("failed to insert authors: %v", err) } @@ -181,18 +185,17 @@ func (p Put) tags() error { } func (p PutMany) tags(ctx context.Context) error { - newTagStmt, err := p.db.PrepareContext(ctx, "INSERT OR IGNORE INTO Tags (name) VALUES (?)") + tx, err := p.db.BeginTx(ctx, nil) if err != nil { return err } - defer newTagStmt.Close() - tx, err := p.db.BeginTx(ctx, nil) + txNewTagStmt, err := tx.Prepare("INSERT OR IGNORE INTO Tags (name) VALUES (?)") if err != nil { + tx.Rollback() return err } - - txNewTagStmt := tx.StmtContext(ctx, newTagStmt) + defer txNewTagStmt.Close() for id, doc := range p.Docs { if len(doc.Tags) == 0 { @@ -232,7 +235,7 @@ func (p Put) links() error { ` valueStr := fmt.Sprintf("(%d,?)", p.Id) query, args := BatchQuery(preQuery, "", valueStr, ",", "", len(p.Doc.Links), p.Doc.Links) - if _, err := p.tx.Exec(query + "\n ON CONFLICT DO NOTHING", args...); err != nil { + if _, err := p.tx.Exec(query+"\n ON CONFLICT DO NOTHING", args...); err != nil { return err } @@ -256,7 +259,7 @@ func (p PutMany) links(ctx context.Context) error { ` valueStr := fmt.Sprintf("(%d,?)", id) query, args := BatchQuery(preQuery, "", valueStr, ",", "", len(doc.Links), doc.Links) - if _, err := tx.Exec(query +"\n ON CONFLICT DO NOTHING", args...); err != nil { + if _, err := tx.Exec(query+"\n ON CONFLICT DO NOTHING", args...); err != nil { tx.Rollback() return err } @@ -270,7 +273,7 @@ func (p Put) authors() error { return nil } - // TODO: consider using temp table instead of cte + // PERF: consider using temp table instead of cte namesCTE, args := BatchQuery("WITH names(n) AS", "( VALUES ", "(?)", ",", "),", len(p.Doc.Authors), p.Doc.Authors) @@ -278,9 +281,12 @@ func (p Put) authors() error { filtered_names AS ( SELECT n FROM names - LEFT JOIN Authors on Authors.name = n - LEFT JOIN Aliases on Aliases.alias = n - WHERE Authors.name IS NULL AND Aliases.alias IS NULL + LEFT JOIN ( + SELECT * FROM Authors + UNION ALL + SELECT * FROM Aliases + ) AS existing ON existing.name = names.n + WHERE existing.name IS NULL ) INSERT INTO Authors(name) SELECT n FROM filtered_names @@ -293,10 +299,8 @@ func (p Put) authors() error { matched_authors AS ( SELECT Authors.id AS author_id FROM Authors - LEFT JOIN Aliases - ON Authors.id = Aliases.authorId - JOIN names - ON Authors.name = n OR Aliases.alias = n + LEFT JOIN Aliases ON Authors.id = Aliases.authorId + JOIN names ON Authors.name = n OR Aliases.alias = n ) INSERT INTO DocumentAuthors(docId, authorId) SELECT %d, author_id FROM matched_authors @@ -344,9 +348,12 @@ func (p PutMany) authors(ctx context.Context) error { WITH new_names AS ( SELECT temp.names.name FROM temp.names - LEFT JOIN Authors on Authors.name = temp.names.name - LEFT JOIN Aliases on Aliases.alias = temp.names.name - WHERE Authors.name IS NULL AND Aliases.alias IS NULL + LEFT JOIN ( + SELECT * FROM Authors + UNION ALL + SELECT * FROM Aliases + ) AS existing ON existing.name = temp.names.name + WHERE existing.name IS NULL ) INSERT INTO Authors(name) SELECT name FROM new_names @@ -359,10 +366,13 @@ func (p PutMany) authors(ctx context.Context) error { _, err = tx.Exec(` CREATE TEMPORARY TABLE name_ids AS - SELECT names.name AS name, COALESCE(Authors.id, Aliases.authorId) AS authorId + SELECT names.name AS name, existing.id AS authorId FROM temp.names - LEFT JOIN Authors ON temp.names.name = Authors.name - LEFT JOIN Aliases ON temp.names.name = Aliases.alias + LEFT JOIN ( + SELECT * FROM Authors + UNION ALL + SELECT * FROM Aliases + ) AS existing ON existing.name = temp.names.name `) if err != nil { tx.Rollback() @@ -394,9 +404,5 @@ func (p PutMany) authors(ctx context.Context) error { } } - if err := tx.Commit(); err != nil { - return err - } - - return nil + return tx.Commit() } diff --git a/pkg/data/put_test.go b/pkg/data/put_test.go index a97187b..8188b21 100644 --- a/pkg/data/put_test.go +++ b/pkg/data/put_test.go @@ -41,12 +41,8 @@ func TestPut_Insert(t *testing.T) { db := tt.newDb(t) defer db.Close() - p, err := data.NewPut(ctx, db, tt.doc) - if err != nil { - t.Fatalf("could not construct receiver type: %v", err) - } - - gotErr := p.Insert() + p := data.NewPut(db, tt.doc) + gotErr := p.Insert(t.Context()) if !errors.Is(gotErr, tt.wantErr) { t.Fatalf("Unexpected error on Insert():, want %v got %v", tt.wantErr, gotErr) } else if gotErr != nil { @@ -105,12 +101,12 @@ func TestPutMany_Insert(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { db := tt.newDb(t) - p, err := data.NewPutMany(db, tt.documents) + p, err := data.NewPutMany(t.Context(), db, tt.documents) if err != nil { t.Fatalf("could not construct receiver type: %v", err) } - gotErr := p.Insert(t.Context()) + gotErr := p.Insert() if !errors.Is(gotErr, tt.wantErr) { t.Fatalf("Recieved unexpected error, got %v want %v", gotErr, tt.wantErr) } else if err != nil { diff --git a/pkg/data/update.go b/pkg/data/update.go new file mode 100644 index 0000000..a606256 --- /dev/null +++ b/pkg/data/update.go @@ -0,0 +1,478 @@ +package data + +import ( + "context" + "database/sql" + "fmt" + + "github.com/jpappel/atlas/pkg/index" +) + +type Update struct { + Id int64 + Doc index.Document + db *sql.DB + tx *sql.Tx +} + +type UpdateMany struct { + Docs map[int64]*index.Document + pathDocs map[string]*index.Document + tx *sql.Tx + db *sql.DB +} + +func NewUpdate(ctx context.Context, db *sql.DB, doc index.Document) Update { + return Update{Doc: doc, db: db} +} + +func NewUpdateMany(db *sql.DB, docs map[string]*index.Document) UpdateMany { + return UpdateMany{pathDocs: docs, db: db} +} + +// Replace a document if its filetime is newer than the one in the database. +func (u *Update) Update(ctx context.Context) error { + var err error + u.tx, err = u.db.BeginTx(ctx, nil) + if err != nil { + return err + } + + isUpdate, err := u.document() + if !isUpdate || err != nil { + u.tx.Rollback() + return err + } + + if err := u.tags(); err != nil { + u.tx.Rollback() + return err + } + + if err := u.links(); err != nil { + u.tx.Rollback() + return err + } + + if err := u.authors(); err != nil { + u.tx.Rollback() + return err + } + + return u.tx.Commit() +} + +func (u *UpdateMany) Update(ctx context.Context) error { + var err error + u.tx, err = u.db.BeginTx(ctx, nil) + if err != nil { + return err + } + + hasUpdates, err := u.documents() + if !hasUpdates || err != nil { + u.tx.Rollback() + return err + } + + if err := u.tags(); err != nil { + u.tx.Rollback() + return err + } + + if err := u.links(); err != nil { + u.tx.Rollback() + return err + } + + if err := u.authors(); err != nil { + u.tx.Rollback() + return err + } + + return u.tx.Commit() +} + +// set document fields in db, returns if an update has occured +func (u *Update) document() (bool, error) { + filetime := sql.NullInt64{Int64: u.Doc.FileTime.Unix(), Valid: !u.Doc.FileTime.IsZero()} + row := u.tx.QueryRow(` + SELECT TRUE + FROM Documents + WHERE path = ? AND COALESCE(fileTime,0) < COALESCE(?,0) + `, u.Doc.Path, filetime) + + isUpdate := false + if err := row.Scan(&isUpdate); err == sql.ErrNoRows { + isUpdate = true + } else if err != nil { + return false, err + } + + if !isUpdate { + return false, nil + } + + title := sql.NullString{String: u.Doc.Title, Valid: u.Doc.Title != ""} + date := sql.NullInt64{Int64: u.Doc.Date.Unix(), Valid: !u.Doc.Date.IsZero()} + meta := sql.NullString{String: u.Doc.OtherMeta, Valid: u.Doc.OtherMeta != ""} + + _, err := u.tx.Exec(` + INSERT INTO Documents(path, title, date, fileTime, meta) + VALUES (?,?,?,?,?) + ON CONFLICT(path) + DO UPDATE SET + title=excluded.title, + date=excluded.date, + fileTime=excluded.fileTime, + meta=excluded.meta + `, u.Doc.Path, title, date, filetime, meta) + if err != nil { + return true, err + } + + row = u.tx.QueryRow(`SELECT id FROM Documents WHERE path = ?`, u.Doc.Path) + if err := row.Scan(&u.Id); err != nil { + return true, err + } + + return true, nil +} + +func (u *UpdateMany) documents() (bool, error) { + _, err := u.tx.Exec(` + CREATE TEMPORARY TABLE updateDocs ( + path TEXT UNIQUE NOT NULL, + title TEXT, + date INT, + fileTime INT, + meta BLOB + )`) + if err != nil { + return false, err + } + defer u.tx.Exec("DROP TABLE temp.updateDocs") + + tempInsertStmt, err := u.tx.Prepare("INSERT INTO temp.updateDocs VALUES (?,?,?,?,?)") + if err != nil { + return false, err + } + defer tempInsertStmt.Close() + + for path, doc := range u.pathDocs { + filetime := sql.NullInt64{ + Int64: doc.FileTime.Unix(), + Valid: !doc.FileTime.IsZero(), + } + title := sql.NullString{ + String: doc.Title, + Valid: doc.Title != "", + } + date := sql.NullInt64{ + Int64: doc.Date.Unix(), + Valid: !doc.Date.IsZero(), + } + meta := sql.NullString{ + String: doc.OtherMeta, + Valid: doc.OtherMeta != "", + } + if _, err := tempInsertStmt.Exec(path, title, date, filetime, meta); err != nil { + return false, err + } + } + + _, err = u.tx.Exec(` + DELETE FROM Documents + WHERE Documents.path NOT IN ( + SELECT path FROM temp.updateDocs + )`) + if err != nil { + return false, err + } + + _, err = u.tx.Exec(` + INSERT INTO Documents (path, title, date, fileTime, meta) + SELECT * FROM updateDocs WHERE TRUE + ON CONFLICT(path) DO UPDATE SET + title=excluded.title, + date=excluded.date, + fileTime=excluded.fileTime, + meta=excluded.meta + WHERE excluded.fileTime > Documents.fileTime + `) + if err != nil { + return false, err + } + + updates, err := u.tx.Query(` + SELECT id, Documents.path + FROM updateDocs + JOIN Documents ON updateDocs.path = Documents.path + WHERE Documents.fileTime = updateDocs.fileTime + `) + if err != nil { + return false, err + } + defer updates.Close() + + u.Docs = make(map[int64]*index.Document) + var id int64 + var path string + hasUpdate := false + for updates.Next() { + if err := updates.Scan(&id, &path); err != nil { + return false, err + } + u.Docs[id] = u.pathDocs[path] + hasUpdate = true + } + + return hasUpdate, nil +} + +func (u Update) tags() error { + if _, err := u.tx.Exec(` + DELETE FROM DocumentTags + WHERE docId = ? + `, u.Id); err != nil { + return err + } + + query, args := BatchQuery( + "INSERT OR IGNORE INTO Tags (name) VALUES", + "", "(?)", ",", "", + len(u.Doc.Tags), u.Doc.Tags, + ) + if _, err := u.tx.Exec(query, args...); err != nil { + return err + } + + preqQuery := fmt.Sprintf(` + INSERT INTO DocumentTags + SELECT %d, Tags.id + FROM Tags + WHERE name in + `, u.Id) + query, args = BatchQuery( + preqQuery, "(", "?", ",", ")", + len(u.Doc.Tags), u.Doc.Tags, + ) + + if _, err := u.tx.Exec(query, args...); err != nil { + return err + } + + return nil +} + +func (u UpdateMany) tags() error { + // PERF: consider batching + deleteStmt, err := u.tx.Prepare("DELETE FROM DocumentTags WHERE docId = ?") + if err != nil { + return err + } + defer deleteStmt.Close() + + for id := range u.Docs { + if _, err := deleteStmt.Exec(id); err != nil { + return err + } + } + + for id, doc := range u.Docs { + if len(doc.Tags) == 0 { + continue + } + insertTag, args := BatchQuery( + "INSERT OR IGNORE INTO Tags (name) VALUES", + "", "(?)", ",", "", + len(doc.Tags), doc.Tags, + ) + _, err = u.tx.Exec(insertTag, args...) + if err != nil { + return err + } + + preqQuery := fmt.Sprintf(` + INSERT INTO DocumentTags + SELECT %d, Tags.id + FROM Tags + WHERE name in + `, id) + setDocTags, _ := BatchQuery( + preqQuery, "(", "?", ",", ")", + len(doc.Tags), doc.Tags, + ) + if _, err := u.tx.Exec(setDocTags, args...); err != nil { + return err + } + } + + return nil +} + +func (u Update) links() error { + if _, err := u.tx.Exec(` + DELETE FROM Links + WHERE docId = ? + `, u.Id); err != nil { + return err + } + + query, args := BatchQuery( + "INSERT INTO Links VALUES ", + "", fmt.Sprintf("(%d,?)", u.Id), ",", "", + len(u.Doc.Links), u.Doc.Links, + ) + if _, err := u.tx.Exec(query, args...); err != nil { + return err + } + + return nil +} + +func (u UpdateMany) links() error { + deleteStmt, err := u.tx.Prepare("DELETE FROM Links WHERE docId = ?") + if err != nil { + return err + } + defer deleteStmt.Close() + insertStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Links VALUES (?,?)") + if err != nil { + return err + } + defer insertStmt.Close() + + for id, doc := range u.Docs { + if _, err := deleteStmt.Exec(id); err != nil { + return err + } + + for _, link := range doc.Links { + if _, err := insertStmt.Exec(id, link); err != nil { + return err + } + } + } + + return nil +} + +func (u Update) authors() error { + if _, err := u.tx.Exec(` + DELETE FROM DocumentAuthors + WHERE docId = ? + `, u.Id); err != nil { + return err + } + + tempTable, args := BatchQuery(` + CREATE TEMPORARY TABLE new_names AS + SELECT column1 AS name + FROM ( VALUES `, + "", "(?)", ",", ")", + len(u.Doc.Authors), u.Doc.Authors, + ) + _, err := u.tx.Exec(tempTable, args...) + if err != nil { + return err + } + defer u.tx.Exec("DROP TABLE temp.new_names") + + _, err = u.tx.Exec(` + INSERT OR IGNORE INTO Authors(name) + SELECT * FROM new_names + `) + if err != nil { + return err + } + _, err = u.tx.Exec(` + INSERT OR IGNORE INTO Aliases(alias) + SELECT * FROM new_names + `) + if err != nil { + return err + } + + docAuthQuery := fmt.Sprintf(` + INSERT INTO DocumentAuthors + SELECT %d, existing.id + FROM new_names + LEFT JOIN ( + SELECT * FROM Authors + UNION ALL + SELECT * FROM Aliases + ) AS existing ON existing.name = new_names.name + `, u.Id) + if _, err := u.tx.Exec(docAuthQuery); err != nil { + return err + } + + return nil +} + +func (u UpdateMany) authors() error { + deleteStmt, err := u.tx.Prepare("DELETE FROM DocumentAuthors WHERE docId = ?") + if err != nil { + return err + } + defer deleteStmt.Close() + + _, err = u.tx.Exec(` + CREATE TEMPORARY TABLE new_names ( + docId INTEGER NOT NULL, + name TEXT NOT NULL, + UNIQUE(docId, name) + )`) + if err != nil { + return err + } + defer u.tx.Exec("DROP TABLE temp.new_names") + + insertTempTable, err := u.tx.Prepare("INSERT INTO temp.new_names VALUES (?,?)") + if err != nil { + return err + } + defer insertTempTable.Close() + + for id, doc := range u.Docs { + if _, err := deleteStmt.Exec(id); err != nil { + return err + } + + for _, author := range doc.Authors { + if _, err := insertTempTable.Exec(id, author); err != nil { + return err + } + } + } + + _, err = u.tx.Exec(` + INSERT OR IGNORE INTO Authors(name) + SELECT name FROM new_names + `) + if err != nil { + return err + } + + _, err = u.tx.Exec(` + INSERT OR IGNORE INTO Aliases(alias) + SELECT name FROM new_names + `) + if err != nil { + return err + } + + _, err = u.tx.Exec(` + INSERT INTO DocumentAuthors + SELECT docId, existing.id + FROM new_names + LEFT JOIN ( + SELECT * FROM Authors + UNION ALL + SELECT * FROM Aliases + ) AS existing ON existing.name = new_names.name + `) + + return err +} diff --git a/pkg/data/update_test.go b/pkg/data/update_test.go new file mode 100644 index 0000000..d6ef578 --- /dev/null +++ b/pkg/data/update_test.go @@ -0,0 +1,266 @@ +package data_test + +import ( + "context" + "database/sql" + "errors" + "maps" + "testing" + "time" + + "github.com/jpappel/atlas/pkg/data" + "github.com/jpappel/atlas/pkg/index" +) + +func TestUpdate_Update(t *testing.T) { + tests := []struct { + name string + newDb func(t *testing.T) *sql.DB + doc index.Document + wantErr error + }{ + { + "update on empty", + func(t *testing.T) *sql.DB { + t.Helper() + return data.NewMemDB() + }, + index.Document{ + Path: "/file", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }, + nil, + }, + { + "update on existing", + func(t *testing.T) *sql.DB { + t.Helper() + db := data.NewMemDB() + p := data.NewPut(db, index.Document{ + Path: "/file", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }) + + if err := p.Insert(t.Context()); err != nil { + panic(err) + } + + return db + }, + index.Document{ + Path: "/file", + Title: "A file with a new title", + Date: time.Unix(1, 0), + FileTime: time.Unix(3, 0), + Authors: []string{"jp", "pj"}, + Tags: []string{"foo", "bar", "oof"}, + Links: []string{"link_4"}, + }, + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + db := tt.newDb(t) + defer db.Close() + + u := data.NewUpdate(context.Background(), db, tt.doc) + gotErr := u.Update(t.Context()) + if !errors.Is(gotErr, tt.wantErr) { + t.Fatalf("recieved unexpected error: got %v want %v", gotErr, tt.wantErr) + } else if gotErr != nil { + return + } + + f := data.Fill{Path: tt.doc.Path, Db: db} + gotDoc, err := f.Get(t.Context()) + if err != nil { + t.Fatal("Error while retrieving document for comparison:", err) + } + + if !gotDoc.Equal(tt.doc) { + t.Errorf("Retrieved doc is not stored doc!\nrecv: %+v\nsent: %+v", gotDoc, tt.doc) + } + }) + } +} + +func TestUpdateMany_Update(t *testing.T) { + tests := []struct { + name string + newDb func(t *testing.T) *sql.DB + docs map[string]*index.Document + wantErr error + }{ + { + "additions", + func(t *testing.T) *sql.DB { + return data.NewMemDB() + }, + map[string]*index.Document{ + "/afile": { + Path: "/afile", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }, + "/bfile": { + Path: "/bfile", + Title: "B file", + Date: time.Unix(3, 0), + FileTime: time.Unix(4, 0), + Authors: []string{"pj"}, + Tags: []string{"foo", "gar"}, + Links: []string{"link_4"}, + }, + }, + nil, + }, + { + "delete", + func(t *testing.T) *sql.DB { + db := data.NewMemDB() + + docs := map[string]*index.Document{ + "/afile": { + Path: "/afile", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }, + "/bfile": { + Path: "/bfile", + Title: "B file", + Date: time.Unix(3, 0), + FileTime: time.Unix(4, 0), + Authors: []string{"pj"}, + Tags: []string{"foo", "gar"}, + Links: []string{"link_4"}, + }, + } + p, err := data.NewPutMany(t.Context(), db, docs) + if err != nil { + panic(err) + } + if err := p.Insert(); err != nil { + panic(err) + } + + return db + }, + map[string]*index.Document{ + "/afile": { + Path: "/afile", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }, + }, + nil, + }, + { + "update", + func(t *testing.T) *sql.DB { + db := data.NewMemDB() + + docs := map[string]*index.Document{ + "/afile": { + Path: "/afile", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(2, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "oof", "baz"}, + Links: []string{"link_1", "link_2", "link_3"}, + }, + "/bfile": { + Path: "/bfile", + Title: "B file", + Date: time.Unix(3, 0), + FileTime: time.Unix(4, 0), + Authors: []string{"pj"}, + Tags: []string{"foo", "gar"}, + Links: []string{"link_4"}, + }, + } + p, err := data.NewPutMany(t.Context(), db, docs) + if err != nil { + panic(err) + } + if err := p.Insert(); err != nil { + panic(err) + } + + return db + }, + map[string]*index.Document{ + "/afile": { + Path: "/afile", + Title: "A file", + Date: time.Unix(1, 0), + FileTime: time.Unix(10, 0), + Authors: []string{"jp"}, + Tags: []string{"foo", "bar", "bing", "baz"}, + Links: []string{"link_1", "link_3"}, + }, + "/bfile": { + Path: "/bfile", + Title: "B file", + Date: time.Unix(3, 0), + FileTime: time.Unix(5, 0), + Authors: []string{}, + Tags: []string{}, + Links: []string{}, + }, + }, + nil, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + db := tt.newDb(t) + defer db.Close() + + u := data.NewUpdateMany(db, tt.docs) + gotErr := u.Update(t.Context()) + if !errors.Is(gotErr, tt.wantErr) { + t.Fatalf("recieved unexpected error: got %v want %v", gotErr, tt.wantErr) + } else if gotErr != nil { + return + } + + f := data.FillMany{Db: db} + docs, err := f.Get(t.Context()) + if err != nil { + t.Fatal("Error while retrieving documents for comparison:", err) + } + + if !maps.EqualFunc(docs, tt.docs, func(a, b *index.Document) bool { + return a.Equal(*b) + }) { + t.Error("Got different docs than expected") + t.Logf("Got:\n%+v\n", docs) + t.Logf("Want:\n%+v\n", tt.docs) + } + }) + } +} |
