diff options
| author | JP Appel <jeanpierre.appel01@gmail.com> | 2025-08-10 02:30:00 -0400 |
|---|---|---|
| committer | JP Appel <jeanpierre.appel01@gmail.com> | 2025-08-10 02:42:41 -0400 |
| commit | 5d0b36cf87ee94c690d11c0beab48f4dadc6fc52 (patch) | |
| tree | 8459c2e484c4f91019ea4b4999de3516dac6488f /pkg/data/put.go | |
| parent | 7b798789897bd07b03510e073f41e8deadbc460c (diff) | |
Change db schema; remove aliases, add fts5 integration
Greatly simplify db schema by removing alias functionality.
Create fts5 tables for text search over paths, headings, titles, meta,
authors, and links.
Diffstat (limited to 'pkg/data/put.go')
| -rw-r--r-- | pkg/data/put.go | 142 |
1 files changed, 44 insertions, 98 deletions
diff --git a/pkg/data/put.go b/pkg/data/put.go index 4a9a039..4830dbe 100644 --- a/pkg/data/put.go +++ b/pkg/data/put.go @@ -280,135 +280,81 @@ func (p Put) authors() error { return nil } - // PERF: consider using temp table instead of cte - namesCTE, args := BatchQuery("WITH names(n) AS", - "( VALUES ", "(?)", ",", "),", len(p.Doc.Authors), p.Doc.Authors) - - newAuthorsQuery := namesCTE + ` - filtered_names AS ( - SELECT n - FROM names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = names.n - WHERE existing.name IS NULL - ) - INSERT INTO Authors(name) - SELECT n FROM filtered_names - ` - if _, err := p.tx.Exec(newAuthorsQuery, args...); err != nil { - return err - } - - docAuthorsQuery := namesCTE + fmt.Sprintf(` - matched_authors AS ( - SELECT Authors.id AS author_id - FROM Authors - LEFT JOIN Aliases ON Authors.id = Aliases.authorId - JOIN names ON Authors.name = n OR Aliases.alias = n - ) - INSERT INTO DocumentAuthors(docId, authorId) - SELECT %d, author_id FROM matched_authors - `, p.Id) - if _, err := p.tx.Exec(docAuthorsQuery, args...); err != nil { - return err - } - - return nil -} - -func (p PutMany) authors(ctx context.Context) error { - tx, err := p.db.BeginTx(ctx, nil) + authStmt, err := p.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") if err != nil { return err } + defer authStmt.Close() - _, err = tx.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)") + idStmt, err := p.tx.Prepare("SELECT id FROM Authors WHERE name = ?") if err != nil { - tx.Rollback() return err } - defer p.db.Exec("DROP TABLE IF EXISTS temp.names") + defer idStmt.Close() - nameStmt, err := tx.Prepare("INSERT OR IGNORE INTO temp.names VALUES (?)") + docAuthStmt, err := p.tx.Prepare( + fmt.Sprintf("INSERT INTO DocumentAuthors(docId,authorId) VALUES (%d,?)", p.Id), + ) if err != nil { return err } - defer nameStmt.Close() + defer docAuthStmt.Close() - txNameStmt := tx.StmtContext(ctx, nameStmt) - for _, doc := range p.Docs { - if len(doc.Authors) == 0 { - continue + // sqlite is fast, and i'm too lazy to batch this + var authId int64 + for _, author := range p.Doc.Authors { + if _, err := authStmt.Exec(author); err != nil { + return err } - for _, name := range doc.Authors { - if _, err := txNameStmt.Exec(name); err != nil { - tx.Rollback() - return err - } + if err := idStmt.QueryRow(author).Scan(&authId); err != nil { + return err + } + if _, err := docAuthStmt.Exec(authId); err != nil { + return err } } - newAuthorsQuery := ` - WITH new_names AS ( - SELECT temp.names.name - FROM temp.names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = temp.names.name - WHERE existing.name IS NULL - ) - INSERT INTO Authors(name) - SELECT name FROM new_names - ` + return nil +} - if _, err := tx.Exec(newAuthorsQuery); err != nil { - tx.Rollback() +func (p PutMany) authors(ctx context.Context) error { + tx, err := p.db.BeginTx(ctx, nil) + if err != nil { return err } - _, err = tx.Exec(` - CREATE TEMPORARY TABLE name_ids AS - SELECT names.name AS name, existing.id AS authorId - FROM temp.names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = temp.names.name - `) + authStmt, err := tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") if err != nil { - tx.Rollback() return err } - defer p.db.Exec("DROP TABLE IF EXISTS temp.name_ids") + defer authStmt.Close() - docAuthorsStmt, err := tx.Prepare(` - INSERT INTO DocumentAuthors (docId, authorId) - SELECT ?, authorId - FROM temp.name_ids - WHERE name = ? - `) + idStmt, err := tx.Prepare("SELECT id FROM Authors WHERE name = ?") if err != nil { - tx.Rollback() return err } - defer docAuthorsStmt.Close() + defer idStmt.Close() - for id, doc := range p.Docs { - if len(doc.Authors) == 0 { - continue - } - for _, name := range doc.Authors { - if _, err := tx.Stmt(docAuthorsStmt).Exec(id, name); err != nil { - tx.Rollback() + docAuthStmt, err := tx.Prepare("INSERT INTO DocumentAuthors(docId,authorId) VALUES (?,?)") + if err != nil { + return err + } + defer docAuthStmt.Close() + + var authId int64 + for docId, doc := range p.Docs { + for _, author := range doc.Authors { + if _, err := authStmt.Exec(author); err != nil { + return err + } + if err := idStmt.QueryRow(author).Scan(&authId); err != nil { + return err + } + if _, err := docAuthStmt.Exec(docId, authId); err != nil { return err } } + } return tx.Commit() |
