aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/data/put.go
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/data/put.go')
-rw-r--r--pkg/data/put.go142
1 files changed, 44 insertions, 98 deletions
diff --git a/pkg/data/put.go b/pkg/data/put.go
index 4a9a039..4830dbe 100644
--- a/pkg/data/put.go
+++ b/pkg/data/put.go
@@ -280,135 +280,81 @@ func (p Put) authors() error {
return nil
}
- // PERF: consider using temp table instead of cte
- namesCTE, args := BatchQuery("WITH names(n) AS",
- "( VALUES ", "(?)", ",", "),", len(p.Doc.Authors), p.Doc.Authors)
-
- newAuthorsQuery := namesCTE + `
- filtered_names AS (
- SELECT n
- FROM names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = names.n
- WHERE existing.name IS NULL
- )
- INSERT INTO Authors(name)
- SELECT n FROM filtered_names
- `
- if _, err := p.tx.Exec(newAuthorsQuery, args...); err != nil {
- return err
- }
-
- docAuthorsQuery := namesCTE + fmt.Sprintf(`
- matched_authors AS (
- SELECT Authors.id AS author_id
- FROM Authors
- LEFT JOIN Aliases ON Authors.id = Aliases.authorId
- JOIN names ON Authors.name = n OR Aliases.alias = n
- )
- INSERT INTO DocumentAuthors(docId, authorId)
- SELECT %d, author_id FROM matched_authors
- `, p.Id)
- if _, err := p.tx.Exec(docAuthorsQuery, args...); err != nil {
- return err
- }
-
- return nil
-}
-
-func (p PutMany) authors(ctx context.Context) error {
- tx, err := p.db.BeginTx(ctx, nil)
+ authStmt, err := p.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)")
if err != nil {
return err
}
+ defer authStmt.Close()
- _, err = tx.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)")
+ idStmt, err := p.tx.Prepare("SELECT id FROM Authors WHERE name = ?")
if err != nil {
- tx.Rollback()
return err
}
- defer p.db.Exec("DROP TABLE IF EXISTS temp.names")
+ defer idStmt.Close()
- nameStmt, err := tx.Prepare("INSERT OR IGNORE INTO temp.names VALUES (?)")
+ docAuthStmt, err := p.tx.Prepare(
+ fmt.Sprintf("INSERT INTO DocumentAuthors(docId,authorId) VALUES (%d,?)", p.Id),
+ )
if err != nil {
return err
}
- defer nameStmt.Close()
+ defer docAuthStmt.Close()
- txNameStmt := tx.StmtContext(ctx, nameStmt)
- for _, doc := range p.Docs {
- if len(doc.Authors) == 0 {
- continue
+ // sqlite is fast, and i'm too lazy to batch this
+ var authId int64
+ for _, author := range p.Doc.Authors {
+ if _, err := authStmt.Exec(author); err != nil {
+ return err
}
- for _, name := range doc.Authors {
- if _, err := txNameStmt.Exec(name); err != nil {
- tx.Rollback()
- return err
- }
+ if err := idStmt.QueryRow(author).Scan(&authId); err != nil {
+ return err
+ }
+ if _, err := docAuthStmt.Exec(authId); err != nil {
+ return err
}
}
- newAuthorsQuery := `
- WITH new_names AS (
- SELECT temp.names.name
- FROM temp.names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = temp.names.name
- WHERE existing.name IS NULL
- )
- INSERT INTO Authors(name)
- SELECT name FROM new_names
- `
+ return nil
+}
- if _, err := tx.Exec(newAuthorsQuery); err != nil {
- tx.Rollback()
+func (p PutMany) authors(ctx context.Context) error {
+ tx, err := p.db.BeginTx(ctx, nil)
+ if err != nil {
return err
}
- _, err = tx.Exec(`
- CREATE TEMPORARY TABLE name_ids AS
- SELECT names.name AS name, existing.id AS authorId
- FROM temp.names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = temp.names.name
- `)
+ authStmt, err := tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)")
if err != nil {
- tx.Rollback()
return err
}
- defer p.db.Exec("DROP TABLE IF EXISTS temp.name_ids")
+ defer authStmt.Close()
- docAuthorsStmt, err := tx.Prepare(`
- INSERT INTO DocumentAuthors (docId, authorId)
- SELECT ?, authorId
- FROM temp.name_ids
- WHERE name = ?
- `)
+ idStmt, err := tx.Prepare("SELECT id FROM Authors WHERE name = ?")
if err != nil {
- tx.Rollback()
return err
}
- defer docAuthorsStmt.Close()
+ defer idStmt.Close()
- for id, doc := range p.Docs {
- if len(doc.Authors) == 0 {
- continue
- }
- for _, name := range doc.Authors {
- if _, err := tx.Stmt(docAuthorsStmt).Exec(id, name); err != nil {
- tx.Rollback()
+ docAuthStmt, err := tx.Prepare("INSERT INTO DocumentAuthors(docId,authorId) VALUES (?,?)")
+ if err != nil {
+ return err
+ }
+ defer docAuthStmt.Close()
+
+ var authId int64
+ for docId, doc := range p.Docs {
+ for _, author := range doc.Authors {
+ if _, err := authStmt.Exec(author); err != nil {
+ return err
+ }
+ if err := idStmt.QueryRow(author).Scan(&authId); err != nil {
+ return err
+ }
+ if _, err := docAuthStmt.Exec(docId, authId); err != nil {
return err
}
}
+
}
return tx.Commit()