aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/data
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/data')
-rw-r--r--pkg/data/db.go187
-rw-r--r--pkg/data/get.go2
-rw-r--r--pkg/data/get_test.go14
-rw-r--r--pkg/data/put.go142
-rw-r--r--pkg/data/update.go108
-rw-r--r--pkg/data/update_test.go50
6 files changed, 287 insertions, 216 deletions
diff --git a/pkg/data/db.go b/pkg/data/db.go
index bf707b4..58244ac 100644
--- a/pkg/data/db.go
+++ b/pkg/data/db.go
@@ -126,17 +126,6 @@ func createSchema(db *sql.DB, version string) error {
}
_, err = tx.Exec(`
- CREATE TABLE IF NOT EXISTS Aliases(
- authorId INT NOT NULL,
- alias TEXT UNIQUE NOT NULL,
- FOREIGN KEY (authorId) REFERENCES Authors(id) ON DELETE CASCADE
- )`)
- if err != nil {
- tx.Rollback()
- return err
- }
-
- _, err = tx.Exec(`
CREATE TABLE IF NOT EXISTS Tags(
id INTEGER PRIMARY KEY,
name TEXT UNIQUE NOT NULL
@@ -201,37 +190,87 @@ func createSchema(db *sql.DB, version string) error {
return err
}
- _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_alias ON Aliases(alias)")
+ _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_links_link ON Links(link)")
if err != nil {
tx.Rollback()
return err
}
- _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_authorId ON Aliases(authorId)")
+ _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)")
if err != nil {
tx.Rollback()
return err
}
- _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_links_link ON Links(link)")
+ _, err = tx.Exec(`
+ CREATE VIRTUAL TABLE IF NOT EXISTS Documents_fts
+ USING fts5 (
+ path, headings, title, meta, content=Documents, content_rowid=id
+ )
+ `)
if err != nil {
tx.Rollback()
return err
}
- _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)")
+ _, err = tx.Exec(`
+ CREATE VIRTUAL TABLE IF NOT EXISTS Authors_fts
+ USING fts5 (
+ name, content=Authors, content_rowid=id
+ )
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE VIRTUAL TABLE IF NOT EXISTS Tags_fts
+ USING fts5 (
+ name, content=Tags, content_rowid=id
+ )
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ // FIXME: doesn't set new.id
+ _, err = tx.Exec(`
+ CREATE TRIGGER IF NOT EXISTS trig_ai_authors
+ AFTER INSERT ON Authors
+ BEGIN
+ INSERT INTO Authors_fts(rowid, name)
+ VALUES (new.id, new.name);
+ END
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TRIGGER IF NOT EXISTS trig_ad_authors
+ AFTER DELETE ON Authors
+ BEGIN
+ INSERT INTO Authors_fts(Authors_fts, rowid, name)
+ VALUES ('delete', old.id, old.name);
+ END
+ `)
if err != nil {
tx.Rollback()
return err
}
+ // FIXME: doesn't set new.id
_, err = tx.Exec(`
- CREATE TRIGGER IF NOT EXISTS trig_new_author
- BEFORE INSERT ON Authors
+ CREATE TRIGGER IF NOT EXISTS trig_au_authors
+ AFTER UPDATE ON Authors
BEGIN
- SELECT CASE WHEN NEW.name IN (SELECT alias FROM Aliases) THEN
- RAISE(IGNORE)
- END;
+ INSERT INTO Authors_fts(Authors_fts, rowid, name)
+ VALUES ('delete', old.id, old.name);
+ INSERT INTO Authors_fts(rowid, name)
+ VALUES (new.id, new.name);
END
`)
if err != nil {
@@ -239,13 +278,13 @@ func createSchema(db *sql.DB, version string) error {
return err
}
+ // FIXME: doesn't set new.id
_, err = tx.Exec(`
- CREATE TRIGGER IF NOT EXISTS trig_new_alias
- BEFORE INSERT ON Aliases
+ CREATE TRIGGER IF NOT EXISTS trig_ai_tags
+ AFTER INSERT ON Tags
BEGIN
- SELECT CASE WHEN NEW.alias IN (SELECT name FROM Authors) THEN
- RAISE(IGNORE)
- END;
+ INSERT INTO Tags_fts(rowid, name)
+ VALUES (new.id, new.name);
END
`)
if err != nil {
@@ -254,22 +293,94 @@ func createSchema(db *sql.DB, version string) error {
}
_, err = tx.Exec(`
+ CREATE TRIGGER IF NOT EXISTS trig_ad_tags
+ AFTER DELETE ON Tags
+ BEGIN
+ INSERT INTO Tags_fts(Tags_fts, rowid, name)
+ VALUES ('delete', old.id, old.name);
+ END
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ // FIXME: doesn't set new.id
+ _, err = tx.Exec(`
+ CREATE TRIGGER IF NOT EXISTS trig_au_tags
+ AFTER UPDATE ON Tags
+ BEGIN
+ INSERT INTO Tags_fts(Tags_fts, rowid, name)
+ VALUES ('delete', old.id, old.name);
+ INSERT INTO Tags_fts(rowid, name)
+ VALUES (new.id, new.name);
+ END
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ // FIXME: doesn't set new.id
+ _, err = tx.Exec(`
+ CREATE TRIGGER IF NOT EXISTS trig_ai_doc
+ AFTER INSERT ON Documents
+ BEGIN
+ INSERT INTO Documents_fts(rowid, path, headings, title, meta)
+ VALUES (new.id, new.path, new.headings, new.title, new.meta);
+ END
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TRIGGER IF NOT EXISTS trig_ad_doc
+ AFTER DELETE ON Documents
+ BEGIN
+ INSERT INTO Documents_fts(Documents_fts, rowid, path, headings, title, meta)
+ VALUES ('delete', old.id, old.path, old.headings, old.title, old.meta);
+ END
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ // FIXME: doesn't set new.id
+ _, err = tx.Exec(`
+ CREATE TRIGGER IF NOT EXISTS trig_au_doc
+ AFTER UPDATE ON Documents
+ BEGIN
+ INSERT INTO Documents_fts(Documents_fts, rowid, path, headings, title, meta)
+ VALUES ('delete', old.id, old.path, old.headings, old.title, old.meta);
+ INSERT INTO Documents_fts(rowid, path, headings, title, meta)
+ VALUES (new.id, new.path, new.headings, new.title, new.meta);
+ END
+ `)
+ if err != nil {
+ return err
+ }
+
+ // TODO: update to use fts tables
+ _, err = tx.Exec(`
CREATE VIEW IF NOT EXISTS Search AS
SELECT
d.id AS docId,
- d.path,
- d.title,
+ d_fts.path,
+ d_fts.title,
d.date,
d.fileTime,
- d.headings,
- d.meta,
- COALESCE(a.name, al.alias) AS author,
+ d_fts.headings,
+ d_fts.meta,
+ a_fts.name AS author,
t.name AS tag,
l.link
FROM Documents d
+ JOIN Documents_fts as d_fts ON d.id = d_fts.rowid
LEFT JOIN DocumentAuthors da ON d.id = da.docId
- LEFT JOIN Authors a ON da.authorId = a.id
- LEFT JOIN Aliases al ON a.id = al.authorId
+ LEFT JOIN Authors_fts a_fts ON da.authorId = a_fts.rowid
LEFT JOIN DocumentTags dt ON d.id = dt.docId
LEFT JOIN Tags t ON dt.tagId = t.id
LEFT JOIN Links l ON d.id = l.docId
@@ -339,7 +450,7 @@ func (q Query) GetDocument(ctx context.Context, path string) (*index.Document, e
return f.Get(ctx)
}
-// Shrink database by removing unused authors, aliases, tags and VACUUM-ing
+// Shrink database by removing unused authors and tags and VACUUM-ing
func (q Query) Tidy() error {
if _, err := q.db.Exec(`
DELETE FROM Authors
@@ -362,6 +473,16 @@ func (q Query) Tidy() error {
return err
}
+ if _, err := q.db.Exec("INSERT INTO Documents_fts(Documents_fts) VALUES('optimize')"); err != nil {
+ return err
+ }
+ if _, err := q.db.Exec("INSERT INTO Authors_fts(Authors_fts) VALUES('optimize')"); err != nil {
+ return err
+ }
+ if _, err := q.db.Exec("INSERT INTO Tags_fts(Tags_fts) VALUES('optimize')"); err != nil {
+ return err
+ }
+
return nil
}
diff --git a/pkg/data/get.go b/pkg/data/get.go
index 7025c3a..3051e74 100644
--- a/pkg/data/get.go
+++ b/pkg/data/get.go
@@ -17,7 +17,7 @@ type Fill struct {
doc *index.Document
}
-// Use to build documents and aliases from a database connection
+// Use to build documents from a database connection
type FillMany struct {
docs map[string]*index.Document
ids map[string]int
diff --git a/pkg/data/get_test.go b/pkg/data/get_test.go
index 4cadc76..aa1e43e 100644
--- a/pkg/data/get_test.go
+++ b/pkg/data/get_test.go
@@ -29,13 +29,6 @@ func singleDoc(t *testing.T) *sql.DB {
}
if _, err := db.Exec(`
- INSERT INTO Aliases (authorId, alias)
- VALUES (1,"pj"), (1,"JP")
- `); err != nil {
- t.Fatal("err inserting aliases:", err)
- }
-
- if _, err := db.Exec(`
INSERT INTO Tags (name)
VALUES ("foo"), ("bar"), ("baz"), ("oof")
`); err != nil {
@@ -86,13 +79,6 @@ func multiDoc(t *testing.T) *sql.DB {
}
if _, err := db.Exec(`
- INSERT INTO Aliases (authorId, alias)
- VALUES (1,"pj"), (1,"JP")
- `); err != nil {
- t.Fatal("err inserting aliases:", err)
- }
-
- if _, err := db.Exec(`
INSERT INTO Tags (name)
VALUES ("foo"), ("bar"), ("baz"), ("oof")
`); err != nil {
diff --git a/pkg/data/put.go b/pkg/data/put.go
index 4a9a039..4830dbe 100644
--- a/pkg/data/put.go
+++ b/pkg/data/put.go
@@ -280,135 +280,81 @@ func (p Put) authors() error {
return nil
}
- // PERF: consider using temp table instead of cte
- namesCTE, args := BatchQuery("WITH names(n) AS",
- "( VALUES ", "(?)", ",", "),", len(p.Doc.Authors), p.Doc.Authors)
-
- newAuthorsQuery := namesCTE + `
- filtered_names AS (
- SELECT n
- FROM names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = names.n
- WHERE existing.name IS NULL
- )
- INSERT INTO Authors(name)
- SELECT n FROM filtered_names
- `
- if _, err := p.tx.Exec(newAuthorsQuery, args...); err != nil {
- return err
- }
-
- docAuthorsQuery := namesCTE + fmt.Sprintf(`
- matched_authors AS (
- SELECT Authors.id AS author_id
- FROM Authors
- LEFT JOIN Aliases ON Authors.id = Aliases.authorId
- JOIN names ON Authors.name = n OR Aliases.alias = n
- )
- INSERT INTO DocumentAuthors(docId, authorId)
- SELECT %d, author_id FROM matched_authors
- `, p.Id)
- if _, err := p.tx.Exec(docAuthorsQuery, args...); err != nil {
- return err
- }
-
- return nil
-}
-
-func (p PutMany) authors(ctx context.Context) error {
- tx, err := p.db.BeginTx(ctx, nil)
+ authStmt, err := p.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)")
if err != nil {
return err
}
+ defer authStmt.Close()
- _, err = tx.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)")
+ idStmt, err := p.tx.Prepare("SELECT id FROM Authors WHERE name = ?")
if err != nil {
- tx.Rollback()
return err
}
- defer p.db.Exec("DROP TABLE IF EXISTS temp.names")
+ defer idStmt.Close()
- nameStmt, err := tx.Prepare("INSERT OR IGNORE INTO temp.names VALUES (?)")
+ docAuthStmt, err := p.tx.Prepare(
+ fmt.Sprintf("INSERT INTO DocumentAuthors(docId,authorId) VALUES (%d,?)", p.Id),
+ )
if err != nil {
return err
}
- defer nameStmt.Close()
+ defer docAuthStmt.Close()
- txNameStmt := tx.StmtContext(ctx, nameStmt)
- for _, doc := range p.Docs {
- if len(doc.Authors) == 0 {
- continue
+ // sqlite is fast, and i'm too lazy to batch this
+ var authId int64
+ for _, author := range p.Doc.Authors {
+ if _, err := authStmt.Exec(author); err != nil {
+ return err
}
- for _, name := range doc.Authors {
- if _, err := txNameStmt.Exec(name); err != nil {
- tx.Rollback()
- return err
- }
+ if err := idStmt.QueryRow(author).Scan(&authId); err != nil {
+ return err
+ }
+ if _, err := docAuthStmt.Exec(authId); err != nil {
+ return err
}
}
- newAuthorsQuery := `
- WITH new_names AS (
- SELECT temp.names.name
- FROM temp.names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = temp.names.name
- WHERE existing.name IS NULL
- )
- INSERT INTO Authors(name)
- SELECT name FROM new_names
- `
+ return nil
+}
- if _, err := tx.Exec(newAuthorsQuery); err != nil {
- tx.Rollback()
+func (p PutMany) authors(ctx context.Context) error {
+ tx, err := p.db.BeginTx(ctx, nil)
+ if err != nil {
return err
}
- _, err = tx.Exec(`
- CREATE TEMPORARY TABLE name_ids AS
- SELECT names.name AS name, existing.id AS authorId
- FROM temp.names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = temp.names.name
- `)
+ authStmt, err := tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)")
if err != nil {
- tx.Rollback()
return err
}
- defer p.db.Exec("DROP TABLE IF EXISTS temp.name_ids")
+ defer authStmt.Close()
- docAuthorsStmt, err := tx.Prepare(`
- INSERT INTO DocumentAuthors (docId, authorId)
- SELECT ?, authorId
- FROM temp.name_ids
- WHERE name = ?
- `)
+ idStmt, err := tx.Prepare("SELECT id FROM Authors WHERE name = ?")
if err != nil {
- tx.Rollback()
return err
}
- defer docAuthorsStmt.Close()
+ defer idStmt.Close()
- for id, doc := range p.Docs {
- if len(doc.Authors) == 0 {
- continue
- }
- for _, name := range doc.Authors {
- if _, err := tx.Stmt(docAuthorsStmt).Exec(id, name); err != nil {
- tx.Rollback()
+ docAuthStmt, err := tx.Prepare("INSERT INTO DocumentAuthors(docId,authorId) VALUES (?,?)")
+ if err != nil {
+ return err
+ }
+ defer docAuthStmt.Close()
+
+ var authId int64
+ for docId, doc := range p.Docs {
+ for _, author := range doc.Authors {
+ if _, err := authStmt.Exec(author); err != nil {
+ return err
+ }
+ if err := idStmt.QueryRow(author).Scan(&authId); err != nil {
+ return err
+ }
+ if _, err := docAuthStmt.Exec(docId, authId); err != nil {
return err
}
}
+
}
return tx.Commit()
diff --git a/pkg/data/update.go b/pkg/data/update.go
index 6aecb41..52d9d53 100644
--- a/pkg/data/update.go
+++ b/pkg/data/update.go
@@ -390,46 +390,37 @@ func (u Update) authors() error {
return err
}
- tempTable, args := BatchQuery(`
- CREATE TEMPORARY TABLE new_names AS
- SELECT column1 AS name
- FROM ( VALUES `,
- "", "(?)", ",", ")",
- len(u.Doc.Authors), u.Doc.Authors,
- )
- _, err := u.tx.Exec(tempTable, args...)
+ authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)")
if err != nil {
return err
}
- defer u.tx.Exec("DROP TABLE temp.new_names")
+ defer authStmt.Close()
- _, err = u.tx.Exec(`
- INSERT OR IGNORE INTO Authors(name)
- SELECT * FROM new_names
- `)
+ idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE name = ?")
if err != nil {
return err
}
- _, err = u.tx.Exec(`
- INSERT OR IGNORE INTO Aliases(alias)
- SELECT * FROM new_names
- `)
+ defer idStmt.Close()
+
+ docAuthStmt, err := u.tx.Prepare(
+ fmt.Sprintf("INSERT INTO DocumentAuthors(docId,authorId) VALUES (%d,?)", u.Id),
+ )
if err != nil {
return err
}
+ defer docAuthStmt.Close()
- docAuthQuery := fmt.Sprintf(`
- INSERT INTO DocumentAuthors
- SELECT %d, existing.id
- FROM new_names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = new_names.name
- `, u.Id)
- if _, err := u.tx.Exec(docAuthQuery); err != nil {
- return err
+ var authId int64
+ for _, author := range u.Doc.Authors {
+ if _, err := authStmt.Exec(author); err != nil {
+ return err
+ }
+ if err := idStmt.QueryRow(author).Scan(&authId); err != nil {
+ return err
+ }
+ if _, err := docAuthStmt.Exec(authId); err != nil {
+ return err
+ }
}
return nil
@@ -442,61 +433,42 @@ func (u UpdateMany) authors() error {
}
defer deleteStmt.Close()
- _, err = u.tx.Exec(`
- CREATE TEMPORARY TABLE new_names (
- docId INTEGER NOT NULL,
- name TEXT NOT NULL,
- UNIQUE(docId, name)
- )`)
+ authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)")
if err != nil {
return err
}
- defer u.tx.Exec("DROP TABLE temp.new_names")
+ defer authStmt.Close()
- insertTempTable, err := u.tx.Prepare("INSERT INTO temp.new_names VALUES (?,?)")
+ idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE name = ?")
if err != nil {
return err
}
- defer insertTempTable.Close()
+ defer idStmt.Close()
- for id, doc := range u.Docs {
- if _, err := deleteStmt.Exec(id); err != nil {
+ docAuthStmt, err := u.tx.Prepare("INSERT INTO DocumentAuthors(docId,authorId) VALUES (?,?)")
+ if err != nil {
+ return err
+ }
+ defer docAuthStmt.Close()
+
+ var authId int64
+ for docId, doc := range u.Docs {
+ if _, err := deleteStmt.Exec(docId); err != nil {
return err
}
-
for _, author := range doc.Authors {
- if _, err := insertTempTable.Exec(id, author); err != nil {
+ if _, err := authStmt.Exec(author); err != nil {
+ return err
+ }
+ if err := idStmt.QueryRow(author).Scan(&authId); err != nil {
+ return err
+ }
+ if _, err := docAuthStmt.Exec(docId, authId); err != nil {
return err
}
}
- }
- _, err = u.tx.Exec(`
- INSERT OR IGNORE INTO Authors(name)
- SELECT name FROM new_names
- `)
- if err != nil {
- return err
- }
-
- _, err = u.tx.Exec(`
- INSERT OR IGNORE INTO Aliases(alias)
- SELECT name FROM new_names
- `)
- if err != nil {
- return err
}
- _, err = u.tx.Exec(`
- INSERT INTO DocumentAuthors
- SELECT docId, existing.id
- FROM new_names
- LEFT JOIN (
- SELECT * FROM Authors
- UNION ALL
- SELECT * FROM Aliases
- ) AS existing ON existing.name = new_names.name
- `)
-
return err
}
diff --git a/pkg/data/update_test.go b/pkg/data/update_test.go
index dc2365b..c4c1ed4 100644
--- a/pkg/data/update_test.go
+++ b/pkg/data/update_test.go
@@ -5,6 +5,7 @@ import (
"database/sql"
"errors"
"maps"
+ "slices"
"testing"
"time"
@@ -265,8 +266,53 @@ func TestUpdateMany_Update(t *testing.T) {
return a.Equal(*b)
}) {
t.Error("Got different docs than expected")
- t.Logf("Got:\n%+v\n", docs)
- t.Logf("Want:\n%+v\n", tt.docs)
+ if len(docs) != len(tt.docs) {
+ t.Logf("Wanted %d docs, got %d", len(tt.docs), len(docs))
+ }
+
+ for path, wantDoc := range tt.docs {
+ gotDoc, ok := docs[path]
+ if !ok {
+ t.Logf("Wanted doc at %s but did not recieve it", path)
+ continue
+ } else if wantDoc.Equal(*gotDoc) {
+ continue
+ }
+
+ t.Log("Doc: ", path)
+ if wantDoc.Title != gotDoc.Title {
+ t.Log("want Title:", wantDoc.Title)
+ t.Log("Got Title:", gotDoc.Title)
+ }
+ if !wantDoc.Date.Equal(gotDoc.Date) {
+ t.Log("want Date:", wantDoc.Date)
+ t.Log("got Date:", gotDoc.Date)
+ }
+ if !wantDoc.FileTime.Equal(gotDoc.FileTime) {
+ t.Log("want filetime:", wantDoc.FileTime)
+ t.Log("got filetime:", gotDoc.FileTime)
+ }
+ if !slices.Equal(wantDoc.Authors, gotDoc.Authors) {
+ t.Log("want authors:", wantDoc.Authors)
+ t.Log("got authors:", gotDoc.Authors)
+ }
+ if !slices.Equal(wantDoc.Tags, gotDoc.Tags) {
+ t.Log("want tags:", wantDoc.Tags)
+ t.Log("got tags:", gotDoc.Tags)
+ }
+ if !slices.Equal(wantDoc.Links, gotDoc.Links) {
+ t.Log("want links:", wantDoc.Links)
+ t.Log("got links:", gotDoc.Links)
+ }
+ if wantDoc.Headings != gotDoc.Headings {
+ t.Log("want headings:", wantDoc.Headings)
+ t.Log("got headings:", gotDoc.Headings)
+ }
+ if wantDoc.OtherMeta != gotDoc.OtherMeta {
+ t.Log("want meta:", wantDoc.OtherMeta)
+ t.Log("got meta:", gotDoc.OtherMeta)
+ }
+ }
}
})
}