diff options
Diffstat (limited to 'pkg')
| -rw-r--r-- | pkg/data/db.go | 187 | ||||
| -rw-r--r-- | pkg/data/get.go | 2 | ||||
| -rw-r--r-- | pkg/data/get_test.go | 14 | ||||
| -rw-r--r-- | pkg/data/put.go | 142 | ||||
| -rw-r--r-- | pkg/data/update.go | 108 | ||||
| -rw-r--r-- | pkg/data/update_test.go | 50 | ||||
| -rw-r--r-- | pkg/index/schema.sql | 70 |
7 files changed, 287 insertions, 286 deletions
diff --git a/pkg/data/db.go b/pkg/data/db.go index bf707b4..58244ac 100644 --- a/pkg/data/db.go +++ b/pkg/data/db.go @@ -126,17 +126,6 @@ func createSchema(db *sql.DB, version string) error { } _, err = tx.Exec(` - CREATE TABLE IF NOT EXISTS Aliases( - authorId INT NOT NULL, - alias TEXT UNIQUE NOT NULL, - FOREIGN KEY (authorId) REFERENCES Authors(id) ON DELETE CASCADE - )`) - if err != nil { - tx.Rollback() - return err - } - - _, err = tx.Exec(` CREATE TABLE IF NOT EXISTS Tags( id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL @@ -201,37 +190,87 @@ func createSchema(db *sql.DB, version string) error { return err } - _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_alias ON Aliases(alias)") + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_links_link ON Links(link)") if err != nil { tx.Rollback() return err } - _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_authorId ON Aliases(authorId)") + _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)") if err != nil { tx.Rollback() return err } - _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_links_link ON Links(link)") + _, err = tx.Exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS Documents_fts + USING fts5 ( + path, headings, title, meta, content=Documents, content_rowid=id + ) + `) if err != nil { tx.Rollback() return err } - _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)") + _, err = tx.Exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS Authors_fts + USING fts5 ( + name, content=Authors, content_rowid=id + ) + `) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS Tags_fts + USING fts5 ( + name, content=Tags, content_rowid=id + ) + `) + if err != nil { + tx.Rollback() + return err + } + + // FIXME: doesn't set new.id + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_ai_authors + AFTER INSERT ON Authors + BEGIN + INSERT INTO Authors_fts(rowid, name) + VALUES (new.id, new.name); + END + `) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_ad_authors + AFTER DELETE ON Authors + BEGIN + INSERT INTO Authors_fts(Authors_fts, rowid, name) + VALUES ('delete', old.id, old.name); + END + `) if err != nil { tx.Rollback() return err } + // FIXME: doesn't set new.id _, err = tx.Exec(` - CREATE TRIGGER IF NOT EXISTS trig_new_author - BEFORE INSERT ON Authors + CREATE TRIGGER IF NOT EXISTS trig_au_authors + AFTER UPDATE ON Authors BEGIN - SELECT CASE WHEN NEW.name IN (SELECT alias FROM Aliases) THEN - RAISE(IGNORE) - END; + INSERT INTO Authors_fts(Authors_fts, rowid, name) + VALUES ('delete', old.id, old.name); + INSERT INTO Authors_fts(rowid, name) + VALUES (new.id, new.name); END `) if err != nil { @@ -239,13 +278,13 @@ func createSchema(db *sql.DB, version string) error { return err } + // FIXME: doesn't set new.id _, err = tx.Exec(` - CREATE TRIGGER IF NOT EXISTS trig_new_alias - BEFORE INSERT ON Aliases + CREATE TRIGGER IF NOT EXISTS trig_ai_tags + AFTER INSERT ON Tags BEGIN - SELECT CASE WHEN NEW.alias IN (SELECT name FROM Authors) THEN - RAISE(IGNORE) - END; + INSERT INTO Tags_fts(rowid, name) + VALUES (new.id, new.name); END `) if err != nil { @@ -254,22 +293,94 @@ func createSchema(db *sql.DB, version string) error { } _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_ad_tags + AFTER DELETE ON Tags + BEGIN + INSERT INTO Tags_fts(Tags_fts, rowid, name) + VALUES ('delete', old.id, old.name); + END + `) + if err != nil { + tx.Rollback() + return err + } + + // FIXME: doesn't set new.id + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_au_tags + AFTER UPDATE ON Tags + BEGIN + INSERT INTO Tags_fts(Tags_fts, rowid, name) + VALUES ('delete', old.id, old.name); + INSERT INTO Tags_fts(rowid, name) + VALUES (new.id, new.name); + END + `) + if err != nil { + tx.Rollback() + return err + } + + // FIXME: doesn't set new.id + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_ai_doc + AFTER INSERT ON Documents + BEGIN + INSERT INTO Documents_fts(rowid, path, headings, title, meta) + VALUES (new.id, new.path, new.headings, new.title, new.meta); + END + `) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_ad_doc + AFTER DELETE ON Documents + BEGIN + INSERT INTO Documents_fts(Documents_fts, rowid, path, headings, title, meta) + VALUES ('delete', old.id, old.path, old.headings, old.title, old.meta); + END + `) + if err != nil { + tx.Rollback() + return err + } + + // FIXME: doesn't set new.id + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_au_doc + AFTER UPDATE ON Documents + BEGIN + INSERT INTO Documents_fts(Documents_fts, rowid, path, headings, title, meta) + VALUES ('delete', old.id, old.path, old.headings, old.title, old.meta); + INSERT INTO Documents_fts(rowid, path, headings, title, meta) + VALUES (new.id, new.path, new.headings, new.title, new.meta); + END + `) + if err != nil { + return err + } + + // TODO: update to use fts tables + _, err = tx.Exec(` CREATE VIEW IF NOT EXISTS Search AS SELECT d.id AS docId, - d.path, - d.title, + d_fts.path, + d_fts.title, d.date, d.fileTime, - d.headings, - d.meta, - COALESCE(a.name, al.alias) AS author, + d_fts.headings, + d_fts.meta, + a_fts.name AS author, t.name AS tag, l.link FROM Documents d + JOIN Documents_fts as d_fts ON d.id = d_fts.rowid LEFT JOIN DocumentAuthors da ON d.id = da.docId - LEFT JOIN Authors a ON da.authorId = a.id - LEFT JOIN Aliases al ON a.id = al.authorId + LEFT JOIN Authors_fts a_fts ON da.authorId = a_fts.rowid LEFT JOIN DocumentTags dt ON d.id = dt.docId LEFT JOIN Tags t ON dt.tagId = t.id LEFT JOIN Links l ON d.id = l.docId @@ -339,7 +450,7 @@ func (q Query) GetDocument(ctx context.Context, path string) (*index.Document, e return f.Get(ctx) } -// Shrink database by removing unused authors, aliases, tags and VACUUM-ing +// Shrink database by removing unused authors and tags and VACUUM-ing func (q Query) Tidy() error { if _, err := q.db.Exec(` DELETE FROM Authors @@ -362,6 +473,16 @@ func (q Query) Tidy() error { return err } + if _, err := q.db.Exec("INSERT INTO Documents_fts(Documents_fts) VALUES('optimize')"); err != nil { + return err + } + if _, err := q.db.Exec("INSERT INTO Authors_fts(Authors_fts) VALUES('optimize')"); err != nil { + return err + } + if _, err := q.db.Exec("INSERT INTO Tags_fts(Tags_fts) VALUES('optimize')"); err != nil { + return err + } + return nil } diff --git a/pkg/data/get.go b/pkg/data/get.go index 7025c3a..3051e74 100644 --- a/pkg/data/get.go +++ b/pkg/data/get.go @@ -17,7 +17,7 @@ type Fill struct { doc *index.Document } -// Use to build documents and aliases from a database connection +// Use to build documents from a database connection type FillMany struct { docs map[string]*index.Document ids map[string]int diff --git a/pkg/data/get_test.go b/pkg/data/get_test.go index 4cadc76..aa1e43e 100644 --- a/pkg/data/get_test.go +++ b/pkg/data/get_test.go @@ -29,13 +29,6 @@ func singleDoc(t *testing.T) *sql.DB { } if _, err := db.Exec(` - INSERT INTO Aliases (authorId, alias) - VALUES (1,"pj"), (1,"JP") - `); err != nil { - t.Fatal("err inserting aliases:", err) - } - - if _, err := db.Exec(` INSERT INTO Tags (name) VALUES ("foo"), ("bar"), ("baz"), ("oof") `); err != nil { @@ -86,13 +79,6 @@ func multiDoc(t *testing.T) *sql.DB { } if _, err := db.Exec(` - INSERT INTO Aliases (authorId, alias) - VALUES (1,"pj"), (1,"JP") - `); err != nil { - t.Fatal("err inserting aliases:", err) - } - - if _, err := db.Exec(` INSERT INTO Tags (name) VALUES ("foo"), ("bar"), ("baz"), ("oof") `); err != nil { diff --git a/pkg/data/put.go b/pkg/data/put.go index 4a9a039..4830dbe 100644 --- a/pkg/data/put.go +++ b/pkg/data/put.go @@ -280,135 +280,81 @@ func (p Put) authors() error { return nil } - // PERF: consider using temp table instead of cte - namesCTE, args := BatchQuery("WITH names(n) AS", - "( VALUES ", "(?)", ",", "),", len(p.Doc.Authors), p.Doc.Authors) - - newAuthorsQuery := namesCTE + ` - filtered_names AS ( - SELECT n - FROM names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = names.n - WHERE existing.name IS NULL - ) - INSERT INTO Authors(name) - SELECT n FROM filtered_names - ` - if _, err := p.tx.Exec(newAuthorsQuery, args...); err != nil { - return err - } - - docAuthorsQuery := namesCTE + fmt.Sprintf(` - matched_authors AS ( - SELECT Authors.id AS author_id - FROM Authors - LEFT JOIN Aliases ON Authors.id = Aliases.authorId - JOIN names ON Authors.name = n OR Aliases.alias = n - ) - INSERT INTO DocumentAuthors(docId, authorId) - SELECT %d, author_id FROM matched_authors - `, p.Id) - if _, err := p.tx.Exec(docAuthorsQuery, args...); err != nil { - return err - } - - return nil -} - -func (p PutMany) authors(ctx context.Context) error { - tx, err := p.db.BeginTx(ctx, nil) + authStmt, err := p.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") if err != nil { return err } + defer authStmt.Close() - _, err = tx.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)") + idStmt, err := p.tx.Prepare("SELECT id FROM Authors WHERE name = ?") if err != nil { - tx.Rollback() return err } - defer p.db.Exec("DROP TABLE IF EXISTS temp.names") + defer idStmt.Close() - nameStmt, err := tx.Prepare("INSERT OR IGNORE INTO temp.names VALUES (?)") + docAuthStmt, err := p.tx.Prepare( + fmt.Sprintf("INSERT INTO DocumentAuthors(docId,authorId) VALUES (%d,?)", p.Id), + ) if err != nil { return err } - defer nameStmt.Close() + defer docAuthStmt.Close() - txNameStmt := tx.StmtContext(ctx, nameStmt) - for _, doc := range p.Docs { - if len(doc.Authors) == 0 { - continue + // sqlite is fast, and i'm too lazy to batch this + var authId int64 + for _, author := range p.Doc.Authors { + if _, err := authStmt.Exec(author); err != nil { + return err } - for _, name := range doc.Authors { - if _, err := txNameStmt.Exec(name); err != nil { - tx.Rollback() - return err - } + if err := idStmt.QueryRow(author).Scan(&authId); err != nil { + return err + } + if _, err := docAuthStmt.Exec(authId); err != nil { + return err } } - newAuthorsQuery := ` - WITH new_names AS ( - SELECT temp.names.name - FROM temp.names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = temp.names.name - WHERE existing.name IS NULL - ) - INSERT INTO Authors(name) - SELECT name FROM new_names - ` + return nil +} - if _, err := tx.Exec(newAuthorsQuery); err != nil { - tx.Rollback() +func (p PutMany) authors(ctx context.Context) error { + tx, err := p.db.BeginTx(ctx, nil) + if err != nil { return err } - _, err = tx.Exec(` - CREATE TEMPORARY TABLE name_ids AS - SELECT names.name AS name, existing.id AS authorId - FROM temp.names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = temp.names.name - `) + authStmt, err := tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") if err != nil { - tx.Rollback() return err } - defer p.db.Exec("DROP TABLE IF EXISTS temp.name_ids") + defer authStmt.Close() - docAuthorsStmt, err := tx.Prepare(` - INSERT INTO DocumentAuthors (docId, authorId) - SELECT ?, authorId - FROM temp.name_ids - WHERE name = ? - `) + idStmt, err := tx.Prepare("SELECT id FROM Authors WHERE name = ?") if err != nil { - tx.Rollback() return err } - defer docAuthorsStmt.Close() + defer idStmt.Close() - for id, doc := range p.Docs { - if len(doc.Authors) == 0 { - continue - } - for _, name := range doc.Authors { - if _, err := tx.Stmt(docAuthorsStmt).Exec(id, name); err != nil { - tx.Rollback() + docAuthStmt, err := tx.Prepare("INSERT INTO DocumentAuthors(docId,authorId) VALUES (?,?)") + if err != nil { + return err + } + defer docAuthStmt.Close() + + var authId int64 + for docId, doc := range p.Docs { + for _, author := range doc.Authors { + if _, err := authStmt.Exec(author); err != nil { + return err + } + if err := idStmt.QueryRow(author).Scan(&authId); err != nil { + return err + } + if _, err := docAuthStmt.Exec(docId, authId); err != nil { return err } } + } return tx.Commit() diff --git a/pkg/data/update.go b/pkg/data/update.go index 6aecb41..52d9d53 100644 --- a/pkg/data/update.go +++ b/pkg/data/update.go @@ -390,46 +390,37 @@ func (u Update) authors() error { return err } - tempTable, args := BatchQuery(` - CREATE TEMPORARY TABLE new_names AS - SELECT column1 AS name - FROM ( VALUES `, - "", "(?)", ",", ")", - len(u.Doc.Authors), u.Doc.Authors, - ) - _, err := u.tx.Exec(tempTable, args...) + authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") if err != nil { return err } - defer u.tx.Exec("DROP TABLE temp.new_names") + defer authStmt.Close() - _, err = u.tx.Exec(` - INSERT OR IGNORE INTO Authors(name) - SELECT * FROM new_names - `) + idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE name = ?") if err != nil { return err } - _, err = u.tx.Exec(` - INSERT OR IGNORE INTO Aliases(alias) - SELECT * FROM new_names - `) + defer idStmt.Close() + + docAuthStmt, err := u.tx.Prepare( + fmt.Sprintf("INSERT INTO DocumentAuthors(docId,authorId) VALUES (%d,?)", u.Id), + ) if err != nil { return err } + defer docAuthStmt.Close() - docAuthQuery := fmt.Sprintf(` - INSERT INTO DocumentAuthors - SELECT %d, existing.id - FROM new_names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = new_names.name - `, u.Id) - if _, err := u.tx.Exec(docAuthQuery); err != nil { - return err + var authId int64 + for _, author := range u.Doc.Authors { + if _, err := authStmt.Exec(author); err != nil { + return err + } + if err := idStmt.QueryRow(author).Scan(&authId); err != nil { + return err + } + if _, err := docAuthStmt.Exec(authId); err != nil { + return err + } } return nil @@ -442,61 +433,42 @@ func (u UpdateMany) authors() error { } defer deleteStmt.Close() - _, err = u.tx.Exec(` - CREATE TEMPORARY TABLE new_names ( - docId INTEGER NOT NULL, - name TEXT NOT NULL, - UNIQUE(docId, name) - )`) + authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") if err != nil { return err } - defer u.tx.Exec("DROP TABLE temp.new_names") + defer authStmt.Close() - insertTempTable, err := u.tx.Prepare("INSERT INTO temp.new_names VALUES (?,?)") + idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE name = ?") if err != nil { return err } - defer insertTempTable.Close() + defer idStmt.Close() - for id, doc := range u.Docs { - if _, err := deleteStmt.Exec(id); err != nil { + docAuthStmt, err := u.tx.Prepare("INSERT INTO DocumentAuthors(docId,authorId) VALUES (?,?)") + if err != nil { + return err + } + defer docAuthStmt.Close() + + var authId int64 + for docId, doc := range u.Docs { + if _, err := deleteStmt.Exec(docId); err != nil { return err } - for _, author := range doc.Authors { - if _, err := insertTempTable.Exec(id, author); err != nil { + if _, err := authStmt.Exec(author); err != nil { + return err + } + if err := idStmt.QueryRow(author).Scan(&authId); err != nil { + return err + } + if _, err := docAuthStmt.Exec(docId, authId); err != nil { return err } } - } - _, err = u.tx.Exec(` - INSERT OR IGNORE INTO Authors(name) - SELECT name FROM new_names - `) - if err != nil { - return err - } - - _, err = u.tx.Exec(` - INSERT OR IGNORE INTO Aliases(alias) - SELECT name FROM new_names - `) - if err != nil { - return err } - _, err = u.tx.Exec(` - INSERT INTO DocumentAuthors - SELECT docId, existing.id - FROM new_names - LEFT JOIN ( - SELECT * FROM Authors - UNION ALL - SELECT * FROM Aliases - ) AS existing ON existing.name = new_names.name - `) - return err } diff --git a/pkg/data/update_test.go b/pkg/data/update_test.go index dc2365b..c4c1ed4 100644 --- a/pkg/data/update_test.go +++ b/pkg/data/update_test.go @@ -5,6 +5,7 @@ import ( "database/sql" "errors" "maps" + "slices" "testing" "time" @@ -265,8 +266,53 @@ func TestUpdateMany_Update(t *testing.T) { return a.Equal(*b) }) { t.Error("Got different docs than expected") - t.Logf("Got:\n%+v\n", docs) - t.Logf("Want:\n%+v\n", tt.docs) + if len(docs) != len(tt.docs) { + t.Logf("Wanted %d docs, got %d", len(tt.docs), len(docs)) + } + + for path, wantDoc := range tt.docs { + gotDoc, ok := docs[path] + if !ok { + t.Logf("Wanted doc at %s but did not recieve it", path) + continue + } else if wantDoc.Equal(*gotDoc) { + continue + } + + t.Log("Doc: ", path) + if wantDoc.Title != gotDoc.Title { + t.Log("want Title:", wantDoc.Title) + t.Log("Got Title:", gotDoc.Title) + } + if !wantDoc.Date.Equal(gotDoc.Date) { + t.Log("want Date:", wantDoc.Date) + t.Log("got Date:", gotDoc.Date) + } + if !wantDoc.FileTime.Equal(gotDoc.FileTime) { + t.Log("want filetime:", wantDoc.FileTime) + t.Log("got filetime:", gotDoc.FileTime) + } + if !slices.Equal(wantDoc.Authors, gotDoc.Authors) { + t.Log("want authors:", wantDoc.Authors) + t.Log("got authors:", gotDoc.Authors) + } + if !slices.Equal(wantDoc.Tags, gotDoc.Tags) { + t.Log("want tags:", wantDoc.Tags) + t.Log("got tags:", gotDoc.Tags) + } + if !slices.Equal(wantDoc.Links, gotDoc.Links) { + t.Log("want links:", wantDoc.Links) + t.Log("got links:", gotDoc.Links) + } + if wantDoc.Headings != gotDoc.Headings { + t.Log("want headings:", wantDoc.Headings) + t.Log("got headings:", gotDoc.Headings) + } + if wantDoc.OtherMeta != gotDoc.OtherMeta { + t.Log("want meta:", wantDoc.OtherMeta) + t.Log("got meta:", gotDoc.OtherMeta) + } + } } }) } diff --git a/pkg/index/schema.sql b/pkg/index/schema.sql deleted file mode 100644 index fb06351..0000000 --- a/pkg/index/schema.sql +++ /dev/null @@ -1,70 +0,0 @@ --- TABLE of config values -CREATE TABLE Indexes( - root TEXT NOT NULL, - followSym DATE -); - --- Schema -CREATE TABLE Documents( - id INTEGER PRIMARY KEY, - path TEXT UNIQUE NOT NULL, - title TEXT, - date INT, - fileTime INT, - meta BLOB -); - -CREATE TABLE Authors( - id INTEGER PRIMARY KEY, - name TEXT UNIQUE NOT NULL -); - -CREATE TABLE Aliases( - authorId INT NOT NULL, - alias TEXT UNIQUE NOT NULL, - FOREIGN KEY (authorId) REFERENCES Authors(id) -); - -CREATE TABLE Tags( - id INTEGER PRIMARY KEY, - name TEXT UNIQUE NOT NULL, -); - -CREATE TABLE Links( - referencedId INT, - refererId INT, - FOREIGN KEY (referencedId) REFERENCES Documents(id), - FOREIGN KEY (refererId) REFERENCES Documents(id) -); - -CREATE TABLE DocumentAuthors( - docId INT NOT NULL, - authorId INT NOT NULL, - FOREIGN KEY (docId) REFERENCES Documents(id), - FOREIGN KEY (authorId) REFERENCES Authors(id) -); - -CREATE TABLE DocumentTags( - docId INT NOT NULL, - tagId INT NOT NULL, - FOREIGN KEY (docId) REFERENCES Documents(id), - FOREIGN KEY (tagId) REFERENCES Tags(id), - UNIQUE(docId, tagId) -); - --- Indexes -CREATE INDEX idx_doc_dates -ON Documents (date); -CREATE INDEX idx_doc_titles -ON Documents (title); - -CREATE INDEX idx_author_name -ON Authors(name); - -CREATE INDEX idx_aliases_alias -ON Aliases(alias); -CREATE INDEX idx_aliases_authorId -ON Aliases(authorId); - -CREATE INDEX idx_doctags_tagid -ON DocumentTags (tagId); |
