From 49a2d76d2be793f9aeddd9997ae8abba4a7f03f2 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Sun, 10 Aug 2025 20:39:00 -0400 Subject: Fix approximate queries on nullable categories --- pkg/data/db.go | 115 +++++++++++++++++++++++++++++++++++++-------------- pkg/data/get.go | 20 ++++----- pkg/data/get_test.go | 8 ++-- pkg/data/put.go | 16 +++---- pkg/data/update.go | 16 +++---- 5 files changed, 114 insertions(+), 61 deletions(-) (limited to 'pkg/data') diff --git a/pkg/data/db.go b/pkg/data/db.go index 58244ac..7d151e1 100644 --- a/pkg/data/db.go +++ b/pkg/data/db.go @@ -118,7 +118,7 @@ func createSchema(db *sql.DB, version string) error { _, err = tx.Exec(` CREATE TABLE IF NOT EXISTS Authors( id INTEGER PRIMARY KEY, - name TEXT UNIQUE NOT NULL + author TEXT UNIQUE NOT NULL )`) if err != nil { tx.Rollback() @@ -128,7 +128,7 @@ func createSchema(db *sql.DB, version string) error { _, err = tx.Exec(` CREATE TABLE IF NOT EXISTS Tags( id INTEGER PRIMARY KEY, - name TEXT UNIQUE NOT NULL + tag TEXT UNIQUE NOT NULL )`) if err != nil { tx.Rollback() @@ -205,7 +205,7 @@ func createSchema(db *sql.DB, version string) error { _, err = tx.Exec(` CREATE VIRTUAL TABLE IF NOT EXISTS Documents_fts USING fts5 ( - path, headings, title, meta, content=Documents, content_rowid=id + path, headings, title, meta, content=Documents, content_rowid=id, tokenize="trigram" ) `) if err != nil { @@ -216,7 +216,7 @@ func createSchema(db *sql.DB, version string) error { _, err = tx.Exec(` CREATE VIRTUAL TABLE IF NOT EXISTS Authors_fts USING fts5 ( - name, content=Authors, content_rowid=id + author, content=Authors, content_rowid=id, tokenize="trigram" ) `) if err != nil { @@ -227,7 +227,7 @@ func createSchema(db *sql.DB, version string) error { _, err = tx.Exec(` CREATE VIRTUAL TABLE IF NOT EXISTS Tags_fts USING fts5 ( - name, content=Tags, content_rowid=id + tag, content=Tags, content_rowid=id, tokenize="trigram" ) `) if err != nil { @@ -235,13 +235,20 @@ func createSchema(db *sql.DB, version string) error { return err } + _, err = tx.Exec(` + CREATE VIRTUAL TABLE IF NOT EXISTS Links_fts + USING fts5 ( + link, docId UNINDEXED,content=Links, tokenize="trigram" + ) + `) + // FIXME: doesn't set new.id _, err = tx.Exec(` CREATE TRIGGER IF NOT EXISTS trig_ai_authors AFTER INSERT ON Authors BEGIN - INSERT INTO Authors_fts(rowid, name) - VALUES (new.id, new.name); + INSERT INTO Authors_fts(rowid, author) + VALUES (new.id, new.author); END `) if err != nil { @@ -253,8 +260,8 @@ func createSchema(db *sql.DB, version string) error { CREATE TRIGGER IF NOT EXISTS trig_ad_authors AFTER DELETE ON Authors BEGIN - INSERT INTO Authors_fts(Authors_fts, rowid, name) - VALUES ('delete', old.id, old.name); + INSERT INTO Authors_fts(Authors_fts, rowid, author) + VALUES ('delete', old.id, old.author); END `) if err != nil { @@ -267,10 +274,10 @@ func createSchema(db *sql.DB, version string) error { CREATE TRIGGER IF NOT EXISTS trig_au_authors AFTER UPDATE ON Authors BEGIN - INSERT INTO Authors_fts(Authors_fts, rowid, name) - VALUES ('delete', old.id, old.name); - INSERT INTO Authors_fts(rowid, name) - VALUES (new.id, new.name); + INSERT INTO Authors_fts(Authors_fts, rowid, author) + VALUES ('delete', old.id, old.author); + INSERT INTO Authors_fts(rowid, author) + VALUES (new.id, new.author); END `) if err != nil { @@ -278,13 +285,12 @@ func createSchema(db *sql.DB, version string) error { return err } - // FIXME: doesn't set new.id _, err = tx.Exec(` CREATE TRIGGER IF NOT EXISTS trig_ai_tags AFTER INSERT ON Tags BEGIN - INSERT INTO Tags_fts(rowid, name) - VALUES (new.id, new.name); + INSERT INTO Tags_fts(rowid, tag) + VALUES (new.id, new.tag); END `) if err != nil { @@ -296,8 +302,8 @@ func createSchema(db *sql.DB, version string) error { CREATE TRIGGER IF NOT EXISTS trig_ad_tags AFTER DELETE ON Tags BEGIN - INSERT INTO Tags_fts(Tags_fts, rowid, name) - VALUES ('delete', old.id, old.name); + INSERT INTO Tags_fts(Tags_fts, rowid, tag) + VALUES ('delete', old.id, old.tag); END `) if err != nil { @@ -310,10 +316,52 @@ func createSchema(db *sql.DB, version string) error { CREATE TRIGGER IF NOT EXISTS trig_au_tags AFTER UPDATE ON Tags BEGIN - INSERT INTO Tags_fts(Tags_fts, rowid, name) - VALUES ('delete', old.id, old.name); - INSERT INTO Tags_fts(rowid, name) - VALUES (new.id, new.name); + INSERT INTO Tags_fts(Tags_fts, rowid, tag) + VALUES ('delete', old.id, old.tag); + INSERT INTO Tags_fts(rowid, tag) + VALUES (new.id, new.tag); + END + `) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_ai_links + AFTER INSERT ON Links + BEGIN + INSERT INTO Links_fts(rowid, link, docId) + VALUES (new.rowid, new.link, new.docId); + END + `) + if err != nil { + tx.Rollback() + return err + } + + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_ad_links + AFTER DELETE ON Links + BEGIN + INSERT INTO Links_fts(Links_fts, rowid, link, docId) + VALUES ('delete', old.rowid, old.link, old.docId); + END + `) + if err != nil { + tx.Rollback() + return err + } + + // FIXME: doesn't set new.id + _, err = tx.Exec(` + CREATE TRIGGER IF NOT EXISTS trig_au_links + AFTER UPDATE ON Links + BEGIN + INSERT INTO Links_fts(Links_fts, rowid, link, docId) + VALUES ('delete', old.rowid, old.link, old.docId); + INSERT INTO Links_fts(rowid, link, docId) + VALUES (new.rowid, new.link, new.docId); END `) if err != nil { @@ -363,7 +411,6 @@ func createSchema(db *sql.DB, version string) error { return err } - // TODO: update to use fts tables _, err = tx.Exec(` CREATE VIEW IF NOT EXISTS Search AS SELECT @@ -374,16 +421,16 @@ func createSchema(db *sql.DB, version string) error { d.fileTime, d_fts.headings, d_fts.meta, - a_fts.name AS author, - t.name AS tag, - l.link + a_fts.author, + t_fts.tag, + l_fts.link FROM Documents d JOIN Documents_fts as d_fts ON d.id = d_fts.rowid LEFT JOIN DocumentAuthors da ON d.id = da.docId LEFT JOIN Authors_fts a_fts ON da.authorId = a_fts.rowid LEFT JOIN DocumentTags dt ON d.id = dt.docId - LEFT JOIN Tags t ON dt.tagId = t.id - LEFT JOIN Links l ON d.id = l.docId + LEFT JOIN Tags_fts t_fts ON dt.tagId = t_fts.rowid + LEFT JOIN Links_fts l_fts ON d.id = l_fts.docId `) if err != nil { tx.Rollback() @@ -517,9 +564,15 @@ func (q Query) Execute(ctx context.Context, artifact query.CompilationArtifact) } compiledQuery := fmt.Sprintf(` - SELECT DISTINCT docId, path, title, date, fileTime, headings, meta - FROM Search - WHERE %s`, artifact.Query) + SELECT id, d.path, d.title, d.date, d.fileTime, d.headings, d.meta + FROM Documents d + JOIN ( + SELECT DISTINCT docId + FROM Search + WHERE %s + ) s + ON d.id = s.docId + `, artifact.Query) rows, err := q.db.QueryContext(ctx, compiledQuery, artifact.Args...) if err != nil { diff --git a/pkg/data/get.go b/pkg/data/get.go index 3051e74..b940ecd 100644 --- a/pkg/data/get.go +++ b/pkg/data/get.go @@ -167,7 +167,7 @@ func (f *FillMany) documents(ctx context.Context, rows *sql.Rows) error { } func (f Fill) authors(ctx context.Context) error { rows, err := f.Db.QueryContext(ctx, ` - SELECT name + SELECT author FROM Authors JOIN DocumentAuthors ON Authors.id = DocumentAuthors.authorId @@ -178,13 +178,13 @@ func (f Fill) authors(ctx context.Context) error { } defer rows.Close() - var name string + var author string authors := make([]string, 0, 4) for rows.Next() { - if err := rows.Scan(&name); err != nil { + if err := rows.Scan(&author); err != nil { return err } - authors = append(authors, name) + authors = append(authors, author) } f.doc.Authors = authors @@ -194,7 +194,7 @@ func (f Fill) authors(ctx context.Context) error { func (f FillMany) authors(ctx context.Context) error { stmt, err := f.Db.PrepareContext(ctx, ` - SELECT name + SELECT author FROM Authors JOIN DocumentAuthors ON Authors.id = DocumentAuthors.authorId @@ -206,7 +206,7 @@ func (f FillMany) authors(ctx context.Context) error { defer stmt.Close() // PERF: parallelize - var name string + var author string for path, id := range f.ids { rows, err := stmt.QueryContext(ctx, id) if err != nil { @@ -215,12 +215,12 @@ func (f FillMany) authors(ctx context.Context) error { doc := f.docs[path] for rows.Next() { - if err := rows.Scan(&name); err != nil { + if err := rows.Scan(&author); err != nil { rows.Close() return err } - doc.Authors = append(doc.Authors, name) + doc.Authors = append(doc.Authors, author) } rows.Close() @@ -231,7 +231,7 @@ func (f FillMany) authors(ctx context.Context) error { func (f Fill) tags(ctx context.Context) error { rows, err := f.Db.QueryContext(ctx, ` - SELECT name + SELECT tag FROM Tags JOIN DocumentTags ON Tags.id = DocumentTags.tagId @@ -258,7 +258,7 @@ func (f Fill) tags(ctx context.Context) error { func (f FillMany) tags(ctx context.Context) error { stmt, err := f.Db.PrepareContext(ctx, ` - SELECT name + SELECT tag FROM Tags JOIN DocumentTags ON Tags.id = DocumentTags.tagId diff --git a/pkg/data/get_test.go b/pkg/data/get_test.go index aa1e43e..f5f20ab 100644 --- a/pkg/data/get_test.go +++ b/pkg/data/get_test.go @@ -22,14 +22,14 @@ func singleDoc(t *testing.T) *sql.DB { } if _, err := db.Exec(` - INSERT INTO Authors (name) + INSERT INTO Authors (author) VALUES ("jp") `); err != nil { t.Fatal("err inserting author:", err) } if _, err := db.Exec(` - INSERT INTO Tags (name) + INSERT INTO Tags (tag) VALUES ("foo"), ("bar"), ("baz"), ("oof") `); err != nil { t.Fatal("err inserting tags:", err) @@ -72,14 +72,14 @@ func multiDoc(t *testing.T) *sql.DB { } if _, err := db.Exec(` - INSERT INTO Authors (name) + INSERT INTO Authors (author) VALUES ("jp"), ("anonymous") `); err != nil { t.Fatal("err inserting author:", err) } if _, err := db.Exec(` - INSERT INTO Tags (name) + INSERT INTO Tags (tag) VALUES ("foo"), ("bar"), ("baz"), ("oof") `); err != nil { t.Fatal("err inserting tags:", err) diff --git a/pkg/data/put.go b/pkg/data/put.go index 4830dbe..e1c3c02 100644 --- a/pkg/data/put.go +++ b/pkg/data/put.go @@ -171,7 +171,7 @@ func (p Put) tags() error { return nil } - query, args := BatchQuery("INSERT OR IGNORE INTO Tags (name) VALUES", "", "(?)", ",", "", len(p.Doc.Tags), p.Doc.Tags) + query, args := BatchQuery("INSERT OR IGNORE INTO Tags (tag) VALUES", "", "(?)", ",", "", len(p.Doc.Tags), p.Doc.Tags) if _, err := p.tx.Exec(query, args...); err != nil { return err } @@ -180,7 +180,7 @@ func (p Put) tags() error { INSERT INTO DocumentTags SELECT %d, Tags.id FROM Tags - WHERE name IN + WHERE tag IN `, p.Id) query, args = BatchQuery(preQuery, "(", "?", ",", ")", len(p.Doc.Tags), p.Doc.Tags) @@ -197,7 +197,7 @@ func (p PutMany) tags(ctx context.Context) error { return err } - txNewTagStmt, err := tx.Prepare("INSERT OR IGNORE INTO Tags (name) VALUES (?)") + txNewTagStmt, err := tx.Prepare("INSERT OR IGNORE INTO Tags (tag) VALUES (?)") if err != nil { tx.Rollback() return err @@ -219,7 +219,7 @@ func (p PutMany) tags(ctx context.Context) error { INSERT INTO DocumentTags (docId, tagId) SELECT %d, Tags.id FROM Tags - WHERE name IN + WHERE tag IN `, id) query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(doc.Tags), doc.Tags) if _, err := tx.Exec(query, args...); err != nil { @@ -280,13 +280,13 @@ func (p Put) authors() error { return nil } - authStmt, err := p.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") + authStmt, err := p.tx.Prepare("INSERT OR IGNORE INTO Authors(author) VALUES(?)") if err != nil { return err } defer authStmt.Close() - idStmt, err := p.tx.Prepare("SELECT id FROM Authors WHERE name = ?") + idStmt, err := p.tx.Prepare("SELECT id FROM Authors WHERE author = ?") if err != nil { return err } @@ -323,13 +323,13 @@ func (p PutMany) authors(ctx context.Context) error { return err } - authStmt, err := tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") + authStmt, err := tx.Prepare("INSERT OR IGNORE INTO Authors(author) VALUES(?)") if err != nil { return err } defer authStmt.Close() - idStmt, err := tx.Prepare("SELECT id FROM Authors WHERE name = ?") + idStmt, err := tx.Prepare("SELECT id FROM Authors WHERE author = ?") if err != nil { return err } diff --git a/pkg/data/update.go b/pkg/data/update.go index 52d9d53..7f74236 100644 --- a/pkg/data/update.go +++ b/pkg/data/update.go @@ -263,7 +263,7 @@ func (u Update) tags() error { } query, args := BatchQuery( - "INSERT OR IGNORE INTO Tags (name) VALUES", + "INSERT OR IGNORE INTO Tags (tag) VALUES", "", "(?)", ",", "", len(u.Doc.Tags), u.Doc.Tags, ) @@ -275,7 +275,7 @@ func (u Update) tags() error { INSERT INTO DocumentTags SELECT %d, Tags.id FROM Tags - WHERE name in + WHERE tag in `, u.Id) query, args = BatchQuery( preqQuery, "(", "?", ",", ")", @@ -308,7 +308,7 @@ func (u UpdateMany) tags() error { continue } insertTag, args := BatchQuery( - "INSERT OR IGNORE INTO Tags (name) VALUES", + "INSERT OR IGNORE INTO Tags (tag) VALUES", "", "(?)", ",", "", len(doc.Tags), doc.Tags, ) @@ -321,7 +321,7 @@ func (u UpdateMany) tags() error { INSERT INTO DocumentTags SELECT %d, Tags.id FROM Tags - WHERE name in + WHERE tag in `, id) setDocTags, _ := BatchQuery( preqQuery, "(", "?", ",", ")", @@ -390,13 +390,13 @@ func (u Update) authors() error { return err } - authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") + authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(author) VALUES(?)") if err != nil { return err } defer authStmt.Close() - idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE name = ?") + idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE author = ?") if err != nil { return err } @@ -433,13 +433,13 @@ func (u UpdateMany) authors() error { } defer deleteStmt.Close() - authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(name) VALUES(?)") + authStmt, err := u.tx.Prepare("INSERT OR IGNORE INTO Authors(author) VALUES(?)") if err != nil { return err } defer authStmt.Close() - idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE name = ?") + idStmt, err := u.tx.Prepare("SELECT id FROM Authors WHERE author = ?") if err != nil { return err } -- cgit v1.2.3