aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/data
diff options
context:
space:
mode:
Diffstat (limited to 'pkg/data')
-rw-r--r--pkg/data/db.go259
-rw-r--r--pkg/data/db_test.go65
-rw-r--r--pkg/data/get.go330
-rw-r--r--pkg/data/get_test.go217
-rw-r--r--pkg/data/put.go384
-rw-r--r--pkg/data/put_test.go98
6 files changed, 1353 insertions, 0 deletions
diff --git a/pkg/data/db.go b/pkg/data/db.go
new file mode 100644
index 0000000..fcf56f9
--- /dev/null
+++ b/pkg/data/db.go
@@ -0,0 +1,259 @@
+package data
+
+import (
+ "context"
+ "database/sql"
+ "strings"
+
+ "github.com/jpappel/atlas/pkg/index"
+ _ "github.com/mattn/go-sqlite3"
+)
+
+type Query struct {
+ db *sql.DB
+}
+
+// Append n copies of val to query
+//
+// output is in the form
+//
+// <query> <start><(n-1)*(<val><delim)>><val><delim><stop>
+func BatchQuery[T any](query string, start string, val string, delim string, stop string, n int, baseArgs []T) (string, []any) {
+ args := make([]any, len(baseArgs))
+ for i, arg := range baseArgs {
+ args[i] = arg
+ }
+
+ b := strings.Builder{}
+ b.Grow(len(query) + 1 + len(start) + n*len(val) + ((n - 1) * len(delim)) + len(stop))
+
+ b.WriteString(query)
+ b.WriteRune(' ')
+ b.WriteString(start)
+ for range n - 1 {
+ b.WriteString(val)
+ b.WriteString(delim)
+ }
+ b.WriteString(val)
+ b.WriteString(stop)
+
+ return b.String(), args
+}
+
+func NewQuery(filename string) *Query {
+ query := &Query{NewDB(filename)}
+ return query
+}
+
+func NewDB(filename string) *sql.DB {
+ connStr := "file:" + filename + "?_fk=true&_journal=WAL"
+ db, err := sql.Open("sqlite3", connStr)
+ if err != nil {
+ panic(err)
+ }
+
+ if err := createSchema(db); err != nil {
+ panic(err)
+ }
+
+ return db
+}
+
+func NewMemDB() *sql.DB {
+ db, err := sql.Open("sqlite3", ":memory:?_fk=true")
+ if err != nil {
+ panic(err)
+ }
+
+ if err := createSchema(db); err != nil {
+ panic(err)
+ }
+
+ return db
+}
+
+func createSchema(db *sql.DB) error {
+ ctx := context.TODO()
+ tx, err := db.BeginTx(ctx, nil)
+ if err != nil {
+ return err
+ }
+ defer tx.Commit()
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS Indexes(
+ root TEXT NOT NULL,
+ followSym DATE
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS Documents(
+ id INTEGER PRIMARY KEY,
+ path TEXT UNIQUE NOT NULL,
+ title TEXT,
+ date INT,
+ fileTime INT,
+ meta BLOB
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS Authors(
+ id INTEGER PRIMARY KEY,
+ name TEXT UNIQUE NOT NULL
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS Aliases(
+ authorId INT NOT NULL,
+ alias TEXT UNIQUE NOT NULL,
+ FOREIGN KEY (authorId) REFERENCES Authors(id)
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS Tags(
+ id INTEGER PRIMARY KEY,
+ name TEXT UNIQUE NOT NULL
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS Links(
+ referencedId INT,
+ refererId INT,
+ FOREIGN KEY (referencedId) REFERENCES Documents(id),
+ FOREIGN KEY (refererId) REFERENCES Documents(id)
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS DocumentAuthors(
+ docId INT NOT NULL,
+ authorId INT NOT NULL,
+ FOREIGN KEY (docId) REFERENCES Documents(id),
+ FOREIGN KEY (authorId) REFERENCES Authors(id)
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TABLE IF NOT EXISTS DocumentTags(
+ docId INT NOT NULL,
+ tagId INT NOT NULL,
+ FOREIGN KEY (docId) REFERENCES Documents(id),
+ FOREIGN KEY (tagId) REFERENCES Tags(id),
+ UNIQUE(docId, tagId)
+ )`)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doc_dates ON Documents (date)")
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doc_titles ON Documents (title)")
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_alias ON Aliases(alias)")
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_aliases_authorId ON Aliases(authorId)")
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec("CREATE INDEX IF NOT EXISTS idx_doctags_tagid ON DocumentTags (tagId)")
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ return nil
+}
+
+func (q Query) Close() error {
+ return q.db.Close()
+}
+
+// Create an index
+func (q Query) Get(indexRoot string) (*index.Index, error) {
+ ctx := context.TODO()
+
+ docs, err := FillMany{Db: q.db}.Get(ctx)
+ if err != nil {
+ return nil, err
+ }
+
+ idx := &index.Index{
+ Root: indexRoot,
+ Documents: docs,
+ Filters: index.DefaultFilters(),
+ }
+
+ return idx, nil
+}
+
+// Write from index to database
+func (q Query) Put(idx index.Index) error {
+ ctx := context.TODO()
+
+ insertCtx, cancel := context.WithCancelCause(ctx)
+ defer cancel(nil)
+
+ p, err := NewPutMany(q.db, idx.Documents)
+ if err != nil {
+ return err
+ }
+
+ if err := p.Insert(insertCtx); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+// Update database with values from index
+func (q Query) Update(idx index.Index) error {
+ // TODO: implement
+ return nil
+}
+
+func (q Query) GetDocument(path string) (*index.Document, error) {
+ ctx := context.TODO()
+ f := Fill{Path: path, Db: q.db}
+ return f.Get(ctx)
+}
diff --git a/pkg/data/db_test.go b/pkg/data/db_test.go
new file mode 100644
index 0000000..b90a943
--- /dev/null
+++ b/pkg/data/db_test.go
@@ -0,0 +1,65 @@
+package data_test
+
+import (
+ "slices"
+ "testing"
+
+ "github.com/jpappel/atlas/pkg/data"
+)
+
+func TestBatchQuery(t *testing.T) {
+ tests := []struct {
+ name string
+ query string
+ start string
+ val string
+ delim string
+ stop string
+ n int
+ args []int
+ wantQuery string
+ wantArgs []any
+ }{
+ {
+ "1 val group",
+ "INSERT INTO Foo VALUES",
+ "(",
+ "?",
+ ",",
+ ")",
+ 5,
+ []int{1, 2, 3, 4, 5},
+ "INSERT INTO Foo VALUES (?,?,?,?,?)",
+ []any{1, 2, 3, 4, 5},
+ },
+ {
+ "multiple val groups",
+ "INSERT INTO Bar VALUES",
+ "",
+ "(?,?)",
+ ",",
+ "",
+ 2,
+ []int{1, 2, 3, 4},
+ "INSERT INTO Bar VALUES (?,?),(?,?)",
+ []any{1, 2, 3, 4},
+ },
+ }
+
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ gotQuery, gotArgs := data.BatchQuery(tt.query, tt.start, tt.val, tt.delim, tt.stop, tt.n, tt.args)
+ if gotQuery != tt.wantQuery {
+ t.Error("Got different query than wanted")
+ t.Log("Wanted:\n" + tt.wantQuery)
+ t.Log("Got:\n" + gotQuery)
+ }
+
+ if !slices.Equal(tt.wantArgs, gotArgs) {
+ t.Error("Got different args than wanted")
+ t.Logf("Wanted:\t%v", tt.wantArgs)
+ t.Logf("Got:\t%v", gotArgs)
+ }
+ })
+ }
+}
diff --git a/pkg/data/get.go b/pkg/data/get.go
new file mode 100644
index 0000000..7cae03a
--- /dev/null
+++ b/pkg/data/get.go
@@ -0,0 +1,330 @@
+package data
+
+import (
+ "context"
+ "database/sql"
+ "time"
+
+ "github.com/jpappel/atlas/pkg/index"
+)
+
+// TODO: rename struct
+//
+// Use to build a document from a database connection
+type Fill struct {
+ Path string
+ Db *sql.DB
+ id int
+ doc *index.Document
+}
+
+// TODO: rename struct
+//
+// Use to build documents and aliases from a database connection
+type FillMany struct {
+ docs map[string]*index.Document
+ ids map[string]int
+ Db *sql.DB
+}
+
+func (f Fill) Get(ctx context.Context) (*index.Document, error) {
+ f.doc = &index.Document{Path: f.Path}
+ if err := f.document(ctx); err != nil {
+ return nil, err
+ }
+ if err := f.tags(ctx); err != nil {
+ return nil, err
+ }
+ if err := f.authors(ctx); err != nil {
+ return nil, err
+ }
+ if err := f.links(ctx); err != nil {
+ return nil, err
+ }
+
+ return f.doc, nil
+}
+
+func (f FillMany) Get(ctx context.Context) (map[string]*index.Document, error) {
+ f.docs = make(map[string]*index.Document)
+ f.ids = make(map[string]int)
+
+ if err := f.documents(ctx); err != nil {
+ return nil, err
+ }
+ if err := f.tags(ctx); err != nil {
+ return nil, err
+ }
+ if err := f.links(ctx); err != nil {
+ return nil, err
+ }
+ if err := f.authors(ctx); err != nil {
+ return nil, err
+ }
+
+ return f.docs, nil
+}
+
+func (f *Fill) document(ctx context.Context) error {
+ var title sql.NullString
+ var dateEpoch sql.NullInt64
+ var fileTimeEpoch sql.NullInt64
+ var meta sql.NullString
+
+ row := f.Db.QueryRowContext(ctx, `
+ SELECT id, title, date, fileTime, meta
+ FROM Documents
+ WHERE path = ?
+ `, f.Path)
+ if err := row.Scan(&f.id, &title, &dateEpoch, &fileTimeEpoch, &meta); err != nil {
+ return err
+ }
+
+ if title.Valid {
+ f.doc.Title = title.String
+ }
+ if dateEpoch.Valid {
+ f.doc.Date = time.Unix(dateEpoch.Int64, 0)
+ }
+ if fileTimeEpoch.Valid {
+ f.doc.FileTime = time.Unix(fileTimeEpoch.Int64, 0)
+ }
+ if meta.Valid {
+ f.doc.OtherMeta = meta.String
+ }
+ return nil
+}
+
+func (f *FillMany) documents(ctx context.Context) error {
+ rows, err := f.Db.QueryContext(ctx, `
+ SELECT id, path, title, date, fileTime, meta
+ FROM Documents
+ `)
+ if err != nil {
+ return err
+ }
+ defer rows.Close()
+
+ var id int
+ var docPath string
+ var title, meta sql.NullString
+ var dateEpoch, filetimeEpoch sql.NullInt64
+
+ for rows.Next() {
+ if err := rows.Scan(&id, &docPath, &title, &dateEpoch, &filetimeEpoch, &meta); err != nil {
+ return err
+ }
+
+ doc := &index.Document{
+ Path: docPath,
+ }
+
+ if title.Valid {
+ doc.Title = title.String
+ }
+ if dateEpoch.Valid {
+ doc.Date = time.Unix(dateEpoch.Int64, 0)
+ }
+ if filetimeEpoch.Valid {
+ doc.FileTime = time.Unix(filetimeEpoch.Int64, 0)
+ }
+ if meta.Valid {
+ doc.OtherMeta = meta.String
+ }
+
+ f.docs[docPath] = doc
+ f.ids[docPath] = id
+ }
+
+ return nil
+}
+
+func (f Fill) authors(ctx context.Context) error {
+ rows, err := f.Db.QueryContext(ctx, `
+ SELECT name
+ FROM Authors
+ JOIN DocumentAuthors
+ ON Authors.id = DocumentAuthors.authorId
+ WHERE docId = ?
+ `, f.id)
+ if err != nil {
+ return err
+ }
+ defer rows.Close()
+
+ var name string
+ authors := make([]string, 0, 4)
+ for rows.Next() {
+ if err := rows.Scan(&name); err != nil {
+ return err
+ }
+ authors = append(authors, name)
+ }
+
+ f.doc.Authors = authors
+
+ return nil
+}
+
+func (f FillMany) authors(ctx context.Context) error {
+ stmt, err := f.Db.PrepareContext(ctx, `
+ SELECT name
+ FROM Authors
+ JOIN DocumentAuthors
+ ON Authors.id = DocumentAuthors.authorId
+ WHERE docId = ?
+ `)
+ if err != nil {
+ return err
+ }
+ defer stmt.Close()
+
+ // PERF: parallelize
+ var name string
+ for path, id := range f.ids {
+ rows, err := stmt.QueryContext(ctx, id)
+ if err != nil {
+ return err
+ }
+
+ doc := f.docs[path]
+ for rows.Next() {
+ if err := rows.Scan(&name); err != nil {
+ rows.Close()
+ return err
+ }
+
+ doc.Authors = append(doc.Authors, name)
+ }
+
+ rows.Close()
+ }
+
+ return nil
+}
+
+func (f Fill) tags(ctx context.Context) error {
+ rows, err := f.Db.QueryContext(ctx, `
+ SELECT name
+ FROM Tags
+ JOIN DocumentTags
+ ON Tags.id = DocumentTags.tagId
+ WHERE docId = ?
+ `, f.id)
+ if err != nil {
+ panic(err)
+ }
+ defer rows.Close()
+
+ var tag string
+ tags := make([]string, 0, 2)
+ for rows.Next() {
+ if err := rows.Scan(&tag); err != nil {
+ return err
+ }
+ tags = append(tags, tag)
+ }
+
+ f.doc.Tags = tags
+
+ return nil
+}
+
+func (f FillMany) tags(ctx context.Context) error {
+ stmt, err := f.Db.PrepareContext(ctx, `
+ SELECT name
+ FROM Tags
+ JOIN DocumentTags
+ ON Tags.id = DocumentTags.tagId
+ WHERE docId = ?
+ `)
+ if err != nil {
+ return err
+ }
+ defer stmt.Close()
+
+ // PERF: parallelize
+ var tag string
+ for docPath, id := range f.ids {
+ rows, err := stmt.QueryContext(ctx, id)
+ if err != nil {
+ return err
+ }
+
+ doc := f.docs[docPath]
+ for rows.Next() {
+ if err := rows.Scan(&tag); err != nil {
+ rows.Close()
+ return err
+ }
+
+ doc.Tags = append(doc.Tags, tag)
+ }
+
+ rows.Close()
+ }
+
+ return nil
+}
+
+func (f Fill) links(ctx context.Context) error {
+ rows, err := f.Db.QueryContext(ctx, `
+ SELECT path
+ FROM Documents
+ JOIN Links
+ ON Links.referencedId = Documents.id
+ WHERE Links.refererId = ?
+ `, f.id)
+ if err != nil {
+ return err
+ }
+ defer rows.Close()
+
+ var link string
+ links := make([]string, 0)
+ for rows.Next() {
+ if err := rows.Scan(&link); err != nil {
+ return err
+ }
+ links = append(links, link)
+ }
+ f.doc.Links = links
+
+ return nil
+}
+
+func (f FillMany) links(ctx context.Context) error {
+ stmt, err := f.Db.PrepareContext(ctx, `
+ SELECT path
+ FROM Documents
+ JOIN Links
+ ON Links.referencedId = Documents.id
+ WHERE Links.refererId = ?
+ `)
+ if err != nil {
+ return err
+ }
+ defer stmt.Close()
+
+ // PERF: parallelize
+ var linkPath string
+ for path, id := range f.ids {
+ rows, err := stmt.QueryContext(ctx, id)
+ if err != nil {
+ return err
+ }
+
+ doc := f.docs[path]
+ for rows.Next() {
+ if err := rows.Scan(&linkPath); err != nil {
+ rows.Close()
+ return err
+ }
+ doc.Links = append(doc.Links, linkPath)
+ }
+
+ rows.Close()
+ }
+
+ return nil
+}
diff --git a/pkg/data/get_test.go b/pkg/data/get_test.go
new file mode 100644
index 0000000..14d6920
--- /dev/null
+++ b/pkg/data/get_test.go
@@ -0,0 +1,217 @@
+package data_test
+
+import (
+ "context"
+ "database/sql"
+ "errors"
+ "testing"
+ "time"
+
+ "github.com/jpappel/atlas/pkg/data"
+ "github.com/jpappel/atlas/pkg/index"
+)
+
+func singleDoc(t *testing.T) *sql.DB {
+ t.Helper()
+ db := data.NewMemDB()
+
+ if _, err := db.Exec(`
+ INSERT INTO Documents (path, title, date, fileTime)
+ VALUES ("/file", "A file", 1, 2)
+ `); err != nil {
+ t.Fatal("err inserting doc:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO Authors (name)
+ VALUES ("jp")
+ `); err != nil {
+ t.Fatal("err inserting author:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO Aliases (authorId, alias)
+ VALUES (1,"pj"), (1,"JP")
+ `); err != nil {
+ t.Fatal("err inserting aliases:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO Tags (name)
+ VALUES ("foo"), ("bar"), ("baz"), ("oof")
+ `); err != nil {
+ t.Fatal("err inserting tags:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO DocumentAuthors (docId, authorId)
+ VALUES (1, 1)
+ `); err != nil {
+ t.Fatal("err inserting docAuthors:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO DocumentTags (docId, tagId)
+ VALUES (1,1), (1,2), (1,3), (1,4)
+ `); err != nil {
+ t.Fatal("err inserting docTags:", err)
+ }
+
+ return db
+}
+
+func multiDoc(t *testing.T) *sql.DB {
+ t.Helper()
+ db := data.NewMemDB()
+
+ if _, err := db.Exec(`
+ INSERT INTO Documents (path, title, date, fileTime)
+ VALUES ("/notes/anote.md", "A note", 1, 2), ("README.md", "read this file!", 3, 4)
+ `); err != nil {
+ t.Fatal("err inserting doc:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO Authors (name)
+ VALUES ("jp"), ("anonymous")
+ `); err != nil {
+ t.Fatal("err inserting author:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO Aliases (authorId, alias)
+ VALUES (1,"pj"), (1,"JP")
+ `); err != nil {
+ t.Fatal("err inserting aliases:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO Tags (name)
+ VALUES ("foo"), ("bar"), ("baz"), ("oof")
+ `); err != nil {
+ t.Fatal("err inserting tags:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO DocumentAuthors (docId, authorId)
+ VALUES (1, 1), (2, 2), (2, 1)
+ `); err != nil {
+ t.Fatal("err inserting docAuthors:", err)
+ }
+
+ if _, err := db.Exec(`
+ INSERT INTO DocumentTags (docId, tagId)
+ VALUES (1,1), (2,2), (1,3), (2,4)
+ `); err != nil {
+ t.Fatal("err inserting docTags:", err)
+ }
+
+ return db
+}
+
+func TestFill_Get(t *testing.T) {
+ tests := []struct {
+ name string
+ newFill func(t *testing.T) data.Fill
+ want index.Document
+ wantErr error
+ }{
+ {
+ "single doc",
+ func(t *testing.T) data.Fill {
+ t.Helper()
+ return data.Fill{Path: "/file", Db: singleDoc(t)}
+ },
+ index.Document{
+ Path: "/file",
+ Title: "A file",
+ Date: time.Unix(1, 0),
+ FileTime: time.Unix(2, 0),
+ Authors: []string{"jp"},
+ Tags: []string{"foo", "bar", "oof", "baz"},
+ },
+ nil,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ f := tt.newFill(t)
+ got, gotErr := f.Get(context.Background())
+
+ if !errors.Is(gotErr, tt.wantErr) {
+ t.Fatalf("Recieved unexpected error: got %v want %v", gotErr, tt.wantErr)
+ } else if gotErr != nil {
+ return
+ }
+
+ if !got.Equal(tt.want) {
+ t.Errorf("Get() = %+v\nWant %+v", got, tt.want)
+ }
+ })
+ }
+}
+
+func TestFillMany_Get(t *testing.T) {
+ tests := []struct {
+ name string
+ newFillMany func(t *testing.T) data.FillMany
+ want map[string]*index.Document
+ wantErr error
+ }{
+ {
+ "multi doc",
+ func(t *testing.T) data.FillMany {
+ t.Helper()
+ return data.FillMany{Db: multiDoc(t)}
+ },
+ map[string]*index.Document{
+ "/notes/anote.md": {
+ Path: "/notes/anote.md",
+ Title: "A note",
+ Date: time.Unix(1, 0),
+ FileTime: time.Unix(2, 0),
+ Authors: []string{"jp"},
+ Tags: []string{"foo", "baz"},
+ },
+ "README.md": {
+ Path: "README.md",
+ Title: "read this file!",
+ Date: time.Unix(3, 0),
+ FileTime: time.Unix(4, 0),
+ Authors: []string{"anonymous", "jp"},
+ Tags: []string{"bar", "oof"},
+ },
+ },
+ nil,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx := context.Background()
+ f := tt.newFillMany(t)
+
+ got, gotErr := f.Get(ctx)
+ if !errors.Is(gotErr, tt.wantErr) {
+ t.Fatalf("Recieved unexpected error: got %v want %v", gotErr, tt.wantErr)
+ } else if gotErr != nil {
+ return
+ }
+
+ if len(tt.want) != len(got) {
+ t.Errorf("Recieved incorrect amount of documents: got %d want %d", len(got), len(tt.want))
+ }
+
+ for path, wantDoc := range tt.want {
+ gotDoc, ok := got[path]
+ if !ok {
+ t.Errorf("Can't find %s in recieved docs", path)
+ continue
+ }
+
+ if !gotDoc.Equal(*wantDoc) {
+ t.Errorf("%s not equal %+v\nWant %+v", path, gotDoc, wantDoc)
+ }
+ }
+ })
+ }
+}
diff --git a/pkg/data/put.go b/pkg/data/put.go
new file mode 100644
index 0000000..f3fe4b6
--- /dev/null
+++ b/pkg/data/put.go
@@ -0,0 +1,384 @@
+package data
+
+import (
+ "context"
+ "database/sql"
+ "fmt"
+
+ "github.com/jpappel/atlas/pkg/index"
+)
+
+// TODO: rename struct
+type Put struct {
+ Id int64
+ Doc index.Document
+ tx *sql.Tx
+}
+
+// TODO: rename struct
+type PutMany struct {
+ Docs map[int64]*index.Document
+ pathDocs map[string]*index.Document
+ db *sql.DB
+}
+
+func NewPut(ctx context.Context, db *sql.DB, doc index.Document) (Put, error) {
+ tx, err := db.BeginTx(ctx, nil)
+ if err != nil {
+ return Put{}, nil
+ }
+ p := Put{Doc: doc, tx: tx}
+ return p, nil
+}
+
+func NewPutMany(db *sql.DB, documents map[string]*index.Document) (PutMany, error) {
+ docs := make(map[int64]*index.Document, len(documents))
+ p := PutMany{
+ Docs: docs,
+ pathDocs: documents,
+ db: db,
+ }
+ return p, nil
+}
+
+func (p Put) Insert() error {
+ if err := p.document(); err != nil {
+ p.tx.Rollback()
+ return err
+ }
+
+ if err := p.tags(); err != nil {
+ p.tx.Rollback()
+ return err
+ }
+
+ if err := p.links(); err != nil {
+ p.tx.Rollback()
+ return err
+ }
+
+ if err := p.authors(); err != nil {
+ p.tx.Rollback()
+ return err
+ }
+
+ return p.tx.Commit()
+}
+
+func (p PutMany) Insert(ctx context.Context) error {
+ if err := p.documents(ctx); err != nil {
+ return err
+ }
+
+ if err := p.tags(ctx); err != nil {
+ return err
+ }
+
+ if err := p.links(ctx); err != nil {
+ return err
+ }
+
+ if err := p.authors(ctx); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (p *Put) document() error {
+ title := sql.NullString{String: p.Doc.Title, Valid: p.Doc.Title != ""}
+
+ dateUnix := p.Doc.Date.Unix()
+ date := sql.NullInt64{Int64: dateUnix, Valid: dateUnix != 0}
+
+ filetimeUnix := p.Doc.FileTime.Unix()
+ filetime := sql.NullInt64{Int64: filetimeUnix, Valid: filetimeUnix != 0}
+
+ meta := sql.NullString{String: p.Doc.OtherMeta, Valid: p.Doc.OtherMeta != ""}
+
+ result, err := p.tx.Exec(`
+ INSERT INTO Documents(path, title, date, fileTime, meta)
+ VALUES (?,?,?,?,?)
+ `, p.Doc.Path, title, date, filetime, meta)
+ if err != nil {
+ return err
+ }
+
+ p.Id, err = result.LastInsertId()
+ if err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (p *PutMany) documents(ctx context.Context) error {
+ stmt, err := p.db.PrepareContext(ctx, `
+ INSERT INTO Documents(path, title, date, fileTime, meta)
+ VALUES (?,?,?,?,?)
+ `)
+ if err != nil {
+ return err
+ }
+ defer stmt.Close()
+
+ tx, err := p.db.BeginTx(ctx, nil)
+ if err != nil {
+ return err
+ }
+
+ txStmt := tx.StmtContext(ctx, stmt)
+
+ // PERF: profile this, grabbing the docId here might save time by simpliyfying
+ // future inserts
+ for _, doc := range p.pathDocs {
+ title := sql.NullString{String: doc.Title, Valid: doc.Title != ""}
+ dateUnix := doc.Date.Unix()
+ date := sql.NullInt64{Int64: dateUnix, Valid: dateUnix != 0}
+
+ filetimeUnix := doc.FileTime.Unix()
+ filetime := sql.NullInt64{Int64: filetimeUnix, Valid: filetimeUnix != 0}
+
+ meta := sql.NullString{String: doc.OtherMeta, Valid: doc.OtherMeta != ""}
+
+ res, err := txStmt.ExecContext(ctx, doc.Path, title, date, filetime, meta)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ id, err := res.LastInsertId()
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ p.Docs[id] = doc
+ }
+
+ return tx.Commit()
+}
+
+func (p Put) tags() error {
+ query, args := BatchQuery("INSERT OR IGNORE INTO Tags (name) VALUES", "", "(?)", ",", "", len(p.Doc.Tags), p.Doc.Tags)
+ if _, err := p.tx.Exec(query, args...); err != nil {
+ return err
+ }
+
+ preQuery := fmt.Sprintf(`
+ INSERT INTO DocumentTags
+ SELECT %d, Tags.id
+ FROM Tags
+ WHERE name IN
+ `, p.Id)
+
+ query, args = BatchQuery(preQuery, "(", "?", ",", ")", len(p.Doc.Tags), p.Doc.Tags)
+ if _, err := p.tx.Exec(query, args...); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (p PutMany) tags(ctx context.Context) error {
+ newTagStmt, err := p.db.PrepareContext(ctx, "INSERT OR IGNORE INTO Tags (name) VALUES (?)")
+ if err != nil {
+ return err
+ }
+ defer newTagStmt.Close()
+
+ tx, err := p.db.BeginTx(ctx, nil)
+ if err != nil {
+ return err
+ }
+
+ txNewTagStmt := tx.StmtContext(ctx, newTagStmt)
+
+ for id, doc := range p.Docs {
+ for _, tag := range doc.Tags {
+ if _, err := txNewTagStmt.ExecContext(ctx, tag); err != nil {
+ tx.Rollback()
+ return err
+ }
+ }
+
+ preQuery := fmt.Sprintf(`
+ INSERT INTO DocumentTags (docId, tagId)
+ SELECT %d, Tags.id
+ FROM Tags
+ WHERE name IN
+ `, id)
+ query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(doc.Tags), doc.Tags)
+ if _, err := tx.Exec(query, args); err != nil {
+ tx.Rollback()
+ return err
+ }
+ }
+
+ return tx.Commit()
+}
+
+func (p Put) links() error {
+ if len(p.Doc.Links) == 0 {
+ return nil
+ }
+
+ preQuery := fmt.Sprintf(`
+ INSERT INTO Links (referencedId, refererId)
+ SELECT id, %d
+ FROM Documents
+ WHERE path IN
+ `, p.Id)
+ query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(p.Doc.Links), p.Doc.Links)
+ if _, err := p.tx.Exec(query, args...); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (p PutMany) links(ctx context.Context) error {
+ tx, err := p.db.BeginTx(ctx, nil)
+ if err != nil {
+ return err
+ }
+
+ for id, doc := range p.Docs {
+ preQuery := fmt.Sprintf(`
+ INSERT INTO Links (referencedId, refererId)
+ SELECT id, %d
+ FROM Documents
+ WHERE path IN
+ `, id)
+ query, args := BatchQuery(preQuery, "(", "?", ",", ")", len(doc.Links), doc.Links)
+ if _, err := tx.Exec(query, args...); err != nil {
+ tx.Rollback()
+ return err
+ }
+ }
+
+ return tx.Commit()
+}
+
+func (p Put) authors() error {
+ // TODO: consider using temp table instead of cte
+ namesCTE, args := BatchQuery("WITH names(n) AS",
+ "( VALUES ", "(?)", ",", "),", len(p.Doc.Authors), p.Doc.Authors)
+
+ newAuthorsQuery := namesCTE + `
+ filtered_names AS (
+ SELECT n
+ FROM names
+ LEFT JOIN Authors on Authors.name = n
+ LEFT JOIN Aliases on Aliases.alias = n
+ WHERE Authors.name IS NULL AND Aliases.alias IS NULL
+ )
+ INSERT INTO Authors(name)
+ SELECT n FROM filtered_names
+ `
+ if _, err := p.tx.Exec(newAuthorsQuery, args...); err != nil {
+ return err
+ }
+
+ docAuthorsQuery := namesCTE + fmt.Sprintf(`
+ matched_authors AS (
+ SELECT Authors.id AS author_id
+ FROM Authors
+ LEFT JOIN Aliases
+ ON Authors.id = Aliases.authorId
+ JOIN names
+ ON Authors.name = n OR Aliases.alias = n
+ )
+ INSERT INTO DocumentAuthors(docId, authorId)
+ SELECT %d, author_id FROM matched_authors
+ `, p.Id)
+ if _, err := p.tx.Exec(docAuthorsQuery, args...); err != nil {
+ return err
+ }
+
+ return nil
+}
+
+func (p PutMany) authors(ctx context.Context) error {
+ tx, err := p.db.BeginTx(ctx, nil)
+ if err != nil {
+ return err
+ }
+
+ _, err = p.db.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)")
+ // _, err = tx.Exec("CREATE TEMPORARY TABLE names (name TEXT UNIQUE NOT NULL)")
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+ defer p.db.Exec("DROP TABLE IF EXISTS temp.names")
+
+ nameStmt, err := p.db.PrepareContext(ctx, "INSERT OR IGNORE INTO temp.names VALUES (?)")
+ if err != nil {
+ return err
+ }
+ defer nameStmt.Close()
+
+ txNameStmt := tx.StmtContext(ctx, nameStmt)
+ for _, doc := range p.Docs {
+ for _, name := range doc.Authors {
+ if _, err := txNameStmt.Exec(name); err != nil {
+ tx.Rollback()
+ return err
+ }
+ }
+ }
+
+ newAuthorsQuery := `
+ WITH new_names AS (
+ SELECT name
+ FROM temp.names
+ LEFT JOIN Authors on Authors.name = temp.names.name
+ LEFT JOIN Aliases on Aliases.alias = tmep.names.name
+ WHERE Authors.name IS NULL AND Aliases.alias IS NULL
+ )
+ INSERT INTO Authors(name)
+ SELECT name FROM new_names
+ `
+
+ if _, err := tx.Exec(newAuthorsQuery); err != nil {
+ tx.Rollback()
+ return err
+ }
+
+ _, err = tx.Exec(`
+ CREATE TEMPORARY TABLE name_ids AS
+ SELECT names.name AS name, COALESCE(Authors.id, Aliases.authorId) AS authorId
+ FROM names
+ LEFT JOIN Authors ON temp.names.name = Authors.name
+ LEFT JOIN Aliases ON temp.names.name = Aliases.name
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+ defer p.db.Exec("DROP TABLE IF EXISTS temp.name_ids")
+
+ docAuthorsStmt, err := p.db.Prepare(`
+ INSERT INTO DocumentAuthors (docId, authorId)
+ SELECT ?, authorId
+ FROM temp.name_ids
+ WHERE name = ?
+ `)
+ if err != nil {
+ tx.Rollback()
+ return err
+ }
+ defer docAuthorsStmt.Close()
+
+ for id, doc := range p.Docs {
+ for _, name := range doc.Authors {
+ if _, err := tx.Stmt(docAuthorsStmt).Exec(id, name); err != nil {
+ tx.Rollback()
+ return err
+ }
+ }
+ }
+
+ return tx.Commit()
+}
diff --git a/pkg/data/put_test.go b/pkg/data/put_test.go
new file mode 100644
index 0000000..7e5ad38
--- /dev/null
+++ b/pkg/data/put_test.go
@@ -0,0 +1,98 @@
+package data_test
+
+import (
+ "context"
+ "database/sql"
+ "errors"
+ "testing"
+ "time"
+
+ "github.com/jpappel/atlas/pkg/data"
+ "github.com/jpappel/atlas/pkg/index"
+)
+
+func TestPut_Insert(t *testing.T) {
+ tests := []struct {
+ name string
+ newDb func(t *testing.T) *sql.DB
+ doc index.Document
+ wantErr error
+ }{
+ {
+ "insert on empty",
+ func(t *testing.T) *sql.DB {
+ t.Helper()
+ return data.NewMemDB()
+ },
+ index.Document{
+ Path: "/file",
+ Title: "A file",
+ Date: time.Unix(1, 0),
+ FileTime: time.Unix(2, 0),
+ Authors: []string{"jp"},
+ Tags: []string{"foo", "bar", "oof", "baz"},
+ },
+ nil,
+ },
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ ctx := context.Background()
+ db := tt.newDb(t)
+ defer db.Close()
+
+ p, err := data.NewPut(ctx, db, tt.doc)
+ if err != nil {
+ t.Fatalf("could not construct receiver type: %v", err)
+ }
+
+ gotErr := p.Insert()
+ if !errors.Is(gotErr, tt.wantErr) {
+ t.Fatalf("Unexpected error on Insert():, want %v got %v", tt.wantErr, gotErr)
+ } else if gotErr != nil {
+ return
+ }
+
+ f := data.Fill{Path: tt.doc.Path, Db: db}
+ gotDoc, err := f.Get(ctx)
+ if err != nil {
+ t.Fatal("Error while retrieving document for comparison:", err)
+ }
+
+ if !gotDoc.Equal(tt.doc) {
+ t.Errorf("Retrieved doc is not stored doc!\nrecv: %+v\nsent: %+v", gotDoc, tt.doc)
+ }
+ })
+ }
+}
+
+func TestPutMany_Insert(t *testing.T) {
+ tests := []struct {
+ name string // description of this test case
+ // Named input parameters for receiver constructor.
+ db *sql.DB
+ documents map[string]*index.Document
+ wantErr bool
+ }{
+ // TODO: Add test cases.
+ }
+ for _, tt := range tests {
+ t.Run(tt.name, func(t *testing.T) {
+ p, err := data.NewPutMany(tt.db, tt.documents)
+ if err != nil {
+ t.Fatalf("could not construct receiver type: %v", err)
+ }
+ gotErr := p.Insert(context.Background())
+ if gotErr != nil {
+ if !tt.wantErr {
+ t.Errorf("Insert() failed: %v", gotErr)
+ }
+ return
+ }
+ if tt.wantErr {
+ t.Fatal("Insert() succeeded unexpectedly")
+ }
+ })
+ }
+}
+