about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: JP Appel <jeanpierre.appel01@gmail.com> 2025-07-18 16:57:17 -0400
committer: JP Appel <jeanpierre.appel01@gmail.com> 2025-07-18 16:57:17 -0400
commit: 295f35e901f2088f54b66bd8bb0c0646d8e8c181 (patch)
tree: 05391382c5d88716e1a578ecc9a87f20ebc8f016
parent: 09cd2f2f80920830cc3fd3636a6b9fc93b10f743 (diff)
Add flag to avoid crawling hidden files
-rw-r--r-- cmd/index.go 15
-rw-r--r-- pkg/index/index.go 18
-rw-r--r-- pkg/index/index_test.go 2
3 files changed, 25 insertions, 10 deletions
diff --git a/cmd/index.go b/cmd/index.go
index f072599..2f18fe0 100644
--- a/cmd/index.go
+++ b/cmd/index.go
@@ -19,10 +19,19 @@ type IndexFlags struct {
}
func setupIndexFlags(args []string, fs *flag.FlagSet, flags *IndexFlags) {
+ flags.ParseLinks = true
+ flags.ParseMeta = true
fs.BoolVar(&flags.IgnoreDateError, "ignoreBadDates", false, "ignore malformed dates while indexing")
fs.BoolVar(&flags.IgnoreMetaError, "ignoreMetaError", false, "ignore errors while parsing general YAML header info")
- fs.BoolVar(&flags.ParseMeta, "parseMeta", true, "parse YAML header values other title, authors, date, tags")
- fs.BoolVar(&flags.ParseLinks, "parseLinks", true, "parse file contents for links")
+ fs.BoolFunc("ignoreMeta", "don't parse YAML header values other title, authors, date, tags", func(s string) error {
+ flags.ParseMeta = false
+ return nil
+ })
+ fs.BoolFunc("ignoreLinks", "don't parse file contents for links", func(s string) error {
+ flags.ParseLinks = false
+ return nil
+ })
+ fs.BoolVar(&flags.IgnoreHidden, "ignoreHidden", false, "ignore hidden files while crawling")
fs.Usage = func() {
f := fs.Output()
@@ -84,7 +93,7 @@ func runIndex(gFlags GlobalFlags, iFlags IndexFlags, db *data.Query) byte {
)
}
- traversedFiles := idx.Traverse(gFlags.NumWorkers)
+ traversedFiles := idx.Traverse(gFlags.NumWorkers, iFlags.IgnoreHidden)
fmt.Print("Crawled ", len(traversedFiles))
filteredFiles := idx.Filter(traversedFiles, gFlags.NumWorkers)
diff --git a/pkg/index/index.go b/pkg/index/index.go
index 4db1eb4..cfa4138 100644
--- a/pkg/index/index.go
+++ b/pkg/index/index.go
@@ -7,6 +7,7 @@ import (
"io"
"log/slog"
"os"
+ "path"
"regexp"
"slices"
"strings"
@@ -38,6 +39,7 @@ type ParseOpts struct {
ParseLinks bool
IgnoreDateError bool
IgnoreMetaError bool
+ IgnoreHidden bool
}
type InfoPath struct {
@@ -195,16 +197,20 @@ func (doc Document) Equal(other Document) bool {
return true
}
-func visit(file InfoPath, visitQueue chan<- InfoPath, filterQueue chan<- InfoPath, wg *sync.WaitGroup) {
+func visit(file InfoPath, visitQueue chan<- InfoPath, filterQueue chan<- InfoPath, ignoreHidden bool, wg *sync.WaitGroup) {
// TODO: check if symlink, and handle appropriately
// TODO: extract error out of function
+ if ignoreHidden && path.Base(file.Path)[0] == '.' {
+ wg.Done()
+ return
+ }
+
if file.Info.IsDir() {
entries, err := os.ReadDir(file.Path)
if err != nil {
panic(err)
}
-
wg.Add(len(entries))
for _, entry := range entries {
entryInfo, err := entry.Info()
@@ -223,13 +229,13 @@ func visit(file InfoPath, visitQueue chan<- InfoPath, filterQueue chan<- InfoPat
wg.Done()
}
-func workerTraverse(wg *sync.WaitGroup, visitQueue chan InfoPath, filterQueue chan<- InfoPath) {
+func workerTraverse(wg *sync.WaitGroup, ignoreHidden bool, visitQueue chan InfoPath, filterQueue chan<- InfoPath) {
for work := range visitQueue {
- visit(work, visitQueue, filterQueue, wg)
+ visit(work, visitQueue, filterQueue, ignoreHidden, wg)
}
}
-func (idx Index) Traverse(numWorkers uint) []string {
+func (idx Index) Traverse(numWorkers uint, ignoreHidden bool) []string {
if numWorkers <= 1 {
panic(fmt.Sprint("Invalid number of workers: ", numWorkers))
}
@@ -247,7 +253,7 @@ func (idx Index) Traverse(numWorkers uint) []string {
// start workers
for range numWorkers {
- go workerTraverse(activeJobs, jobs, filterQueue)
+ go workerTraverse(activeJobs, ignoreHidden, jobs, filterQueue)
}
// init send
diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go
index 69600c2..4242ce1 100644
--- a/pkg/index/index_test.go
+++ b/pkg/index/index_test.go
@@ -80,7 +80,7 @@ func TestIndex_Traverse(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
idx := tt.indexCase(t)
- got := idx.Traverse(tt.numWorkers)
+ got := idx.Traverse(tt.numWorkers, true)
slices.Sort(got)
slices.Sort(tt.want)