From 295f35e901f2088f54b66bd8bb0c0646d8e8c181 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Fri, 18 Jul 2025 16:57:17 -0400 Subject: Add flag to avoid crawling hidden files --- cmd/index.go | 15 ++++++++++++--- pkg/index/index.go | 18 ++++++++++++------ pkg/index/index_test.go | 2 +- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/cmd/index.go b/cmd/index.go index f072599..2f18fe0 100644 --- a/cmd/index.go +++ b/cmd/index.go @@ -19,10 +19,19 @@ type IndexFlags struct { } func setupIndexFlags(args []string, fs *flag.FlagSet, flags *IndexFlags) { + flags.ParseLinks = true + flags.ParseMeta = true fs.BoolVar(&flags.IgnoreDateError, "ignoreBadDates", false, "ignore malformed dates while indexing") fs.BoolVar(&flags.IgnoreMetaError, "ignoreMetaError", false, "ignore errors while parsing general YAML header info") - fs.BoolVar(&flags.ParseMeta, "parseMeta", true, "parse YAML header values other title, authors, date, tags") - fs.BoolVar(&flags.ParseLinks, "parseLinks", true, "parse file contents for links") + fs.BoolFunc("ignoreMeta", "don't parse YAML header values other title, authors, date, tags", func(s string) error { + flags.ParseMeta = false + return nil + }) + fs.BoolFunc("ignoreLinks", "don't parse file contents for links", func(s string) error { + flags.ParseLinks = false + return nil + }) + fs.BoolVar(&flags.IgnoreHidden, "ignoreHidden", false, "ignore hidden files while crawling") fs.Usage = func() { f := fs.Output() @@ -84,7 +93,7 @@ func runIndex(gFlags GlobalFlags, iFlags IndexFlags, db *data.Query) byte { ) } - traversedFiles := idx.Traverse(gFlags.NumWorkers) + traversedFiles := idx.Traverse(gFlags.NumWorkers, iFlags.IgnoreHidden) fmt.Print("Crawled ", len(traversedFiles)) filteredFiles := idx.Filter(traversedFiles, gFlags.NumWorkers) diff --git a/pkg/index/index.go b/pkg/index/index.go index 4db1eb4..cfa4138 100644 --- a/pkg/index/index.go +++ b/pkg/index/index.go @@ -7,6 +7,7 @@ import ( "io" "log/slog" "os" + "path" "regexp" "slices" "strings" @@ -38,6 +39,7 @@ type ParseOpts struct { ParseLinks bool IgnoreDateError bool IgnoreMetaError bool + IgnoreHidden bool } type InfoPath struct { @@ -195,16 +197,20 @@ func (doc Document) Equal(other Document) bool { return true } -func visit(file InfoPath, visitQueue chan<- InfoPath, filterQueue chan<- InfoPath, wg *sync.WaitGroup) { +func visit(file InfoPath, visitQueue chan<- InfoPath, filterQueue chan<- InfoPath, ignoreHidden bool, wg *sync.WaitGroup) { // TODO: check if symlink, and handle appropriately // TODO: extract error out of function + if ignoreHidden && path.Base(file.Path)[0] == '.' { + wg.Done() + return + } + if file.Info.IsDir() { entries, err := os.ReadDir(file.Path) if err != nil { panic(err) } - wg.Add(len(entries)) for _, entry := range entries { entryInfo, err := entry.Info() @@ -223,13 +229,13 @@ func visit(file InfoPath, visitQueue chan<- InfoPath, filterQueue chan<- InfoPat wg.Done() } -func workerTraverse(wg *sync.WaitGroup, visitQueue chan InfoPath, filterQueue chan<- InfoPath) { +func workerTraverse(wg *sync.WaitGroup, ignoreHidden bool, visitQueue chan InfoPath, filterQueue chan<- InfoPath) { for work := range visitQueue { - visit(work, visitQueue, filterQueue, wg) + visit(work, visitQueue, filterQueue, ignoreHidden, wg) } } -func (idx Index) Traverse(numWorkers uint) []string { +func (idx Index) Traverse(numWorkers uint, ignoreHidden bool) []string { if numWorkers <= 1 { panic(fmt.Sprint("Invalid number of workers: ", numWorkers)) } @@ -247,7 +253,7 @@ func (idx Index) Traverse(numWorkers uint) []string { // start workers for range numWorkers { - go workerTraverse(activeJobs, jobs, filterQueue) + go workerTraverse(activeJobs, ignoreHidden, jobs, filterQueue) } // init send diff --git a/pkg/index/index_test.go b/pkg/index/index_test.go index 69600c2..4242ce1 100644 --- a/pkg/index/index_test.go +++ b/pkg/index/index_test.go @@ -80,7 +80,7 @@ func TestIndex_Traverse(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { idx := tt.indexCase(t) - got := idx.Traverse(tt.numWorkers) + got := idx.Traverse(tt.numWorkers, true) slices.Sort(got) slices.Sort(tt.want) -- cgit v1.2.3