From c89447a77e816447e84ceeeb653053f8450d9360 Mon Sep 17 00:00:00 2001 From: JP Appel Date: Fri, 13 Jun 2025 00:14:00 -0400 Subject: Add regex based filters and helper functions --- pkg/index/filters.go | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 86 insertions(+), 2 deletions(-) (limited to 'pkg/index') diff --git a/pkg/index/filters.go b/pkg/index/filters.go index 3a22910..44b12cf 100644 --- a/pkg/index/filters.go +++ b/pkg/index/filters.go @@ -5,7 +5,9 @@ import ( "io" "os" "path/filepath" + "regexp" "slices" + "strconv" "strings" ) @@ -16,6 +18,62 @@ type DocFilter struct { Filter func(infoPath, io.ReadSeeker) bool } +const FilterHelp string = ` +YAMLHeader - reject files without YAML header +Ext,Extension_ - accept files ending with +MaxSize,MaxFilesize_ - accept files of at most bytes +ExcludeName,ExcludeFilename_,..., - reject files with names in list +IncludeName,IncludeFilename_,..., - accept files with names in list +ExcludeParent_ - reject files if is a parent directory +IncludeRegex_ - accept files whose path matches +ExcludeRegex_ - reject files whose path matches ` + +func ParseFilter(s string) (DocFilter, error) { + name, param, found := strings.Cut(s, "_") + + // paramless filters + if name == "YAMLHeader" { + return YamlHeaderFilter, nil + } + + if !found { + return DocFilter{}, fmt.Errorf("Expected parameter with filter %s", name) + } + + switch name { + case "Ext", "Extension": + return NewExtensionFilter(param), nil + case "MaxSize", "MaxFilesize": + size, err := strconv.ParseInt(param, 10, 64) + if err != nil { + return DocFilter{}, err + } + return NewMaxFilesizeFilter(size), nil + case "ExcludeName", "ExcludeFilename": + // FIXME: support escaped commas + return NewExcludeFilenameFilter(strings.Split(param, ",")), nil + case "IncludeName", "IncludeFilename": + // FIXME: support escaped commas + return NewIncludeFilenameFilter(strings.Split(param, ",")), nil + case "ExcludeParent": + return NewExcludeParentFilter(param), nil + case "IncludeRegex": + filter, err := NewIncludeRegexFilter(param) + if err != nil { + return DocFilter{}, err + } + return filter, nil + case "ExcludeRegex": + filter, err := NewIncludeRegexFilter(param) + if err != nil { + return DocFilter{}, err + } + return filter, nil + default: + return DocFilter{}, fmt.Errorf("Unrecognized filter %s, see FILTERS", s) + } +} + func NewExtensionFilter(ext string) DocFilter { return DocFilter{ ext + " Filter", @@ -57,14 +115,40 @@ func NewIncludeFilenameFilter(included []string) DocFilter { // exclude files if it has a parent directory badParent func NewExcludeParentFilter(badParent string) DocFilter { return DocFilter{ - "Excluded Parent Directory filter", + "Excluded Parent Directory filter: " + badParent, func(ip infoPath, _ io.ReadSeeker) bool { - return !slices.Contains(strings.Split(ip.path, string(os.PathSeparator)), badParent) }, } } +func NewIncludeRegexFilter(pattern string) (DocFilter, error) { + re, err := regexp.Compile(pattern) + if err != nil { + return DocFilter{}, fmt.Errorf("Cannot compile regex: %v", err) + } + + return DocFilter{ + "Included Regex Filter: " + pattern, + func(ip infoPath, _ io.ReadSeeker) bool { + return re.MatchString(ip.path) + }, + }, nil +} +func NewExcludeRegexFilter(pattern string) (DocFilter, error) { + re, err := regexp.Compile(pattern) + if err != nil { + return DocFilter{}, fmt.Errorf("Cannot compile regex: %v", err) + } + + return DocFilter{ + "Excluded Regex Filter: " + pattern, + func(ip infoPath, _ io.ReadSeeker) bool { + return !re.MatchString(ip.path) + }, + }, nil +} + var YamlHeaderFilter = DocFilter{ "YAML Header Filter", yamlHeaderFilterFunc, -- cgit v1.2.3