package index import ( "fmt" "io" "os" "path/filepath" "regexp" "slices" "strconv" "strings" ) type DocFilter struct { Name string Filter func(InfoPath, io.ReadSeeker) bool } const FilterHelp string = ` YAMLHeader - reject files without YAML header Ext,Extension_ - accept files ending with MaxSize,MaxFilesize_ - accept files of at most bytes ExcludeName,ExcludeFilename_,..., - reject files with names in list IncludeName,IncludeFilename_,..., - accept files with names in list ExcludeParent_ - reject files if is a parent directory IncludeRegex_ - accept files whose path matches ExcludeRegex_ - reject files whose path matches ` func ParseFilter(s string) (DocFilter, error) { name, param, found := strings.Cut(s, "_") // paramless filters if name == "YAMLHeader" { return YamlHeaderFilter, nil } if !found { return DocFilter{}, fmt.Errorf("Expected parameter with filter %s", name) } switch name { case "Ext", "Extension": return NewExtensionFilter(param), nil case "MaxSize", "MaxFilesize": size, err := strconv.ParseInt(param, 10, 64) if err != nil { return DocFilter{}, err } return NewMaxFilesizeFilter(size), nil case "ExcludeName", "ExcludeFilename": return NewExcludeFilenameFilter(strings.Split(param, ",")), nil case "IncludeName", "IncludeFilename": return NewIncludeFilenameFilter(strings.Split(param, ",")), nil case "ExcludeParent": return NewExcludeParentFilter(param), nil case "IncludeRegex": filter, err := NewIncludeRegexFilter(param) if err != nil { return DocFilter{}, err } return filter, nil case "ExcludeRegex": filter, err := NewIncludeRegexFilter(param) if err != nil { return DocFilter{}, err } return filter, nil default: return DocFilter{}, fmt.Errorf("Unrecognized filter %s, see FILTERS", s) } } func NewExtensionFilter(ext string) DocFilter { return DocFilter{ ext + " Filter", func(ip InfoPath, _ io.ReadSeeker) bool { return filepath.Ext(ip.Path) == ext }, } } func NewMaxFilesizeFilter(size int64) DocFilter { return DocFilter{ fmt.Sprintf("Max Size Filter %d", size), func(ip InfoPath, _ io.ReadSeeker) bool { return ip.Info.Size() <= size }, } } func NewExcludeFilenameFilter(excluded []string) DocFilter { return DocFilter{ "Excluded Filename filter", func(ip InfoPath, _ io.ReadSeeker) bool { filename := filepath.Base(ip.Path) return !slices.Contains(excluded, filename) }, } } func NewIncludeFilenameFilter(included []string) DocFilter { return DocFilter{ "Included Filename filter", func(ip InfoPath, _ io.ReadSeeker) bool { filename := filepath.Base(ip.Path) return slices.Contains(included, filename) }, } } // exclude files if it has a parent directory badParent func NewExcludeParentFilter(badParent string) DocFilter { return DocFilter{ "Excluded Parent Directory filter: " + badParent, func(ip InfoPath, _ io.ReadSeeker) bool { return !slices.Contains(strings.Split(ip.Path, string(os.PathSeparator)), badParent) }, } } func NewIncludeRegexFilter(pattern string) (DocFilter, error) { re, err := regexp.Compile(pattern) if err != nil { return DocFilter{}, fmt.Errorf("Cannot compile regex: %v", err) } return DocFilter{ "Included Regex Filter: " + pattern, func(ip InfoPath, _ io.ReadSeeker) bool { return re.MatchString(ip.Path) }, }, nil } func NewExcludeRegexFilter(pattern string) (DocFilter, error) { re, err := regexp.Compile(pattern) if err != nil { return DocFilter{}, fmt.Errorf("Cannot compile regex: %v", err) } return DocFilter{ "Excluded Regex Filter: " + pattern, func(ip InfoPath, _ io.ReadSeeker) bool { return !re.MatchString(ip.Path) }, }, nil } var YamlHeaderFilter = DocFilter{ "YAML Header Filter", func(_ InfoPath, rs io.ReadSeeker) bool { return YamlHeaderPos(rs) > 0 }, } // Position of the end of a yaml header, negative func YamlHeaderPos(r io.ReadSeeker) int64 { const bufSize = 4096 buf := make([]byte, bufSize) carry := make([]byte, 4) cmp := make([]byte, 4) n, err := r.Read(carry) if err != nil || n < 4 || string(carry) != "---\n" { return -1 } pos := int64(3) headerFound := false readMore := true for readMore { buf = buf[:bufSize] n, err := r.Read(buf) if err == io.EOF { readMore = false } else if err != nil { return -1 } buf = buf[:n] // PERF: the carry doesn't need to be checked on the first loop iteration for i := range min(4, n) { pos++ b := carry[i] for j := range 4 { if i+j < 4 { cmp[j] = carry[i+j] } else { cmp[j] = buf[(i+j)%4] } } if b == '\n' && string(cmp) == "\n---\n" { headerFound = true readMore = false break } } for i := range n - 4 { pos++ b := buf[i] if b == '\n' && string(buf[i:i+5]) == "\n---\n" { headerFound = true readMore = false break } } if readMore { for i := range 4 { carry[i] = buf[n-4+i] } } } if headerFound { return pos } else { return -1 } } func DefaultFilters() []DocFilter { return []DocFilter{NewExtensionFilter(".md"), NewMaxFilesizeFilter(200 * 1024), NewExcludeParentFilter("templates"), YamlHeaderFilter} }