aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/index/filters.go
blob: 7766b4ce52a1727911bc2eaf347834d436628123 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
package index

import (
	"fmt"
	"io"
	"path/filepath"
)

// NOTE: in the future it would be interesting to support Lua filters.

// DocFilter pairs a descriptive name with a predicate that is evaluated
// against a candidate document.
type DocFilter struct {
	// Name is a human-readable label for the filter.
	Name   string
	// Filter is the predicate. It receives the document's infoPath and a
	// reader (presumably positioned over the document's contents — confirm
	// against the caller). The constructors in this file return true when the
	// document satisfies the filter's condition.
	Filter func(infoPath, io.ReadSeeker) bool
}

// NewExtensionFilter builds a DocFilter that accepts only paths whose file
// extension, as reported by filepath.Ext, is exactly ext (leading dot
// included, case-sensitive). The filter is named ext followed by " Filter".
// The reader argument of the predicate is ignored.
func NewExtensionFilter(ext string) DocFilter {
	hasExt := func(doc infoPath, _ io.ReadSeeker) bool {
		got := filepath.Ext(doc.path)
		return got == ext
	}
	return DocFilter{
		Name:   ext + " Filter",
		Filter: hasExt,
	}
}

// NewMaxFilesizeFilter builds a DocFilter that accepts documents whose
// reported size (ip.info.Size()) is at most size; the limit is inclusive.
// The filter's name embeds the limit, e.g. "Max Size Filter 1024". The reader
// argument of the predicate is ignored.
func NewMaxFilesizeFilter(size int64) DocFilter {
	label := fmt.Sprintf("Max Size Filter %d", size)
	withinLimit := func(doc infoPath, _ io.ReadSeeker) bool {
		if doc.info.Size() > size {
			return false
		}
		return true
	}
	return DocFilter{Name: label, Filter: withinLimit}
}

// NewExcludeFilenameFilter builds a DocFilter that rejects any document whose
// base filename (filepath.Base of its path) appears in excluded. Matching is
// exact and case-sensitive; every other document is accepted. The reader
// argument of the predicate is ignored.
func NewExcludeFilenameFilter(excluded []string) DocFilter {
	// Set of banned base names; only key presence matters.
	banned := make(map[string]struct{}, len(excluded))
	for i := range excluded {
		banned[excluded[i]] = struct{}{}
	}

	notBanned := func(doc infoPath, _ io.ReadSeeker) bool {
		if _, hit := banned[filepath.Base(doc.path)]; hit {
			return false
		}
		return true
	}
	return DocFilter{Name: "Excluded Filename filter", Filter: notBanned}
}

// NewIncludeFilenameFilter builds a DocFilter that accepts only documents
// whose base filename (filepath.Base of its path) appears in included.
// Matching is exact and case-sensitive; every other document is rejected.
// The reader argument of the predicate is ignored.
func NewIncludeFilenameFilter(included []string) DocFilter {
	// Set of allowed base names; only key presence matters.
	allowed := make(map[string]struct{}, len(included))
	for i := range included {
		allowed[included[i]] = struct{}{}
	}

	isAllowed := func(doc infoPath, _ io.ReadSeeker) bool {
		_, hit := allowed[filepath.Base(doc.path)]
		return hit
	}
	return DocFilter{Name: "Included Filename filter", Filter: isAllowed}
}

// YamlHeaderFilter is the DocFilter named "YAML Header Filter"; its predicate
// is yamlHeaderFilterFunc.
var YamlHeaderFilter = DocFilter{
	Name:   "YAML Header Filter",
	Filter: yamlHeaderFilterFunc,
}

// yamlHeaderFilterFunc reports whether the stream r begins with a YAML
// front-matter header: the opening delimiter "---\n" as the very first bytes,
// followed later by the closing delimiter "\n---\n".
//
// The infoPath argument is unused. r is consumed from its current position
// and is not rewound; scanning stops as soon as the closing delimiter has
// been seen. A read error, or end of input before the closing delimiter,
// yields false.
func yamlHeaderFilterFunc(_ infoPath, r io.ReadSeeker) bool {
	const (
		opener  = "---\n"
		closer  = "\n---\n"
		bufSize = 4096
	)

	// io.ReadFull tolerates readers that legitimately return short reads.
	var head [len(opener)]byte
	if _, err := io.ReadFull(r, head[:]); err != nil || string(head[:]) != opener {
		return false
	}

	// matched is the number of leading bytes of closer that the input read so
	// far ends with. Carrying this count across reads lets a delimiter that
	// straddles a buffer boundary be recognised without a carry buffer. The
	// opener's trailing newline doubles as the closer's first byte, so an
	// empty header ("---\n---\n") matches too.
	matched := 1
	buf := make([]byte, bufSize)
	for {
		n, err := r.Read(buf)
		// Consume the n bytes before inspecting err, per the io.Reader contract.
		for _, b := range buf[:n] {
			switch {
			case b == closer[matched]:
				matched++
				if matched == len(closer) {
					return true
				}
			case b == '\n':
				// A newline is the only byte that can restart a partial match.
				matched = 1
			default:
				matched = 0
			}
		}
		if err != nil {
			// io.EOF or a real failure: either way no closing delimiter was found.
			return false
		}
	}
}

// DefaultFilters returns the default filter set, in order: a ".md" extension
// filter, a maximum-size filter with a 200 KiB limit, and YamlHeaderFilter.
// A new slice is built on every call.
func DefaultFilters() []DocFilter {
	const maxBytes = 200 * 1024

	defaults := make([]DocFilter, 0, 3)
	defaults = append(defaults,
		NewExtensionFilter(".md"),
		NewMaxFilesizeFilter(maxBytes),
		YamlHeaderFilter,
	)
	return defaults
}