aboutsummaryrefslogtreecommitdiffstats
path: root/pkg/index/filters.go
blob: f59a5d6656bc3bb3d629b3a4f3167cd77fb59943 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
package index

import (
	"bytes"
	"io"
	"path/filepath"
)

// TODO: create excluded path filter factory

// DocFilter is a predicate evaluated for a candidate document. It receives
// the document's infoPath and a seekable reader and returns a boolean
// verdict. The filters in this file return true when the document satisfies
// their criterion; presumably callers keep only documents for which every
// filter returns true (confirm against the call site).
type DocFilter func(doc infoPath, content io.ReadSeeker) bool

// NewExtensionFilter builds a DocFilter that returns true only when the
// extension of the document's path, as computed by filepath.Ext, is exactly
// ext. The comparison is case-sensitive, and because filepath.Ext includes
// the leading dot, ext should too (e.g. ".md"). The reader argument is
// ignored.
func NewExtensionFilter(ext string) DocFilter {
	hasExt := func(doc infoPath, _ io.ReadSeeker) bool {
		got := filepath.Ext(doc.path)
		return got == ext
	}
	return hasExt
}

// NewMaxFilesizeFilter builds a DocFilter that returns true when the size
// reported by the document's info is at most size (the limit is inclusive).
// The unit is whatever info.Size() reports, presumably bytes (confirm against
// the infoPath definition). The reader argument is ignored.
func NewMaxFilesizeFilter(size int64) DocFilter {
	return func(doc infoPath, _ io.ReadSeeker) bool {
		if doc.info.Size() > size {
			return false
		}
		return true
	}
}

// YamlHeaderFilter reports whether the content read from r starts with a
// YAML front-matter header: the very first bytes must be the line "---\n",
// and a closing "\n---\n" must appear somewhere after it. The newline that
// ends the opening fence may double as the newline that starts the closing
// fence, so an empty header ("---\n---\n") is accepted. Only LF line endings
// are recognized.
//
// The infoPath argument is ignored. r is consumed from its current position
// and is not rewound; reading stops as soon as the closing fence is found.
// Any read error, or reaching EOF before the closing fence is seen, yields
// false.
func YamlHeaderFilter(_ infoPath, r io.ReadSeeker) bool {
	const (
		opener    = "---\n"
		chunkSize = 4096
	)
	closer := []byte("\n---\n")

	// Number of trailing bytes retained between reads so that a closing fence
	// split across two reads is still found. It equals len(opener), which
	// lets the opening fence itself seed the window.
	overlap := len(closer) - 1

	window := make([]byte, overlap+chunkSize)

	// io.ReadFull tolerates readers that hand back fewer bytes than requested
	// per call; a single Read could legally return a partial opening fence.
	if _, err := io.ReadFull(r, window[:len(opener)]); err != nil {
		return false
	}
	if string(window[:len(opener)]) != opener {
		return false
	}

	// kept is the number of carried-over bytes at the front of window.
	kept := len(opener)
	for {
		n, err := r.Read(window[kept:])
		if err != nil && err != io.EOF {
			return false
		}

		// Search the carried-over bytes together with the fresh ones.
		data := window[:kept+n]
		if bytes.Contains(data, closer) {
			return true
		}
		if err == io.EOF {
			return false
		}

		// Slide the tail of everything seen so far to the front of the window.
		// Short reads may leave fewer than overlap bytes; keep what there is.
		tail := data
		if len(tail) > overlap {
			tail = tail[len(tail)-overlap:]
		}
		kept = copy(window, tail)
	}
}

// DefaultFilters returns a new slice holding the stock filter set, in order:
// an extension filter for ".md" followed by a maximum-size filter with a
// limit of 200*1024.
func DefaultFilters() []DocFilter {
	const maxSize = 200 * 1024

	filters := make([]DocFilter, 0, 2)
	filters = append(filters, NewExtensionFilter(".md"))
	filters = append(filters, NewMaxFilesizeFilter(maxSize))
	return filters
}