package query

import (
	"fmt"
	"regexp"
	"slices"
	"strings"
)

type queryTokenType int

var LexRegex *regexp.Regexp
var LexRegexPattern string

const (
	TOK_UNKNOWN queryTokenType = iota

	// clause tokens
	TOK_CLAUSE_OR  // clause or
	TOK_CLAUSE_AND // clause and
	TOK_CLAUSE_START
	TOK_CLAUSE_END

	// statement tokens
	TOK_OP_NEG  // negation
	TOK_OP_EQ   // equal
	TOK_OP_AP   // approximate/fuzzy
	TOK_OP_NE   // not equal
	TOK_OP_LT   // less than
	TOK_OP_LE   // less than or equal
	TOK_OP_GE   // greater than or equal
	TOK_OP_GT   // greater than
	TOK_OP_PIPE // external pipe
	TOK_OP_ARG  // external arg

	// categories
	TOK_CAT_TITLE
	TOK_CAT_AUTHOR
	TOK_CAT_DATE
	TOK_CAT_FILETIME
	TOK_CAT_TAGS
	TOK_CAT_LINKS
	TOK_CAT_META

	// values
	TOK_VAL_STR
	TOK_VAL_DATETIME
)

type Token struct {
	Type  queryTokenType
	Value string
}

func (tokType queryTokenType) String() string {
	switch tokType {
	case TOK_UNKNOWN:
		return "Unknown"
	case TOK_CLAUSE_OR:
		return "Or"
	case TOK_CLAUSE_AND:
		return "And"
	case TOK_CLAUSE_START:
		return "Start Clause"
	case TOK_CLAUSE_END:
		return "End Clause"
	case TOK_OP_NEG:
		return "Negation"
	case TOK_OP_EQ:
		return "Equal"
	case TOK_OP_AP:
		return "Approximate"
	case TOK_OP_NE:
		return "Not Equal"
	case TOK_OP_LT:
		return "Less Than"
	case TOK_OP_LE:
		return "Less Than or Equal"
	case TOK_OP_GE:
		return "Greater Than or Equal"
	case TOK_OP_GT:
		return "Greater Than"
	case TOK_OP_PIPE:
		return "Pipe External Command"
	case TOK_OP_ARG:
		return "Argument External Command"
	case TOK_CAT_TITLE:
		return "Title Category"
	case TOK_CAT_AUTHOR:
		return "Author Category"
	case TOK_CAT_DATE:
		return "Date Category"
	case TOK_CAT_FILETIME:
		return "Filetime Category"
	case TOK_CAT_TAGS:
		return "Tags Category"
	case TOK_CAT_LINKS:
		return "Links Category"
	case TOK_CAT_META:
		return "Metadata Category"
	case TOK_VAL_DATETIME:
		return "Datetime Value"
	case TOK_VAL_STR:
		return "String Value"
	default:
		return "Invalid"
	}
}

func (t Token) String() string {
	return fmt.Sprint(t.Type.String(), ": ", t.Value)
}

func (t Token) Equal(other Token) bool {
	if t.Type.isValue() {
		return t.Type == other.Type && t.Value == other.Value
	}
	return t.Type == other.Type
}

// Any reports whether the token type is one of the expected types.
func (tokType queryTokenType) Any(expected ...queryTokenType) bool {
	return slices.Contains(expected, tokType)
}

func (t queryTokenType) isClause() bool {
	return t == TOK_CLAUSE_OR || t == TOK_CLAUSE_AND ||
		t == TOK_CLAUSE_START || t == TOK_CLAUSE_END
}

func (t queryTokenType) isCategory() bool {
	return t.Any(TOK_CAT_TITLE, TOK_CAT_AUTHOR, TOK_CAT_DATE, TOK_CAT_FILETIME,
		TOK_CAT_TAGS, TOK_CAT_LINKS, TOK_CAT_META)
}

func (t queryTokenType) isOperation() bool {
	return t.Any(TOK_OP_EQ, TOK_OP_AP, TOK_OP_NE, TOK_OP_LT, TOK_OP_LE,
		TOK_OP_GE, TOK_OP_GT, TOK_OP_PIPE, TOK_OP_ARG)
}

func (t queryTokenType) isValue() bool {
	return t == TOK_VAL_STR || t == TOK_VAL_DATETIME
}

// Lex tokenizes a query string into a flat token stream. The whole query is
// wrapped in an implicit outer and-clause, and any clauses left open are
// balanced with closing tokens at the end.
func Lex(query string) []Token {
	// submatch indices into LexRegex, in capture-group order
	const (
		MATCH = iota
		CLAUSE_START
		CLAUSE_OPERATOR
		STATEMENT
		NEGATION
		CATEGORY
		OPERATOR
		VALUE
		UNKNOWN
		CLAUSE_END
	)

	matches := LexRegex.FindAllStringSubmatch(query, -1)
	tokens := make([]Token, 0, 4*len(matches))
	tokens = append(tokens, Token{Type: TOK_CLAUSE_START})
	tokens = append(tokens, Token{TOK_CLAUSE_AND, "and"}) // default to and'ing all args
	clauseLevel := 1

	for _, match := range matches {
		if match[CLAUSE_START] != "" {
			tokens = append(tokens, Token{Type: TOK_CLAUSE_START})
			clauseLevel += 1
		}
		if match[CLAUSE_OPERATOR] != "" {
			// a clause operator without an explicit opening paren starts a new clause
			if len(tokens) == 0 || tokens[len(tokens)-1].Type != TOK_CLAUSE_START {
				tokens = append(tokens, Token{Type: TOK_CLAUSE_START})
				clauseLevel += 1
			}
			tokens = append(tokens, tokenizeClauseOperation(match[CLAUSE_OPERATOR]))
		}
		if t, ok := tokenizeNegation(match[NEGATION]); ok {
			tokens = append(tokens, t)
		}
		if match[CATEGORY] != "" {
			tokens = append(tokens, tokenizeCategory(match[CATEGORY]))
		}
		if match[OPERATOR] != "" {
			tokens = append(tokens, tokenizeOperation(match[OPERATOR]))
		}
		if match[VALUE] != "" {
			// the category token sits two back (category, operator, value)
			tokens = append(tokens, tokenizeValue(match[VALUE], tokens[len(tokens)-2].Type))
		}
		if match[UNKNOWN] != "" {
			tokens = append(tokens, Token{Value: match[UNKNOWN]}) // Type defaults to TOK_UNKNOWN
		}
		if match[CLAUSE_END] != "" {
			tokens = append(tokens, Token{Type: TOK_CLAUSE_END})
			clauseLevel -= 1
		}
	}

	// close any clauses still open, including the implicit outer one
	for range clauseLevel {
		tokens = append(tokens, Token{Type: TOK_CLAUSE_END})
	}
	return tokens
}
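
// Illustration (hypothetical sample input, not from the original source):
// lexing the single statement
//
//	tags=go
//
// yields the stream
//
//	Start Clause, And, Tags Category("tags"), Equal("="), String Value("go"), End Clause
//
// where the leading Start Clause/And pair is the implicit outer and-clause
// that Lex always emits.
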
func tokenizeClauseOperation(s string) Token {
	t := Token{Value: s}
	// the regex matches clause operators case-insensitively, so normalize here
	switch strings.ToLower(s) {
	case "and":
		t.Type = TOK_CLAUSE_AND
	case "or":
		t.Type = TOK_CLAUSE_OR
	}
	return t
}

func tokenizeNegation(s string) (Token, bool) {
	t := Token{Value: s}
	if s == "-" {
		t.Type = TOK_OP_NEG
	}
	return t, len(s) > 0
}

func tokenizeOperation(s string) Token {
	t := Token{Value: s}
	switch s {
	case "!=":
		t.Type = TOK_OP_NE
	case "!+":
		t.Type = TOK_OP_ARG
	case "<=":
		t.Type = TOK_OP_LE
	case ">=":
		t.Type = TOK_OP_GE
	case "=":
		t.Type = TOK_OP_EQ
	case ":", "~":
		t.Type = TOK_OP_AP
	case "<":
		t.Type = TOK_OP_LT
	case ">":
		t.Type = TOK_OP_GT
	case "!":
		t.Type = TOK_OP_PIPE
	}
	return t
}

func tokenizeCategory(s string) Token {
	t := Token{Value: s}
	switch s {
	case "T", "title":
		t.Type = TOK_CAT_TITLE
	case "a", "author":
		t.Type = TOK_CAT_AUTHOR
	case "d", "date":
		t.Type = TOK_CAT_DATE
	case "f", "filetime":
		t.Type = TOK_CAT_FILETIME
	case "t", "tags":
		t.Type = TOK_CAT_TAGS
	case "l", "links":
		t.Type = TOK_CAT_LINKS
	case "m", "meta":
		t.Type = TOK_CAT_META
	}
	return t
}

func tokenizeValue(s string, catType queryTokenType) Token {
	t := Token{}
	// strip surrounding double quotes, if any
	if len(s) >= 2 && s[0] == '"' && s[len(s)-1] == '"' {
		t.Value = s[1 : len(s)-1]
	} else {
		t.Value = s
	}
	switch catType {
	case TOK_CAT_DATE, TOK_CAT_FILETIME:
		t.Type = TOK_VAL_DATETIME
	case TOK_CAT_TITLE, TOK_CAT_AUTHOR, TOK_CAT_TAGS, TOK_CAT_LINKS, TOK_CAT_META:
		t.Type = TOK_VAL_STR
	}
	return t
}

// TokensStringify renders a token stream as an indented clause tree for
// debugging.
func TokensStringify(tokens []Token) string {
	b := strings.Builder{}
	indentLvl := 0
	writeToken := func(t Token) {
		b.WriteByte('`')
		b.WriteString(t.String())
		b.WriteByte('`')
	}
	for i, token := range tokens {
		switch token.Type {
		case TOK_CLAUSE_START:
			writeIndent(&b, indentLvl)
			b.WriteByte('(')
		case TOK_CLAUSE_END:
			indentLvl -= 1
			writeIndent(&b, indentLvl)
			b.WriteString(")\n")
		case TOK_CLAUSE_OR:
			b.WriteString("or\n")
			indentLvl += 1
		case TOK_CLAUSE_AND:
			b.WriteString("and\n")
			indentLvl += 1
		case TOK_CAT_TITLE, TOK_CAT_AUTHOR, TOK_CAT_DATE, TOK_CAT_FILETIME,
			TOK_CAT_TAGS, TOK_CAT_LINKS, TOK_CAT_META, TOK_OP_NEG:
			if i == 0 || tokens[i-1].Type != TOK_OP_NEG {
				writeIndent(&b, indentLvl)
			}
			writeToken(token)
		case TOK_VAL_STR, TOK_VAL_DATETIME, TOK_UNKNOWN:
			writeToken(token)
			b.WriteByte('\n')
		default:
			writeToken(token)
		}
	}
	return b.String()
}
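
// writeIndent is referenced by TokensStringify but not defined in this file.
// The version below is an assumed minimal sketch (one tab per nesting level),
// included in case it is not provided elsewhere in the package.
func writeIndent(b *strings.Builder, level int) {
	for range level {
		b.WriteByte('\t')
	}
}
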
func init() {
	// named capture groups below correspond, in order, to the submatch index
	// constants declared in Lex
	negPattern := `(?<negation>-?)`
	categoryPattern := `(?<category>T|a(?:uthor)?|d(?:ate)?|f(?:iletime)?|t(?:ags|itle)?|l(?:inks)?|m(?:eta)?)`
	// \+ is escaped so the external-arg operator "!+" matches literally
	opPattern := `(?<operator>!=|!\+|<=|>=|=|:|~|<|>|!)`
	valPattern := `(?<value>".*?"|\S*[^\s\)])`
	statementPattern := `(?<statement>` + negPattern + categoryPattern + opPattern + valPattern + `)`
	unknownPattern := `(?<unknown>\S*".*?"[^\s)]*|\S*[^\s\)])`
	clauseOpPattern := `(?<clauseOp>(?i)and|or)?`
	clauseStart := `(?<clauseStart>\()?`
	clauseEnd := `(?<clauseEnd>\))?`
	clausePattern := clauseStart + `\s*` + clauseOpPattern + `\s*(?:` + statementPattern + `|` + unknownPattern + `)\s*` + clauseEnd + `\s*`
	LexRegexPattern = clausePattern
	// FIXME: fails to match start of clauses with no values
	// example: (and (or ... )) fails
	LexRegex = regexp.MustCompile(LexRegexPattern)
}
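
// LexDebug is a hypothetical convenience wrapper (not part of the original
// file) showing how Lex and TokensStringify compose when inspecting how a
// query is tokenized:
//
//	fmt.Println(LexDebug(`(author:doe or tags=go)`))
func LexDebug(query string) string {
	return TokensStringify(Lex(query))
}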