author     JP Appel <jeanpierre.appel01@gmail.com>  2025-06-09 16:09:56 -0400
committer  JP Appel <jeanpierre.appel01@gmail.com>  2025-06-09 16:09:56 -0400
commit     c3d812262761c140316f5501783f8302191e5053 (patch)
tree       5c70e8c340ef24275740f2bfe7b7e21392c1a56b /debug_shell
parent     b09bbdae45e0c3a3b3f00c6a677dd524464ddf67 (diff)
Add debug shell
Diffstat (limited to 'debug_shell')
-rw-r--r--  debug_shell/debug_shell.go   11
-rw-r--r--  debug_shell/interpreter.go  325
-rw-r--r--  debug_shell/state.go        142
3 files changed, 478 insertions, 0 deletions
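
A plausible first session with the shell added here (command syntax as documented in printHelp below; outputs are omitted since they depend on the pkg/query lexer and parser):

    > let q `author:me
    > print q
    > let toks tokenize q
    > parse toks
    > repattern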
diff --git a/debug_shell/debug_shell.go b/debug_shell/debug_shell.go
new file mode 100644
index 0000000..2ddf467
--- /dev/null
+++ b/debug_shell/debug_shell.go
@@ -0,0 +1,11 @@
+package main
+
+import "os"
+
+func main() {
+ // TODO: command line args
+ state := make(State)
+ interpreter := NewInterpreter(state, os.Stdin)
+
+ interpreter.Run()
+}
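
Since NewInterpreter accepts any io.Reader, the shell can also be driven non-interactively. A minimal sketch under that assumption (hypothetical helper in the same package, with "io" and "strings" imported; Run returns io.EOF once a finite input is exhausted):

    // runScript feeds a fixed script to the interpreter instead of os.Stdin.
    func runScript(script string) error {
        interp := NewInterpreter(make(State), strings.NewReader(script))
        if err := interp.Run(); err != nil && err != io.EOF {
            return err
        }
        return nil
    }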
diff --git a/debug_shell/interpreter.go b/debug_shell/interpreter.go
new file mode 100644
index 0000000..d63f1c2
--- /dev/null
+++ b/debug_shell/interpreter.go
@@ -0,0 +1,325 @@
+package main
+
+import (
+ "bufio"
+ "errors"
+ "fmt"
+ "io"
+ "os"
+ "os/signal"
+ "slices"
+ "strings"
+ "syscall"
+
+ "github.com/jpappel/atlas/pkg/query"
+)
+
+var commands = []string{
+ "help",
+ "let",
+ "del",
+	"print",
+	"len",
+	"slice",
+ "tokenize",
+ "rematch",
+ "parse",
+ "repattern",
+}
+
+type Interpreter struct {
+ State State
+ Scanner *bufio.Scanner
+}
+
+type ITokType int
+
+const (
+ ITOK_INVALID ITokType = iota
+
+ ITOK_VAR_NAME
+
+ // values
+ ITOK_VAL_INT
+ ITOK_VAL_STR
+ ITOK_VAL_TOKENS
+ ITOK_VAL_CLAUSE
+
+ // commands
+ ITOK_CMD_HELP
+ ITOK_CMD_LET
+ ITOK_CMD_DEL
+ ITOK_CMD_PRINT
+ ITOK_CMD_LEN
+ ITOK_CMD_SLICE
+ ITOK_CMD_REMATCH
+ ITOK_CMD_REPATTERN
+ ITOK_CMD_TOKENIZE
+ ITOK_CMD_PARSE
+)
+
+type IToken struct {
+ Type ITokType
+ Text string
+}
+
+func NewInterpreter(initialState State, inputSource io.Reader) *Interpreter {
+ return &Interpreter{
+ initialState,
+ bufio.NewScanner(inputSource),
+ }
+}
+
+func (interpreter *Interpreter) Reset() {
+ interpreter.State = make(State)
+}
+
+func (interpreter *Interpreter) Eval(tokens []IToken) (bool, error) {
+ if len(tokens) == 0 {
+ return false, nil
+ }
+
+ if slices.ContainsFunc(tokens, func(token IToken) bool {
+ return token.Type == ITOK_INVALID
+ }) {
+ b := strings.Builder{}
+ b.WriteString("Unexpected token(s) at ")
+ for i, t := range tokens {
+ if t.Type == ITOK_INVALID {
+ b.WriteString(fmt.Sprint(i, ", "))
+ }
+ }
+ return false, errors.New(b.String())
+ }
+
+ var variableName string
+ var carryValue Value
+ var ok bool
+ for i := len(tokens) - 1; i >= 0; i-- {
+ t := tokens[i]
+ switch t.Type {
+ case ITOK_CMD_HELP:
+ printHelp()
+ case ITOK_CMD_LET:
+ interpreter.State[variableName] = carryValue
+ carryValue.Type = INVALID
+ case ITOK_CMD_DEL:
+ if len(tokens) == 1 {
+ fmt.Println("Deleting all variables")
+ interpreter.State = make(State)
+ } else {
+ // HACK: variable name is not evaluated correctly so just look at the next token
+ delete(interpreter.State, tokens[i+1].Text)
+ }
+ carryValue.Type = INVALID
+ case ITOK_CMD_PRINT:
+ if len(tokens) == 1 {
+ fmt.Println("Variables:")
+ fmt.Println(interpreter.State)
+ } else {
+ carryValue, ok = interpreter.State[tokens[1].Text]
+ if !ok {
+ return false, errors.New("No variable found with name " + tokens[1].Text)
+ }
+ }
+ case ITOK_CMD_REMATCH:
+ if carryValue.Type != STRING {
+ return false, errors.New("Unable to match against argument")
+ }
+
+ body, ok := carryValue.Val.(string)
+ if !ok {
+ return true, errors.New("Type corruption during rematch, expected string")
+ }
+
+ b := strings.Builder{}
+ matchGroupNames := query.LexRegex.SubexpNames()
+ for _, match := range query.LexRegex.FindAllStringSubmatch(body, -1) {
+ for i, part := range match {
+ b.WriteString(matchGroupNames[i])
+ fmt.Fprintf(&b, "[%d]", len(part))
+ b.WriteByte(':')
+ b.WriteString(part)
+ b.WriteByte('\n')
+ }
+ b.WriteByte('\n')
+ }
+ carryValue.Val = b.String()
+ case ITOK_CMD_REPATTERN:
+ fmt.Println(query.LexRegexPattern)
+ case ITOK_CMD_TOKENIZE:
+ if carryValue.Type != STRING {
+ return false, errors.New("Unable to tokenize argument")
+ }
+
+ rawQuery, ok := carryValue.Val.(string)
+ if !ok {
+ return true, errors.New("Type corruption during tokenize, expected string")
+ }
+ carryValue.Type = TOKENS
+ carryValue.Val = query.Lex(rawQuery)
+ case ITOK_CMD_PARSE:
+ if carryValue.Type != TOKENS {
+ fmt.Println("Carry type: ", carryValue.Type)
+ return false, errors.New("Unable to parse argument")
+ }
+
+ queryTokens, ok := carryValue.Val.([]query.Token)
+ if !ok {
+				return true, errors.New("Type corruption during parse, expected []query.Token")
+ }
+
+ clause, err := query.Parse(queryTokens)
+ if err != nil {
+ return false, err
+ }
+ carryValue.Type = CLAUSE
+ carryValue.Val = clause
+ case ITOK_VAR_NAME:
+ // NOTE: very brittle, only allows expansion of a single variable
+ if i == len(tokens)-1 {
+ carryValue, ok = interpreter.State[t.Text]
+ if !ok {
+ return false, errors.New("No variable: " + t.Text)
+ }
+ } else {
+ variableName = t.Text
+ }
+ case ITOK_VAL_STR:
+ carryValue.Type = STRING
+ carryValue.Val = t.Text
+ case ITOK_CMD_LEN:
+ fmt.Println("not implemented yet ;)")
+ case ITOK_CMD_SLICE:
+ fmt.Println("not implemented yet ;)")
+ }
+ }
+
+ if carryValue.Type != INVALID {
+ fmt.Println(carryValue)
+ }
+
+ return false, nil
+}
+
+func (interpreter Interpreter) Tokenize(line string) []IToken {
+ var prevType ITokType
+ tokens := make([]IToken, 0, 3)
+ for word := range strings.SplitSeq(line, " ") {
+ trimmedWord := strings.TrimSpace(word)
+ if trimmedWord == "" {
+ continue
+ }
+
+ if len(tokens) != 0 {
+ prevType = tokens[len(tokens)-1].Type
+ }
+
+ if trimmedWord == "help" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_HELP})
+ } else if trimmedWord == "let" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_LET})
+ } else if trimmedWord == "del" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_DEL})
+ } else if trimmedWord == "print" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_PRINT})
+ } else if trimmedWord == "len" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_LEN})
+ } else if trimmedWord == "slice" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_SLICE})
+ } else if trimmedWord == "rematch" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_REMATCH})
+ } else if trimmedWord == "repattern" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_REPATTERN})
+ } else if trimmedWord == "tokenize" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_TOKENIZE})
+ } else if trimmedWord == "parse" {
+ tokens = append(tokens, IToken{Type: ITOK_CMD_PARSE})
+ } else if prevType == ITOK_CMD_LET {
+ tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+ } else if prevType == ITOK_CMD_DEL {
+ tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+ } else if prevType == ITOK_CMD_PRINT {
+ tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+		} else if prevType == ITOK_CMD_LEN || prevType == ITOK_CMD_SLICE ||
+			prevType == ITOK_CMD_REMATCH || prevType == ITOK_CMD_TOKENIZE {
+			if trimmedWord[0] == '`' {
+				_, strLiteral, _ := strings.Cut(word, "`")
+				tokens = append(tokens, IToken{ITOK_VAL_STR, strLiteral})
+			} else {
+				tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+			}
+ } else if prevType == ITOK_CMD_PARSE {
+ tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+ } else if prevType == ITOK_VAR_NAME && trimmedWord[0] == '`' {
+ _, strLiteral, _ := strings.Cut(word, "`")
+ tokens = append(tokens, IToken{ITOK_VAL_STR, strLiteral})
+ } else if prevType == ITOK_VAL_STR {
+ tokens[len(tokens)-1].Text += " " + word
+ } else {
+ tokens = append(tokens, IToken{ITOK_INVALID, trimmedWord})
+ }
+ }
+
+ return tokens
+}
+
+func (interpreter Interpreter) Run() error {
+ signalCh := make(chan os.Signal, 1)
+ exitCh := make(chan error, 1)
+ lineCh := make(chan string)
+
+	signal.Notify(signalCh, syscall.SIGINT)
+	// stop signal delivery on return; closing the channels here could
+	// panic the scanner goroutine, which may still be trying to send
+	defer signal.Stop(signalCh)
+ go func(output chan<- string, exitCh chan<- error) {
+ for {
+ if interpreter.Scanner.Scan() {
+ output <- interpreter.Scanner.Text()
+ } else if err := interpreter.Scanner.Err(); err != nil {
+ exitCh <- err
+ return
+ } else {
+ exitCh <- io.EOF
+ return
+ }
+ }
+ }(lineCh, exitCh)
+
+ for {
+ fmt.Print("> ")
+
+ select {
+ case <-signalCh:
+ // TODO: log info to output
+ return nil
+ case err := <-exitCh:
+ return err
+ case line := <-lineCh:
+ tokens := interpreter.Tokenize(line)
+ fatal, err := interpreter.Eval(tokens)
+ if fatal {
+ return err
+ } else if err != nil {
+ fmt.Println(err)
+ }
+ }
+ }
+}
+
+func printHelp() {
+	fmt.Println("Commands: help, let, del, print, len, slice, rematch, repattern, tokenize, parse")
+	fmt.Println("help - print this help")
+	fmt.Println("let name (string|tokens|clause) - save a value to a variable")
+	fmt.Println("del [name] - delete a variable or all variables")
+	fmt.Println("print [name] - print a variable or all variables")
+	fmt.Println("len (string|tokens|name) - print the length of a string or tokens (not implemented yet)")
+	fmt.Println("slice (string|tokens|name) start stop - slice a string or tokens from start to stop (not implemented yet)")
+	fmt.Println("rematch (string|name) - match a string against the querylang lexer regex")
+	fmt.Println("repattern - print the querylang lexer regex")
+	fmt.Println("tokenize (string|name) - tokenize a string")
+	fmt.Println("  ex. tokenize `author:me")
+	fmt.Println("parse (tokens|name) - parse tokens into a clause")
+	fmt.Println("\nResults not saved with let are printed")
+}
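
The tokenizer's backtick rule can be pinned down with a small test (hypothetical interpreter_test.go; the expected tokens follow directly from the prevType branches above):

    package main

    import (
        "slices"
        "strings"
        "testing"
    )

    func TestTokenizeLet(t *testing.T) {
        interp := NewInterpreter(make(State), strings.NewReader(""))
        got := interp.Tokenize("let q `author:me")
        want := []IToken{
            {Type: ITOK_CMD_LET},
            {Type: ITOK_VAR_NAME, Text: "q"},
            {Type: ITOK_VAL_STR, Text: "author:me"},
        }
        if !slices.Equal(got, want) {
            t.Errorf("got %v, want %v", got, want)
        }
    }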
diff --git a/debug_shell/state.go b/debug_shell/state.go
new file mode 100644
index 0000000..17d0474
--- /dev/null
+++ b/debug_shell/state.go
@@ -0,0 +1,142 @@
+package main
+
+import (
+ "errors"
+ "fmt"
+ "os"
+ "strings"
+
+ "github.com/jpappel/atlas/pkg/query"
+)
+
+type ValueType int
+
+const (
+ INVALID ValueType = iota
+ STRING
+ TOKENS
+ CLAUSE
+)
+
+type Value struct {
+ Type ValueType
+ Val any
+}
+
+type State map[string]Value
+
+func (v Value) String() string {
+ switch v.Type {
+ case STRING:
+ s, ok := v.Val.(string)
+ if !ok {
+ return "Corrupted Type (expected string)"
+ }
+ return s
+ case TOKENS:
+ ts, ok := v.Val.([]query.Token)
+ if !ok {
+ return "Corrupted Type (expected []query.Token)"
+ }
+ return query.TokensStringify(ts)
+ case CLAUSE:
+ rootClause, ok := v.Val.(*query.Clause)
+ if !ok {
+ return "Corrupted Type (expected query.Clause)"
+ }
+ return rootClause.String()
+ case INVALID:
+ return "Invalid"
+ }
+ return fmt.Sprintf("Unknown @ %p", v.Val)
+}
+
+func (s State) String() string {
+ b := strings.Builder{}
+
+ for k, v := range s {
+ b.WriteString(k)
+ b.WriteByte(':')
+ switch v.Type {
+ case INVALID:
+ b.WriteString(" Invalid")
+ case STRING:
+ b.WriteString(" String")
+ case TOKENS:
+ b.WriteString(" Tokens")
+ case CLAUSE:
+ b.WriteString(" Clause")
+ default:
+			fmt.Fprintf(&b, " Unknown (%d)", v.Type)
+ }
+ b.WriteByte('\n')
+ }
+
+ return b.String()
+}
+
+func (s State) CmdTokenize(input string) (Value, bool) {
+ if len(input) == 0 {
+ return Value{}, false
+ }
+
+ var rawQuery string
+ if input[0] == '`' {
+ rawQuery = input[1:]
+ } else {
+ variable, ok := s[input]
+ if !ok {
+ fmt.Fprintln(os.Stderr, "Cannot tokenize: no variable with name", input)
+ return Value{}, false
+ } else if variable.Type != STRING {
+ fmt.Fprintln(os.Stderr, "Cannot tokenize: variable is not a string")
+ return Value{}, false
+ }
+
+ rawQuery, ok = variable.Val.(string)
+ if !ok {
+ fmt.Fprintln(os.Stderr, "Cannot tokenize: type corruption")
+ fmt.Fprintln(os.Stderr, "Type corruption, expected string")
+ panic("Type corruption")
+ }
+ }
+ tokens := query.Lex(rawQuery)
+ return Value{TOKENS, tokens}, true
+}
+
+func (s State) CmdParse(args string) (Value, error) {
+ if len(args) == 0 {
+ return Value{}, errors.New("no arguments for parse")
+ }
+
+ var tokens []query.Token
+ if tokenizeArgs, found := strings.CutPrefix(args, "tokenize "); found {
+ val, ok := s.CmdTokenize(tokenizeArgs)
+ if !ok {
+			return Value{}, errors.New("error occurred during tokenization")
+ }
+ tokens = val.Val.([]query.Token)
+ } else {
+ variable, ok := s[args]
+ if !ok {
+ fmt.Fprintln(os.Stderr, "Cannot parse: no variable with name", args)
+ return Value{}, errors.New("variable does not exist")
+ } else if variable.Type != TOKENS {
+			fmt.Fprintln(os.Stderr, "Cannot parse: variable is not []query.Token")
+ return Value{}, errors.New("bad variable type")
+ }
+
+ tokens, ok = variable.Val.([]query.Token)
+ if !ok {
+ fmt.Fprintln(os.Stderr, "Cannot parse: type corruption")
+			fmt.Fprintln(os.Stderr, "Type corruption, expected []query.Token")
+ panic("Type corruption")
+ }
+ }
+
+ clause, err := query.Parse(tokens)
+ if err != nil {
+ return Value{}, err
+ }
+	return Value{CLAUSE, clause}, nil
+}
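
State's helpers compose: CmdParse recognizes a leading "tokenize " and lexes the rest before parsing, so a clause can be built in one call. A rough usage sketch in the same package (the query string is an assumption about querylang syntax; fmt is already imported in this file):

    // parseQuery tokenizes and parses raw in one step via CmdParse.
    func parseQuery(raw string) {
        s := make(State)
        v, err := s.CmdParse("tokenize `" + raw)
        if err != nil {
            fmt.Println("parse failed:", err)
            return
        }
        fmt.Println(v) // Value.String renders the parsed clause
    }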