From c3d812262761c140316f5501783f8302191e5053 Mon Sep 17 00:00:00 2001
From: JP Appel
Date: Mon, 9 Jun 2025 16:09:56 -0400
Subject: Add debug shell

---
 debug_shell/debug_shell.go |  11 ++
 debug_shell/interpreter.go | 334 +++++++++++++++++++++++++++++++++++++++++++++
 debug_shell/state.go       | 142 +++++++++++++++++++
 3 files changed, 487 insertions(+)
 create mode 100644 debug_shell/debug_shell.go
 create mode 100644 debug_shell/interpreter.go
 create mode 100644 debug_shell/state.go

(limited to 'debug_shell')

diff --git a/debug_shell/debug_shell.go b/debug_shell/debug_shell.go
new file mode 100644
index 0000000..2ddf467
--- /dev/null
+++ b/debug_shell/debug_shell.go
@@ -0,0 +1,11 @@
+package main
+
+import "os"
+
+func main() {
+	// TODO: command line args
+	state := make(State)
+	interpreter := NewInterpreter(state, os.Stdin)
+
+	interpreter.Run()
+}
diff --git a/debug_shell/interpreter.go b/debug_shell/interpreter.go
new file mode 100644
index 0000000..d63f1c2
--- /dev/null
+++ b/debug_shell/interpreter.go
@@ -0,0 +1,334 @@
+package main
+
+import (
+	"bufio"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"os/signal"
+	"slices"
+	"strings"
+	"syscall"
+
+	"github.com/jpappel/atlas/pkg/query"
+)
+
+var commands = []string{
+	"help",
+	"let",
+	"del",
+	"print",
+	"tokenize",
+	"rematch",
+	"parse",
+	"repattern",
+}
+
+type Interpreter struct {
+	State   State
+	Scanner *bufio.Scanner
+}
+
+type ITokType int
+
+const (
+	ITOK_INVALID ITokType = iota
+
+	ITOK_VAR_NAME
+
+	// values
+	ITOK_VAL_INT
+	ITOK_VAL_STR
+	ITOK_VAL_TOKENS
+	ITOK_VAL_CLAUSE
+
+	// commands
+	ITOK_CMD_HELP
+	ITOK_CMD_LET
+	ITOK_CMD_DEL
+	ITOK_CMD_PRINT
+	ITOK_CMD_LEN
+	ITOK_CMD_SLICE
+	ITOK_CMD_REMATCH
+	ITOK_CMD_REPATTERN
+	ITOK_CMD_TOKENIZE
+	ITOK_CMD_PARSE
+)
+
+type IToken struct {
+	Type ITokType
+	Text string
+}
+
+func NewInterpreter(initialState State, inputSource io.Reader) *Interpreter {
+	return &Interpreter{
+		initialState,
+		bufio.NewScanner(inputSource),
+	}
+}
+
+func (interpreter *Interpreter) Reset() {
+	interpreter.State = make(State)
+}
+
+func (interpreter *Interpreter) Eval(tokens []IToken) (bool, error) {
+	if len(tokens) == 0 {
+		return false, nil
+	}
+
+	if slices.ContainsFunc(tokens, func(token IToken) bool {
+		return token.Type == ITOK_INVALID
+	}) {
+		b := strings.Builder{}
+		b.WriteString("Unexpected token(s) at ")
+		for i, t := range tokens {
+			if t.Type == ITOK_INVALID {
+				b.WriteString(fmt.Sprint(i, ", "))
+			}
+		}
+		return false, errors.New(b.String())
+	}
+
+	var variableName string
+	var carryValue Value
+	var ok bool
+	for i := len(tokens) - 1; i >= 0; i-- {
+		t := tokens[i]
+		switch t.Type {
+		case ITOK_CMD_HELP:
+			printHelp()
+			break
+		case ITOK_CMD_LET:
+			interpreter.State[variableName] = carryValue
+			carryValue.Type = INVALID
+			break
+		case ITOK_CMD_DEL:
+			if len(tokens) == 1 {
+				fmt.Println("Deleting all variables")
+				interpreter.State = make(State)
+			} else {
+				// HACK: variable name is not evaluated correctly so just look at the next token
+				delete(interpreter.State, tokens[i+1].Text)
+			}
+			carryValue.Type = INVALID
+			break
+		case ITOK_CMD_PRINT:
+			if len(tokens) == 1 {
+				fmt.Println("Variables:")
+				fmt.Println(interpreter.State)
+			} else {
+				carryValue, ok = interpreter.State[tokens[1].Text]
+				if !ok {
+					return false, errors.New("No variable found with name " + tokens[1].Text)
+				}
+			}
+		case ITOK_CMD_REMATCH:
+			if carryValue.Type != STRING {
+				return false, errors.New("Unable to match against argument")
+			}
+
+			body, ok := carryValue.Val.(string)
+			if !ok {
+				return true, errors.New("Type corruption during rematch, expected string")
+			}
+
+			b := strings.Builder{}
+			matchGroupNames := query.LexRegex.SubexpNames()
+			for _, match := range query.LexRegex.FindAllStringSubmatch(body, -1) {
+				for i, part := range match {
+					b.WriteString(matchGroupNames[i])
+					fmt.Fprintf(&b, "[%d]", len(part))
+					b.WriteByte(':')
+					b.WriteString(part)
+					b.WriteByte('\n')
+				}
+				b.WriteByte('\n')
+			}
+			carryValue.Val = b.String()
+		case ITOK_CMD_REPATTERN:
+			fmt.Println(query.LexRegexPattern)
+			break
+		case ITOK_CMD_TOKENIZE:
+			if carryValue.Type != STRING {
+				return false, errors.New("Unable to tokenize argument")
+			}
+
+			rawQuery, ok := carryValue.Val.(string)
+			if !ok {
+				return true, errors.New("Type corruption during tokenize, expected string")
+			}
+			carryValue.Type = TOKENS
+			carryValue.Val = query.Lex(rawQuery)
+		case ITOK_CMD_PARSE:
+			if carryValue.Type != TOKENS {
+				fmt.Println("Carry type: ", carryValue.Type)
+				return false, errors.New("Unable to parse argument")
+			}
+
+			queryTokens, ok := carryValue.Val.([]query.Token)
+			if !ok {
+				return true, errors.New("Type corruption during parse, expected []query.Tokens")
+			}
+
+			clause, err := query.Parse(queryTokens)
+			if err != nil {
+				return false, err
+			}
+			carryValue.Type = CLAUSE
+			carryValue.Val = clause
+		case ITOK_VAR_NAME:
+			// NOTE: very brittle, only allows expansion of a single variable
+			if i == len(tokens)-1 {
+				carryValue, ok = interpreter.State[t.Text]
+				if !ok {
+					return false, errors.New("No variable: " + t.Text)
+				}
+			} else {
+				variableName = t.Text
+			}
+		case ITOK_VAL_STR:
+			carryValue.Type = STRING
+			carryValue.Val = t.Text
+		case ITOK_CMD_LEN:
+			fmt.Println("not implemented yet ;)")
+			break
+		case ITOK_CMD_SLICE:
+			fmt.Println("not implemented yet ;)")
+			break
+		}
+	}
+
+	if carryValue.Type != INVALID {
+		fmt.Println(carryValue)
+	}
+
+	return false, nil
+}
+
+func (interpreter Interpreter) Tokenize(line string) []IToken {
+	var prevType ITokType
+	tokens := make([]IToken, 0, 3)
+	for word := range strings.SplitSeq(line, " ") {
+		trimmedWord := strings.TrimSpace(word)
+		if trimmedWord == "" {
+			continue
+		}
+
+		if len(tokens) != 0 {
+			prevType = tokens[len(tokens)-1].Type
+		}
+
+		if trimmedWord == "help" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_HELP})
+		} else if trimmedWord == "let" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_LET})
+		} else if trimmedWord == "del" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_DEL})
+		} else if trimmedWord == "print" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_PRINT})
+		} else if trimmedWord == "len" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_LEN})
+		} else if trimmedWord == "slice" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_SLICE})
+		} else if trimmedWord == "rematch" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_REMATCH})
+		} else if trimmedWord == "repattern" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_REPATTERN})
+		} else if trimmedWord == "tokenize" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_TOKENIZE})
+		} else if trimmedWord == "parse" {
+			tokens = append(tokens, IToken{Type: ITOK_CMD_PARSE})
+		} else if prevType == ITOK_CMD_LET {
+			tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+		} else if prevType == ITOK_CMD_DEL {
+			tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+		} else if prevType == ITOK_CMD_PRINT {
+			tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+		} else if prevType == ITOK_CMD_LEN || prevType == ITOK_CMD_SLICE {
+			if trimmedWord[0] == '`' {
+				_, strLiteral, _ := strings.Cut(word, "`")
+				tokens = append(tokens, IToken{ITOK_VAL_STR, strLiteral})
+			} else {
+				tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+			}
+		} else if prevType == ITOK_CMD_REMATCH || prevType == ITOK_CMD_TOKENIZE {
+			if trimmedWord[0] == '`' {
+				_, strLiteral, _ := strings.Cut(word, "`")
+				tokens = append(tokens, IToken{ITOK_VAL_STR, strLiteral})
+			} else {
+				tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+			}
+		} else if prevType == ITOK_CMD_PARSE {
+			tokens = append(tokens, IToken{ITOK_VAR_NAME, trimmedWord})
+		} else if prevType == ITOK_VAR_NAME && trimmedWord[0] == '`' {
+			_, strLiteral, _ := strings.Cut(word, "`")
+			tokens = append(tokens, IToken{ITOK_VAL_STR, strLiteral})
+		} else if prevType == ITOK_VAL_STR {
+			tokens[len(tokens)-1].Text += " " + word
+		} else {
+			tokens = append(tokens, IToken{ITOK_INVALID, trimmedWord})
+		}
+	}
+
+	return tokens
+}
+
+func (interpreter Interpreter) Run() error {
+	signalCh := make(chan os.Signal, 1)
+	exitCh := make(chan error, 1)
+	lineCh := make(chan string)
+	defer close(signalCh)
+	defer close(lineCh)
+	defer close(exitCh)
+
+	signal.Notify(signalCh, syscall.SIGINT)
+	go func(output chan<- string, exitCh chan<- error) {
+		for {
+			if interpreter.Scanner.Scan() {
+				output <- interpreter.Scanner.Text()
+			} else if err := interpreter.Scanner.Err(); err != nil {
+				exitCh <- err
+				return
+			} else {
+				exitCh <- io.EOF
+				return
+			}
+		}
+	}(lineCh, exitCh)
+
+	for {
+		fmt.Print("> ")
+
+		select {
+		case <-signalCh:
+			// TODO: log info to output
+			return nil
+		case err := <-exitCh:
+			return err
+		case line := <-lineCh:
+			tokens := interpreter.Tokenize(line)
+			fatal, err := interpreter.Eval(tokens)
+			if fatal {
+				return err
+			} else if err != nil {
+				fmt.Println(err)
+			}
+		}
+	}
+}
+
+func printHelp() {
+	fmt.Println("Commands: help, let, del, print, tokenize, parse")
+	fmt.Println("help - print this help")
+	fmt.Println("let name (string|tokens|clause) - save value to a variable")
+	fmt.Println("del [name] - delete a variable or all variables")
+	fmt.Println("print [name] - print a variable or all variables")
+	fmt.Println("slice (string|tokens|name) start stop - slice a string or tokens from start to stop")
+	fmt.Println("rematch (string|name) - match against regex for querylang spec")
+	fmt.Println("repattern - print regex for querylang")
+	fmt.Println("tokenize (string|name) - tokenize a string")
+	fmt.Println(" ex. tokenize `author:me")
+	fmt.Println("parse (tokens|name) - parse tokens into a clause")
+	fmt.Println("\nBare `tokenize` and `parse` assign to an implicit variable _")
+}
diff --git a/debug_shell/state.go b/debug_shell/state.go
new file mode 100644
index 0000000..17d0474
--- /dev/null
+++ b/debug_shell/state.go
@@ -0,0 +1,142 @@
+package main
+
+import (
+	"errors"
+	"fmt"
+	"os"
+	"strings"
+
+	"github.com/jpappel/atlas/pkg/query"
+)
+
+type ValueType int
+
+const (
+	INVALID ValueType = iota
+	STRING
+	TOKENS
+	CLAUSE
+)
+
+type Value struct {
+	Type ValueType
+	Val  any
+}
+
+type State map[string]Value
+
+func (v Value) String() string {
+	switch v.Type {
+	case STRING:
+		s, ok := v.Val.(string)
+		if !ok {
+			return "Corrupted Type (expected string)"
+		}
+		return s
+	case TOKENS:
+		ts, ok := v.Val.([]query.Token)
+		if !ok {
+			return "Corrupted Type (expected []query.Token)"
+		}
+		return query.TokensStringify(ts)
+	case CLAUSE:
+		rootClause, ok := v.Val.(*query.Clause)
+		if !ok {
+			return "Corrupted Type (expected query.Clause)"
+		}
+		return rootClause.String()
+	case INVALID:
+		return "Invalid"
+	}
+	return fmt.Sprintf("Unknown @ %p", v.Val)
+}
+
+func (s State) String() string {
+	b := strings.Builder{}
+
+	for k, v := range s {
+		b.WriteString(k)
+		b.WriteByte(':')
+		switch v.Type {
+		case INVALID:
+			b.WriteString(" Invalid")
+		case STRING:
+			b.WriteString(" String")
+		case TOKENS:
+			b.WriteString(" Tokens")
+		case CLAUSE:
+			b.WriteString(" Clause")
+		default:
+			fmt.Fprintf(&b, " Unknown (%d)", v.Val)
+		}
+		b.WriteByte('\n')
+	}
+
+	return b.String()
+}
+
+func (s State) CmdTokenize(input string) (Value, bool) {
+	if len(input) == 0 {
+		return Value{}, false
+	}
+
+	var rawQuery string
+	if input[0] == '`' {
+		rawQuery = input[1:]
+	} else {
+		variable, ok := s[input]
+		if !ok {
+			fmt.Fprintln(os.Stderr, "Cannot tokenize: no variable with name", input)
+			return Value{}, false
+		} else if variable.Type != STRING {
+			fmt.Fprintln(os.Stderr, "Cannot tokenize: variable is not a string")
+			return Value{}, false
+		}
+
+		rawQuery, ok = variable.Val.(string)
+		if !ok {
+			fmt.Fprintln(os.Stderr, "Cannot tokenize: type corruption")
+			fmt.Fprintln(os.Stderr, "Type corruption, expected string")
+			panic("Type corruption")
+		}
+	}
+	tokens := query.Lex(rawQuery)
+	return Value{TOKENS, tokens}, true
+}
+
+func (s State) CmdParse(args string) (Value, error) {
+	if len(args) == 0 {
+		return Value{}, errors.New("no arguments for parse")
+	}
+
+	var tokens []query.Token
+	if tokenizeArgs, found := strings.CutPrefix(args, "tokenize "); found {
+		val, ok := s.CmdTokenize(tokenizeArgs)
+		if !ok {
+			return Value{}, errors.New("error occurred during tokenization")
+		}
+		tokens = val.Val.([]query.Token)
+	} else {
+		variable, ok := s[args]
+		if !ok {
+			fmt.Fprintln(os.Stderr, "Cannot parse: no variable with name", args)
+			return Value{}, errors.New("variable does not exist")
+		} else if variable.Type != TOKENS {
+			fmt.Fprintln(os.Stderr, "Cannot parse: variable is not []query.Tokens")
+			return Value{}, errors.New("bad variable type")
+		}
+
+		tokens, ok = variable.Val.([]query.Token)
+		if !ok {
+			fmt.Fprintln(os.Stderr, "Cannot parse: type corruption")
+			fmt.Fprintln(os.Stderr, "Type corruption, expected []query.Tokens")
+			panic("Type corruption")
+		}
+	}
+
+	clause, err := query.Parse(tokens)
+	if err != nil {
+		return Value{}, err
+	}
+	return Value{CLAUSE, *clause}, err
+}
--
cgit v1.2.3
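
For reference, a minimal sketch of driving the interpreter added in this patch from scripted input instead of os.Stdin; it assumes the three files above are compiled together in package main, and the runScripted helper and the let/tokenize/print script are illustrative only, not part of the commit.

package main

import "strings"

// runScripted is a hypothetical helper: it feeds a fixed command script to the
// interpreter instead of reading interactively from os.Stdin.
func runScripted() error {
	script := "let q `author:me\ntokenize q\nprint q\nhelp\n"
	interpreter := NewInterpreter(make(State), strings.NewReader(script))
	// Run returns io.EOF once the scripted input is exhausted.
	return interpreter.Run()
}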