Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .claude/skills/implement-awk/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,13 @@ argument-hint: "[feature-or-failure-filter]"
Use this skill when implementing, extending, or fixing the rshell `awk`
builtin.

## Shared Implementation Plan

Before starting or resuming implementation work, read
`docs/AWK_IMPLEMENTATION_PLAN.md`. That document captures the agreed rshell awk
profile, the long-lived parser strategy, Phase 1 Practical awk scope, safety
policy, test plan, and later-phase roadmap.

## Compatibility Target

The implementation target is GNU awk (`gawk`), not POSIX awk alone, One True
Expand Down
1 change: 1 addition & 0 deletions SHELL_FEATURES.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The in-shell `help` command mirrors these feature categories: run `help` for a c

## Builtins

- ✅ `awk [-F SEP] [-v NAME=VALUE] ['PROGRAM'|-f PROGRAM-FILE] [FILE]...` — pattern scanning and text processing; Phase 1 supports BEGIN/main/END rules, read-only fields (`$0`, `$1`, `$NF`), `NF`/`NR`/`FNR`/`FILENAME`, `FS`/`OFS`/`ORS`, `print`, scalar assignment, arithmetic/comparison/boolean expressions, regex patterns and `~`/`!~`, and string concatenation; `system()`, command pipes, output redirection, `getline`, arrays, control flow, `printf`, regex `FS`, and field mutation are rejected or deferred
- ✅ `break` — exit the innermost `for` loop
- ✅ `cat [-AbeEnstTuv] [FILE]...` — concatenate files to stdout; supports line numbering, blank squeezing, and non-printing character display
- ✅ `continue` — skip to the next iteration of the innermost `for` loop
Expand Down
26 changes: 26 additions & 0 deletions analysis/symbols_builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,28 @@ package analysis
// Every symbol listed here must also appear in builtinAllowedSymbols
// (which acts as the global ceiling).
var builtinPerCommandSymbols = map[string][]string{
"awk": {
"bufio.NewScanner", // 🟢 line-by-line record reading; no write or exec capability.
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
"fmt.Errorf", // 🟢 error formatting; pure function, no I/O.
"io.EOF", // 🟢 sentinel error value; pure constant.
"io.NopCloser", // 🟢 wraps a Reader with a no-op Close; no side effects.
"io.ReadCloser", // 🟢 interface type; no side effects.
"math.Mod", // 🟢 pure arithmetic modulo for awk % operator; no side effects.
"os.O_RDONLY", // 🟢 read-only file flag constant; cannot open files by itself.
"regexp.Compile", // 🟢 compiles a regular expression; pure function, no I/O. Uses RE2 engine (linear-time, no backtracking).
"regexp.Regexp", // 🟢 compiled regular expression type; no I/O side effects. All matching methods are linear-time (RE2).
"strconv.FormatFloat", // 🟢 float-to-string conversion for awk numeric output; pure function.
"strconv.ParseFloat", // 🟢 string-to-float conversion; pure function, no I/O.
"strings.Builder", // 🟢 efficient string concatenation; pure in-memory buffer, no I/O.
"strings.Cut", // 🟢 splits a string around the first separator; pure function, no I/O.
"strings.Join", // 🟢 concatenates a slice of strings with a separator; pure function, no I/O.
"strings.NewReader", // 🟢 wraps a string as an io.Reader; pure in-memory, no I/O.
"strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O.
"strings.TrimSpace", // 🟢 removes leading/trailing whitespace; pure function.
"unicode/utf8.DecodeRuneInString", // 🟢 decodes first UTF-8 rune from a string; pure function, no I/O.
"unicode/utf8.RuneError", // 🟢 replacement character returned for invalid UTF-8; constant, no I/O.
},
"break": {
"context.Context", // 🟢 deadline/cancellation plumbing; pure interface, no side effects.
},
Expand Down Expand Up @@ -505,6 +527,7 @@ var builtinAllowedSymbols = []string{
"math.MaxInt64", // 🟢 integer constant; no side effects.
"math.MaxUint64", // 🟢 integer constant; no side effects.
"math.MinInt64", // 🟢 integer constant; no side effects.
"math.Mod", // 🟢 pure arithmetic modulo; no side effects.
"math.NaN", // 🟢 returns IEEE 754 NaN value; pure function, no I/O.
"net.DefaultResolver", // 🔴 default system DNS resolver; used for context-aware address lookup; network I/O is the explicit purpose of the ping builtin.
"net.FlagBroadcast", // 🟢 interface flag constant: broadcast capability; pure constant, no network connections.
Expand Down Expand Up @@ -541,6 +564,7 @@ var builtinAllowedSymbols = []string{
"strconv.Atoi", // 🟢 string-to-int conversion; pure function, no I/O.
"strconv.ErrRange", // 🟢 sentinel error value for overflow; pure constant.
"strconv.FormatBool", // 🟢 bool-to-string conversion; pure function, no I/O.
"strconv.FormatFloat", // 🟢 float-to-string conversion; pure function, no I/O.
"strconv.FormatInt", // 🟢 int-to-string conversion; pure function, no I/O.
"strconv.FormatUint", // 🟢 uint-to-string conversion; pure function, no I/O.
"strconv.IntSize", // 🟢 platform int size constant (32 or 64); pure constant, no I/O.
Expand All @@ -553,10 +577,12 @@ var builtinAllowedSymbols = []string{
"strings.Builder", // 🟢 efficient string concatenation; pure in-memory buffer, no I/O.
"strings.Contains", // 🟢 substring search; pure function, no I/O.
"strings.ContainsRune", // 🟢 checks if a rune is in a string; pure function, no I/O.
"strings.Cut", // 🟢 splits a string around the first separator; pure function, no I/O.
"strings.Fields", // 🟢 splits a string on whitespace into a slice; pure function, no I/O.
"strings.HasPrefix", // 🟢 pure function for prefix matching; no I/O.
"strings.IndexByte", // 🟢 finds byte in string; pure function, no I/O.
"strings.Join", // 🟢 concatenates a slice of strings with a separator; pure function, no I/O.
"strings.NewReader", // 🟢 wraps a string as an io.Reader; pure in-memory, no I/O.
"strings.ReplaceAll", // 🟢 replaces all occurrences of a substring; pure function, no I/O.
"strings.Split", // 🟢 splits a string by separator into a slice; pure function, no I/O.
"strings.ToLower", // 🟢 converts string to lowercase; pure function, no I/O.
Expand Down
19 changes: 15 additions & 4 deletions analysis/symbols_verification_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,13 +147,11 @@ func writeGoFile(t *testing.T, path, pkg string, imports []string, body string)
func findFirstSubdirGoFile(t *testing.T, dir string) string {
t.Helper()
var found string
var fallback string
err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if found != "" {
return filepath.SkipAll
}
if info.IsDir() {
return nil
}
Expand All @@ -162,13 +160,26 @@ func findFirstSubdirGoFile(t *testing.T, dir string) string {
return nil
}
if strings.Contains(rel, string(filepath.Separator)) {
found = path
data, err := os.ReadFile(path)
if err != nil {
return err
}
if strings.Contains(string(data), "import (") {
found = path
return filepath.SkipAll
}
if fallback == "" {
fallback = path
}
}
return nil
})
if err != nil {
t.Fatal(err)
}
if found == "" {
found = fallback
}
if found == "" {
t.Fatalf("no .go file found in subdirectories of %s", dir)
}
Expand Down
112 changes: 112 additions & 0 deletions builtins/awk/ast.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2026-present Datadog, Inc.

package awk

// program is the top-level node of this package's awk syntax tree: it holds
// the rules that make up one awk program.
type program struct {
// rules is the list of rules in the program.
// NOTE(review): presumably kept in source order — confirm against the parser.
rules []rule
}

// ruleKind tags a rule with the category it belongs to.
type ruleKind int

// The ruleKind values. They are assigned by iota, so ruleNormal is the zero
// value of ruleKind.
const (
// ruleNormal is the default kind.
// NOTE(review): presumably an ordinary pattern/action ("main") rule — confirm.
ruleNormal ruleKind = iota
// NOTE(review): ruleBegin presumably marks an awk BEGIN rule — confirm.
ruleBegin
// NOTE(review): ruleEnd presumably marks an awk END rule — confirm.
ruleEnd
)

// rule is a single rule of a program: a kind, a pattern expression, and the
// list of statements forming its action.
// NOTE(review): whether pattern may be nil or action may be empty is decided
// by code outside this file — confirm against the parser and evaluator.
type rule struct {
// kind says which category of rule this is (see ruleKind).
kind ruleKind
// pattern is the expression attached to the rule.
pattern expr
// action is the rule's statement list.
action []stmt
}

// stmt is the interface satisfied by statement nodes of the syntax tree.
// Its only method is unexported, so only types declared in this package can
// implement it.
type stmt interface {
// stmtNode is a marker method; the implementations in this file have
// empty bodies.
stmtNode()
}

// printStmt is a statement node carrying a list of argument expressions.
// NOTE(review): presumably represents the awk `print` statement — confirm.
type printStmt struct {
// args holds the statement's argument expressions.
args []expr
}

// stmtNode marks *printStmt as implementing stmt; it does nothing.
func (*printStmt) stmtNode() {}

// exprStmt is a statement node that wraps a single expression, allowing an
// expression to appear where a stmt is required.
type exprStmt struct {
// x is the wrapped expression.
x expr
}

// stmtNode marks *exprStmt as implementing stmt; it does nothing.
func (*exprStmt) stmtNode() {}

// expr is the interface satisfied by expression nodes of the syntax tree.
// Its only method is unexported, so only types declared in this package can
// implement it.
type expr interface {
// exprNode is a marker method; the implementations in this file have
// empty bodies.
exprNode()
}

// numberExpr is an expression node for a number, stored both as a string and
// as a float64.
// NOTE(review): presumably text is the literal as written in the program and
// num is its parsed value — confirm against the parser.
type numberExpr struct {
// text is the string form of the number.
text string
// num is the float64 form of the number.
num float64
}

// exprNode marks *numberExpr as implementing expr; it does nothing.
func (*numberExpr) exprNode() {}

// stringExpr is an expression node holding a string value.
// NOTE(review): presumably a string literal; whether escape sequences are
// already decoded in value is not visible here — confirm against the lexer.
type stringExpr struct {
// value is the string carried by the node.
value string
}

// exprNode marks *stringExpr as implementing expr; it does nothing.
func (*stringExpr) exprNode() {}

// regexExpr is an expression node holding a regular-expression pattern as an
// uncompiled string.
// NOTE(review): presumably the body of an awk /regex/ literal; where and when
// it is compiled is not visible here — confirm against the evaluator.
type regexExpr struct {
// pattern is the regular-expression text.
pattern string
}

// exprNode marks *regexExpr as implementing expr; it does nothing.
func (*regexExpr) exprNode() {}

// varExpr is an expression node that refers to something by name.
// NOTE(review): presumably an awk variable reference, covering both user
// variables and built-ins such as NF or NR — confirm against the evaluator.
type varExpr struct {
// name is the identifier carried by the node.
name string
}

// exprNode marks *varExpr as implementing expr; it does nothing.
func (*varExpr) exprNode() {}

// fieldExpr is an expression node whose single operand, index, is itself an
// expression.
// NOTE(review): presumably an awk field reference `$index` (e.g. $0, $1,
// $NF) — confirm against the parser.
type fieldExpr struct {
// index is the operand expression.
index expr
}

// exprNode marks *fieldExpr as implementing expr; it does nothing.
func (*fieldExpr) exprNode() {}

// groupedExpr is an expression node that wraps exactly one inner expression.
// NOTE(review): presumably a parenthesized expression kept as its own node so
// grouping survives parsing — confirm against the parser.
type groupedExpr struct {
// x is the wrapped expression.
x expr
}

// exprNode marks *groupedExpr as implementing expr; it does nothing.
func (*groupedExpr) exprNode() {}

// unaryExpr is an expression node with an operator string and one operand.
// NOTE(review): the set of operator strings stored in op (presumably "!",
// "-", "+") is decided by the parser — confirm there.
type unaryExpr struct {
// op is the operator, stored as a string.
op string
// x is the operand expression.
x expr
}

// exprNode marks *unaryExpr as implementing expr; it does nothing.
func (*unaryExpr) exprNode() {}

// binaryExpr is an expression node with an operator string and two operands.
// NOTE(review): which operators are stored in op (arithmetic, comparison,
// boolean, match, concatenation, ...) is decided by the parser — confirm there.
type binaryExpr struct {
// op is the operator, stored as a string.
op string
// left is the left-hand operand.
left expr
// right is the right-hand operand.
right expr
}

// exprNode marks *binaryExpr as implementing expr; it does nothing.
func (*binaryExpr) exprNode() {}

// assignExpr is an expression node with an operator string, a left-hand
// expression, and a right-hand expression. It has the same field layout as
// binaryExpr but is a distinct node type.
// NOTE(review): presumably op is "=" or a compound assignment operator and
// left is the assignment target — confirm against the parser.
type assignExpr struct {
// op is the operator, stored as a string.
op string
// left is the left-hand expression.
left expr
// right is the right-hand expression.
right expr
}

// exprNode marks *assignExpr as implementing expr; it does nothing.
func (*assignExpr) exprNode() {}

// incDecExpr is an expression node with an operator string, one operand, and
// a prefix flag.
// NOTE(review): presumably op is "++" or "--" and prefix distinguishes the
// prefix form (++x) from the postfix form (x++) — confirm against the parser.
type incDecExpr struct {
// op is the operator, stored as a string.
op string
// x is the operand expression.
x expr
// prefix records whether the node is in prefix form (see the note above
// the type for the assumed meaning).
prefix bool
}

// exprNode marks *incDecExpr as implementing expr; it does nothing.
func (*incDecExpr) exprNode() {}
Loading