Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
239 changes: 238 additions & 1 deletion builtins/builtins.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"io/fs"
"os"
"sort"
"strings"
"syscall"
"time"

Expand Down Expand Up @@ -96,14 +97,250 @@ func (c Command) Register() {
if normalize != nil {
args = normalize(args)
}
hasHelp := fs.Lookup("help") != nil
// Honor `--help` once it's reached in argv to match GNU coreutils:
// `cmd --no-arg-flag --help --bogus` should print help and exit 0.
// We trim args at the first `--help` only when every preceding
// token is a registered no-argument flag. Value-takers (`-n 5`),
// explicit-value forms (`--foo=bar`), positionals, and unknown
// flags all preclude the trim — those tokens may still fail
// later validation (e.g. `head -n nope`'s numeric check), and
// trimming would silently swallow that error by short-circuiting
// on `--help`. GNU's left-to-right semantics require the earlier
// failure to surface even when `--help` follows it.
if hasHelp {
if idx, ok := safeHelpTrimIndex(fs, args); ok {
args = args[:idx+1]
}
}
if err := fs.Parse(args); err != nil {
callCtx.Errf("%s: %v\n", name, err)
callCtx.Errf("%s: %s\n", name, rewritePflagError(err, args))
if hasHelp {
callCtx.Errf("Try '%s --help' for more information.\n", name)
}
return Result{Code: 1}
}
return handler(ctx, callCtx, fs.Args())
})
}

// rewritePflagError translates pflag's default error messages to the
// GNU-getopt-style wording that matches GNU coreutils byte-for-byte.
// It returns the rewritten message without a trailing newline or any
// "cmd:" prefix; callers prepend the builtin name themselves.
//
// args is the argv that was handed to fs.Parse. It is consulted only
// for the unknown-long-flag case, where pflag strips the `=value`
// suffix from its error string but GNU getopt reports the full token
// (`--no-such=value`, not just `--no-such`).
//
// Patterns covered:
//
// pflag → GNU
// "unknown flag: --foo" → "unrecognized option '--foo'"
// ("--foo=value" if the original argv used =value)
// "unknown shorthand flag: 'X' in -..." → "invalid option -- 'X'"
// "flag needs an argument: --foo" → "option '--foo' requires an argument"
// "flag needs an argument: 'X' in -Y" → "option requires an argument -- 'X'"
// `invalid argument "..." for "DESC" flag: → "option '--LONG' doesn't allow
// flag does not allow an argument` an argument"
//
// Unknown messages are returned unchanged.
func rewritePflagError(err error, args []string) string {
Copy link
Copy Markdown
Member

@AlexandreYang AlexandreYang May 7, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hum,

1/ I'm not sure if patching pflag is the right way. Maybe we should just re-code our own flag parser. WDYT?

2/ I think we should at least move all the flag parsing logic to a separate package like /builtins/internal/flagparser/ to isolate flag parsing logic. Having all this flag parsing logic in builtins.go seems not ideal.
And then, we can optionally implement 1/ more easy if needed.

msg := err.Error()

// pflag wraps errors returned by a Var's Set(value) as
// `invalid argument "VALUE" for "DESC" flag: INNER`
// where DESC is e.g. `-h, --human-readable` or `--total`. df's
// noArgBool / unitFlag.Set returns the literal "flag does not
// allow an argument" so users (and tests) see GNU's "doesn't"
// instead of the wrapped pflag verbosity.
if strings.HasPrefix(msg, "invalid argument ") &&
strings.HasSuffix(msg, "flag does not allow an argument") {
if d, ok := extractFlagDescriptor(msg); ok {
// `-X=value` for a no-arg shorthand: GNU getopt iterates
// shorthand chars and treats `=` as an unknown shorthand,
// emitting `invalid option -- '='`. pflag instead routes
// the value through Set, hitting our no-arg guard. Match
// GNU when the original argv used the shorthand=value form.
if shortFlagEqualsValueIn(d, args) {
return "invalid option -- '='"
}
return "option '" + longFlagName(d) + "' doesn't allow an argument"
}
}

if rest, ok := strings.CutPrefix(msg, "unknown flag: "); ok {
// pflag's error carries only the flag name; GNU's reports the
// full token, so we recover `--foo=value` from argv when the
// user wrote it that way.
return "unrecognized option '" + recoverLongFlagToken(rest, args) + "'"
}

const shortPrefix = "unknown shorthand flag: '"
if rest, ok := strings.CutPrefix(msg, shortPrefix); ok {
if char, _, ok := strings.Cut(rest, "'"); ok {
return "invalid option -- '" + char + "'"
}
}

if rest, ok := strings.CutPrefix(msg, "flag needs an argument: "); ok {
// pflag emits two distinct payloads depending on the form:
// long form: `--foo`
// short form: `'X' in -Y` (X is the char, Y is the run
// of shorthand letters)
// GNU getopt uses different wording for each: long flags get
// `option '--foo' requires an argument`, short flags get
// `option requires an argument -- 'X'`.
if char, found := shortMissingArg(rest); found {
return "option requires an argument -- '" + char + "'"
}
return "option '" + rest + "' requires an argument"
Comment thread
julesmcrt marked this conversation as resolved.
}

return msg
}

// safeHelpTrimIndex returns the index of the first `--help` in args if
// trimming at it is provably safe — i.e. every preceding token is a
// registered no-argument flag and the scan does not cross a `--`
// end-of-flags marker. Returns false otherwise, signalling the caller
// to leave args alone and let pflag report any earlier validation
// error.
func safeHelpTrimIndex(fs *pflag.FlagSet, args []string) (int, bool) {
for i, a := range args {
if a == "--" {
return 0, false
}
if a == "--help" {
return i, true
}
if !isNoArgFlagToken(fs, a) {
return 0, false
Comment on lines +218 to +219
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Honor --help after valid value-taking options

For commands with value-taking flags, this rejects trimming as soon as a valid value option appears before --help, so head -n 5 --help --bogus is left for pflag to parse and fails on the later --bogus. GNU head -n 5 --help --bogus exits 0 and prints help because the earlier -n 5 is valid and --help short-circuits before later options; this means the new left-to-right help behavior is only correct for no-arg flags and regresses valid invocations of head/tail-style builtins.

Useful? React with 👍 / 👎.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Claude Opus 4.7 (1M context)] You're right that GNU prints help here, but extending the trim to allow value-takers conflicts with the prior round's feedback (3202120242) about head -n nope --help --bogus — pflag accepts nope as a String value but head's numeric validation runs only after --help short-circuits, so trimming would silently swallow the validation failure. GNU itself diverges by value validity:

  • head -n 5 --help --bogus → exit 0, prints help (-n 5 is valid)
  • head -n nope --help --bogus → exit 1, invalid number of lines: 'nope'

Verified both with docker run --rm debian:bookworm-slim. To match GNU on both, head's handler would need to validate -n's numeric value via pflag.Var.Set before the --help short-circuit fires — that's a per-builtin handler-ordering change, not something the shared trim in builtins/builtins.go can resolve safely (we don't know which value-taker fields a builtin's handler validates internally).

Trade-off matrix with the current no-arg-only trim:

invocation GNU rshell now extending trim
-q --help --bogus (no-arg) help, 0 help, 0 ✓ help, 0 ✓
-n 5 --help --bogus help, 0 error, 1 help, 0 ✓
-n nope --help --bogus error, 1 error, 1 ✓ help, 0 ✗

Two out of three match either way; extending makes the -n 5 case match at the cost of regressing the nope case. Deferring to a follow-up PR that re-orders head/tail/etc. handlers to validate values before --help short-circuits, which closes both gaps cleanly. Leaving this thread open for the reviewer to decide between the trade-off.

Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do in another PR

}
}
return 0, false
}

// isNoArgFlagToken reports whether a is a registered flag (long or
// shorthand cluster) whose every component takes no value. Tokens
// containing `=` (explicit-value form) or matching value-taking flags
// return false — both can fail at parse time. Non-ASCII bytes in a
// shorthand cluster also return false: pflag's ShorthandLookup panics
// on inputs longer than one byte and `string(byte)` for any byte ≥
// 0x80 produces a 2-byte UTF-8 encoding.
func isNoArgFlagToken(fs *pflag.FlagSet, a string) bool {
if len(a) < 2 || a[0] != '-' || a == "-" {
return false
}
if strings.Contains(a, "=") {
return false
}
if strings.HasPrefix(a, "--") {
f := fs.Lookup(a[2:])
return f != nil && f.NoOptDefVal != ""
}
for i := 1; i < len(a); i++ {
c := a[i]
if c > 0x7E || c <= ' ' {
return false
}
f := fs.ShorthandLookup(string(c))
if f == nil || f.NoOptDefVal == "" {
return false
}
}
return true
}

// recoverLongFlagToken returns the original argv token for an unknown
// long flag whose stripped name (e.g. `--no-such`) appears in pflag's
// error. If the user wrote `--no-such=value`, the full token is
// returned; otherwise the bare flag is returned. Stops at `--` so a
// later positional like `-- --no-such=foo` is never misclassified.
func recoverLongFlagToken(flag string, args []string) string {
prefix := flag + "="
for _, a := range args {
if a == "--" {
break
}
if a == flag || strings.HasPrefix(a, prefix) {
return a
}
}
return flag
}

// shortFlagEqualsValueIn reports whether args contains a token of the
// form `-X=...` whose shorthand char X matches the shorthand encoded
// in descriptor (e.g. `-h, --human-readable` → X=`h`). Used to detect
// the GNU-getopt shorthand=value error class.
func shortFlagEqualsValueIn(descriptor string, args []string) bool {
short, ok := shortFlagFromDescriptor(descriptor)
if !ok {
return false
}
for _, a := range args {
if a == "--" {
break
}
if len(a) >= 3 && a[0] == '-' && a[1] != '-' && a[1] == short && a[2] == '=' {
return true
}
}
return false
}

// shortFlagFromDescriptor extracts the single shorthand char from a
// pflag descriptor like `-h, --human-readable`. Returns false for
// long-only descriptors (`--total`).
func shortFlagFromDescriptor(d string) (byte, bool) {
if len(d) >= 2 && d[0] == '-' && d[1] != '-' {
return d[1], true
}
return 0, false
}

// shortMissingArg parses pflag's short-form payload for
// `flag needs an argument: 'X' in -Y` and returns the bare char
// (`X`). The second return is false when payload is in the long-form
// (`--foo`), letting the caller fall through.
func shortMissingArg(payload string) (string, bool) {
if !strings.HasPrefix(payload, "'") {
return "", false
}
char, _, found := strings.Cut(payload[1:], "'")
if !found {
return "", false
}
return char, true
}

// extractFlagDescriptor parses pflag's `invalid argument "..." for
// "DESC" flag: ...` wrapper and returns the DESC segment.
func extractFlagDescriptor(msg string) (string, bool) {
_, after, found := strings.Cut(msg, ` for "`)
if !found {
return "", false
}
desc, _, found := strings.Cut(after, `" flag:`)
if !found {
return "", false
}
return desc, true
}

// longFlagName returns the long-form (--name) flag name from a pflag
// descriptor like `-X, --LONG` or `--LONG`. For shorthand-only flags
// (rare; e.g. df's hidden `-k`) the descriptor is just `-X` and we
// return it unchanged.
func longFlagName(descriptor string) string {
if i := strings.LastIndex(descriptor, ", "); i >= 0 {
return descriptor[i+2:]
}
return descriptor
}

// CallContext provides the capabilities available to builtin commands.
// It is created by the Runner for each builtin invocation.
type CallContext struct {
Expand Down
2 changes: 1 addition & 1 deletion builtins/du/du_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,7 +273,7 @@ func TestDuUnknownFlag(t *testing.T) {
_, stderr, code := cmdRun(t, "du --no-such-flag .", dir)
assert.Equal(t, 1, code)
assert.Contains(t, stderr, "du:")
assert.Contains(t, stderr, "unknown flag")
assert.Contains(t, stderr, "unrecognized option")
}

// --- Security-sensitive flags must be rejected ---
Expand Down
Loading
Loading