From c2f3017e45fc3af0600786bad92426102dc7ae25 Mon Sep 17 00:00:00 2001 From: Aleksei Sviridkin Date: Tue, 12 May 2026 14:02:35 +0300 Subject: [PATCH 1/2] =?UTF-8?q?feat(commands):=20wrapper=20UX=20hardening?= =?UTF-8?q?=20=E2=80=94=20persistent=20flags,=20-n=20drop,=20crashdump,=20?= =?UTF-8?q?kubeconfig=20hint,=20dmesg=20cushion,=20TUI=20refusal?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Six open-issue fixes in one batch, all touching the talosctl wrapper layer at pkg/commands/talosctl_wrapper.go and its sibling per-command handlers. # Phase 1 — persistent flag propagation (closes #194) wrapTalosCommand's flag-copy loop visited cmd.Flags() only, returning the command's LOCAL flag set. Persistent flags from an upstream parent (e.g. imageCmd.PersistentFlags().StringVar(..., "namespace", ...), metaCmd.PersistentFlags().BoolVarP(..., "insecure", "i", ...)) live in a separate FlagSet and were silently lost by the wrapped tree — ~25 flags affected across image, image talos-bundle, image kubernetes-bundle, image cache create/serve/certs gen, image integration, meta. propagatePersistentFlags copies upstream's PersistentFlags onto wrappedCmd.PersistentFlags so cobra's mergePersistentFlags walks them to wrapped children at parse time. Names already registered on talm's own rootCmd are skipped via rootShadowedPersistentFlags — without that, upstream's addCommand-installed --nodes / --talosconfig / --context / --cluster / --endpoints / --skip-verify / --siderov1-keys-dir bindings would bind to taloscommands.GlobalArgs. and the wrapper PreRunE's struct sync (taloscommands.GlobalArgs = commands.GlobalArgs) would wipe the parsed value. The shadow map is table-tested; the -f -> -F shorthand rename is pinned as well. # Drop -n shorthand from root --nodes (closes #186) talm's root --nodes claimed shorthand -n. Operators typing `talm get hostnames -n network --nodes $NODE --endpoints $NODE` saw `network` parsed silently as a second node entry, then failed inside the gRPC name resolver with "produced zero addresses". Drop the shorthand so cobra cleanly errors with "flag -n not defined" — loud refusal instead of silent misinterpretation. Long form --nodes and modeline auto-population unchanged. Upstream talosctl does not register -n for --namespace on any wrapped subcommand (image and get both use shorthand-free --namespace), so the drop closes a shadow trap without introducing an inherited-alias gap. registerRootFlags is extracted from Execute so the NodesHasNoShorthand test can exercise the registration without running cobra's executor. # Crashdump per-class node prepopulation (closes #180) Upstream crashdump uses per-class node flags (--init-node / --control-plane-nodes / --worker-nodes) rather than the global --nodes. The upstream WithClient guard at cmd/talosctl/pkg/talos/global/client.go pre-validates GlobalArgs.Nodes regardless, so operators following the documented per-class shape hit "nodes are not set for the command" before crashdump's own deprecation message (it redirects to `talosctl support`) can surface. wrapCrashdumpCommand chains the original PreRunE AFTER populating GlobalArgs.Nodes from the union of the per-class flags. The before-the-chain placement is forced by the upstream-sync ordering: wrapTalosCommand's PreRunE copies commands.GlobalArgs into taloscommands.GlobalArgs near its end, so any population that runs after the chain only updates talm's side. Resulting precedence: - Explicit --nodes on CLI wins (lands in GlobalArgs.Nodes during cobra's flag parse, before any PreRunE) - Per-class flags win over modeline (forced by the chain order required by the upstream guard) - Modeline wins only when nothing else has # Rewrite kubeconfig positional-path rejection (closes #193) The previous error claimed `use --login flag to pass arguments`, conflating two distinct things: --login switches the kubeconfig target between local and system, it does not pass a positional path. A reader could reasonably believe they should re-run with `--login /some/path` — that neither does what they want nor matches what --login is for. Replace with a structured errors.WithHint chain whose body names the actual default (writes to project root, positional path not consumed) and whose hint lists the real alternatives: --login for system kubeconfig, shell redirection or --root for a custom local path. # Cushion dmesg --tail=N ParseBool error (closes #195) Upstream talosctl registers --tail as a BoolVarP toggling tail-mode for --follow (Dmesg(ctx, follow, tail bool) on the wire). Operators' first instinct is tail(1)-style line count; `talm dmesg --tail=3` then surfaces `strconv.ParseBool: parsing "3": invalid syntax` with no guidance. wrapDmesgCommand installs a FlagErrorFunc that detects the ParseBool-on-tail pattern and wraps (not replaces) the original error with operator-facing context. The body names the actual contract; the hint suggests pipe-to-tail(1) for line count or --follow --tail for stream-mode. errors.Wrap keeps the original error reachable via errors.Unwrap so verbose debugging stays possible. # Refuse non-tty stdin on dashboard/edit (closes #183) talosctl's dashboard and edit subcommands rely on gdamore/tcell. When stdin is not attached to a terminal, tcell fails to allocate and the program panics inside tScreen.finish with `close of nil channel` during teardown. Operators running under CI / piped stdin see a Go stack trace with no signal. wrapTUICommand chains the original PreRunE, then consults the stdinIsTTY predicate (function-typed for test injection). On non-tty stdin, returns a structured errors.WithHint: the body names the command and the missing tty; the hint lists what to do instead (real terminal for the TUI, or `talm get` for non-interactive inspection). stdinIsTTY uses golang.org/x/term IsTerminal against os.Stdin.Fd() — already in the indirect dep graph. # Tests 12 new tests in pkg/commands/talosctl_wrapper_test.go pinning every contract layer: synthetic + real-upstream propagation for #194, shadow-map and -f-rename for the propagation pass, crashdump no-shadow + crashdump per-class populate for #180, kubeconfig contract-msg for #193, dmesg ParseBool rewrite + chain preservation for #195, TUI refuse + tty pass for #183. Plus one new test in main_test.go for the -n drop (#186). All red-first; all green after the implementation commits. Lint clean on host and GOOS=windows, go vet clean, race detector clean. # Real-cluster validation Smoked against a 3-node OCI Talos v1.12.6 stand: - talm image list --namespace system --nodes --endpoints -> system-namespace images returned (#194 namespace) - talm meta -i write 0x0a "x" --nodes --endpoints -> -i shorthand accepted (#194 persistent-shorthand path) - talm get hostnames --namespace network --nodes --endpoints -> server-side resource-not-registered (cleanly past flag parsing, no -n shadow) (#186) - talm crashdump --control-plane-nodes --endpoints -> upstream deprecation message reaches operator (#180) - talm kubeconfig /tmp/x -> new project-root / --login hint (#193) - talm dmesg --tail=3 --nodes --endpoints -> boolean-toggle hint with pipe-to-tail(1) suggestion (#195) - timeout 3 talm dashboard --nodes --endpoints < /dev/null -> clean refusal, no tcell panic (#183) Closes #180 Closes #183 Closes #186 Closes #193 Closes #194 Closes #195 Assisted-By: Claude Signed-off-by: Aleksei Sviridkin --- README.md | 2 +- main.go | 45 +- main_test.go | 35 ++ pkg/commands/crashdump_handler.go | 126 ++++++ pkg/commands/dmesg_handler.go | 68 +++ pkg/commands/kubeconfig_handler.go | 11 +- pkg/commands/rotate_ca_handler.go | 16 +- pkg/commands/talosctl_wrapper.go | 144 +++++- pkg/commands/talosctl_wrapper_test.go | 630 ++++++++++++++++++++++++++ pkg/commands/tui_handler.go | 92 ++++ 10 files changed, 1154 insertions(+), 15 deletions(-) create mode 100644 pkg/commands/crashdump_handler.go create mode 100644 pkg/commands/dmesg_handler.go create mode 100644 pkg/commands/talosctl_wrapper_test.go create mode 100644 pkg/commands/tui_handler.go diff --git a/README.md b/README.md index fc3ac6fb..4c1235f5 100644 --- a/README.md +++ b/README.md @@ -90,7 +90,7 @@ floatingIP: "" Gather node information: ```bash -talm -n 192.0.2.4 -e 192.0.2.4 template -t templates/controlplane.yaml -i > nodes/node1.yaml +talm --nodes 192.0.2.4 -e 192.0.2.4 template -t templates/controlplane.yaml -i > nodes/node1.yaml ``` Edit `nodes/node1.yaml` file: diff --git a/main.go b/main.go index 3d5dd185..16b39681 100644 --- a/main.go +++ b/main.go @@ -73,8 +73,14 @@ func main() { } } -func Execute() error { - rootCmd.PersistentFlags().StringVar( +// registerRootFlags installs the persistent flag set on rootCmd. +// Extracted from Execute so tests can exercise the registration +// without running cobra's executor. Single-call contract: cobra +// panics on duplicate flag registration, so production calls this +// exactly once from Execute; tests must build a fresh +// *cobra.Command for each invocation. +func registerRootFlags(cmd *cobra.Command) { + cmd.PersistentFlags().StringVar( &commands.GlobalArgs.Talosconfig, "talosconfig", "", @@ -84,13 +90,34 @@ func Execute() error { filepath.Join(constants.ServiceAccountMountPath, constants.TalosconfigFilename), ), ) - rootCmd.PersistentFlags().StringVar(&commands.Config.RootDir, "root", ".", "root directory of the project") - rootCmd.PersistentFlags().StringVar(&commands.GlobalArgs.CmdContext, "context", "", "Context to be used in command") - rootCmd.PersistentFlags().StringSliceVarP(&commands.GlobalArgs.Nodes, "nodes", "n", []string{}, "target the specified nodes") - rootCmd.PersistentFlags().StringSliceVarP(&commands.GlobalArgs.Endpoints, "endpoints", "e", []string{}, "override default endpoints in Talos configuration") - rootCmd.PersistentFlags().StringVar(&commands.GlobalArgs.Cluster, "cluster", "", "Cluster to connect to if a proxy endpoint is used.") - rootCmd.PersistentFlags().BoolVar(&commands.GlobalArgs.SkipVerify, "skip-verify", false, "skip TLS certificate verification (keeps client authentication)") - rootCmd.PersistentFlags().Bool("version", false, "Print the version number of the application") + cmd.PersistentFlags().StringVar(&commands.Config.RootDir, "root", ".", "root directory of the project") + cmd.PersistentFlags().StringVar(&commands.GlobalArgs.CmdContext, "context", "", "Context to be used in command") + // --nodes is registered WITHOUT the `-n` shorthand. The + // previous registration carried `-n`, which silently captured + // any `-n ` an operator typed — for example + // `talm get hostnames -n network --nodes $NODE --endpoints + // $NODE` parsed `network` as a second node entry and then + // failed inside the gRPC name resolver with "produced zero + // addresses". Operators who type `-n namespace` for a + // subcommand argument (the muscle memory pattern from + // `kubectl`-style CLIs) now get a clean "flag -n not defined" + // from cobra — loud refusal instead of silent + // misinterpretation. The long form `--nodes` and modeline + // auto-population continue to work identically. Upstream + // talosctl does NOT register `-n` for `--namespace` on any + // subcommand (verified against image.go's PersistentFlags + // StringVar and get.go's local --namespace StringVar — both + // shorthand-free), so dropping `-n` from talm root closes a + // shadow trap without introducing any inherited-alias gap. + cmd.PersistentFlags().StringSliceVar(&commands.GlobalArgs.Nodes, "nodes", []string{}, "target the specified nodes") + cmd.PersistentFlags().StringSliceVarP(&commands.GlobalArgs.Endpoints, "endpoints", "e", []string{}, "override default endpoints in Talos configuration") + cmd.PersistentFlags().StringVar(&commands.GlobalArgs.Cluster, "cluster", "", "Cluster to connect to if a proxy endpoint is used.") + cmd.PersistentFlags().BoolVar(&commands.GlobalArgs.SkipVerify, "skip-verify", false, "skip TLS certificate verification (keeps client authentication)") + cmd.PersistentFlags().Bool("version", false, "Print the version number of the application") +} + +func Execute() error { + registerRootFlags(rootCmd) cmd, err := rootCmd.ExecuteContextC(context.Background()) if err != nil && !common.SuppressErrors { diff --git a/main_test.go b/main_test.go index 0f7d2f16..9418e382 100644 --- a/main_test.go +++ b/main_test.go @@ -182,6 +182,41 @@ func TestLoadConfig_EmptyApplyTimeoutResolvesDefault(t *testing.T) { } } +// TestRegisterRootFlags_NodesHasNoShorthand pins that talm's +// root `--nodes` does NOT claim the `-n` shorthand. With `-n` +// registered as the alias for `--nodes`, the global captures any +// `-n ` an operator types — `talm get hostnames -n network +// --nodes $NODE --endpoints $NODE` quietly parses `network` as a +// second node entry, then fails inside the gRPC name resolver with +// "produced zero addresses". Operators who type `-n namespace` +// (kubectl muscle memory) now get a clean cobra "flag -n not +// defined" error — loud refusal instead of silent +// misinterpretation. The long form `--nodes` keeps working. +// Upstream talosctl does not register `-n` for `--namespace` on +// any wrapped subcommand (image's PersistentFlags --namespace and +// get's local --namespace are both shorthand-free), so the change +// closes a shadow trap without introducing an inherited-alias gap. +func TestRegisterRootFlags_NodesHasNoShorthand(t *testing.T) { + // registerRootFlags writes default empty strings back through + // the cobra/pflag StringVar bindings into commands.GlobalArgs + // and commands.Config, which are package-level mutables. + // snapshotConfigState (defined above) saves+restores them so + // other tests aren't poisoned. + snapshotConfigState(t) + + cmd := &cobra.Command{Use: "talm-test"} + registerRootFlags(cmd) + + flag := cmd.PersistentFlags().Lookup("nodes") + if flag == nil { + t.Fatal("expected --nodes to be registered, got nil") + } + + if flag.Shorthand != "" { + t.Errorf("--nodes shorthand: got %q, want empty (otherwise it shadows upstream `-n / --namespace`)", flag.Shorthand) + } +} + func TestSkipConfigCommands(t *testing.T) { tests := []struct { name string diff --git a/pkg/commands/crashdump_handler.go b/pkg/commands/crashdump_handler.go new file mode 100644 index 00000000..c8fda2e0 --- /dev/null +++ b/pkg/commands/crashdump_handler.go @@ -0,0 +1,126 @@ +// Copyright Cozystack Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import "github.com/spf13/cobra" + +// wrapCrashdumpCommand pre-populates GlobalArgs.Nodes from upstream +// crashdump's per-class node flags (--init-node, --control-plane-nodes, +// --worker-nodes) so the upstream WithClient guard in +// `cmd/talosctl/pkg/talos/global/client.go` (which checks +// `len(GlobalArgs.Nodes) > 0`) is satisfied before crashdump's own +// RunE runs. +// +// Without this pre-population an operator who follows talosctl +// crashdump's documented help text — passing only --control-plane-nodes +// and --endpoints — hits a "nodes are not set for the command" error +// from the global client wrapper instead of crashdump's own +// "deprecated, please use `talosctl support` instead" message +// (crashdump is hidden + deprecated upstream, but the wrapper still +// imports it through `taloscommands.Commands`). +// +// Wrapper populates GlobalArgs.Nodes from the per-class flags +// BEFORE chaining to the original PreRunE. wrapTalosCommand's +// PreRunE syncs `taloscommands.GlobalArgs = commands.GlobalArgs` +// near its end — populating after the chain would update only +// talm's side and leave upstream still seeing an empty list. +// +// Resulting precedence: +// +// - Explicit `--nodes` on the CLI wins (it lands in +// GlobalArgs.Nodes during cobra's flag parse, before any +// PreRunE runs; the populate gate sees it non-empty and +// skips). +// - Per-class flags win over modeline. The chain order +// forced by the upstream-sync constraint runs the populate +// before the wrapTalosCommand PreRunE's modeline merge, +// and that merge captures `nodesFromArgs := len(GlobalArgs. +// Nodes) > 0` from the now-populated talm side, telling it +// NOT to overwrite. This is a deliberate trade-off: the +// upstream guard surface-fires unless GlobalArgs.Nodes is +// populated before sync, and the per-class flags carry +// operator intent more specifically than the modeline +// default for a deprecated command. +// - Modeline wins only when GlobalArgs.Nodes is empty AND no +// per-class flag was passed. +// +// Contract with the caller: wrapCrashdumpCommand MUST be installed +// AFTER wrapTalosCommand's PreRunE assignment in +// talosctl_wrapper.go, so its `originalPreRunE` capture points at +// the wrapTalosCommand closure rather than a nil. If a future +// refactor reorders the dispatch in talosctl_wrapper.go to call +// wrapCrashdumpCommand earlier, the chain order inverts and the +// upstream guard fires again. +func wrapCrashdumpCommand(wrappedCmd *cobra.Command) { + originalPreRunE := wrappedCmd.PreRunE + + wrappedCmd.PreRunE = func(cmd *cobra.Command, args []string) error { + populateNodesFromPerClassFlags(cmd) + + if originalPreRunE != nil { + return originalPreRunE(cmd, args) + } + + return nil + } +} + +// populateNodesFromPerClassFlags fills GlobalArgs.Nodes from the +// union of --init-node, --control-plane-nodes, and --worker-nodes +// when GlobalArgs.Nodes is otherwise empty. Used by the wrappers +// for upstream commands that consume these per-class lists rather +// than the global --nodes (crashdump, rotate-ca). +// +// MUST be called BEFORE wrapTalosCommand's PreRunE assignment in +// the chain — that closure syncs +// `taloscommands.GlobalArgs = commands.GlobalArgs` near its end, +// and any population after the sync only updates talm's side, +// leaving the upstream WithClient guard still seeing an empty +// list. Per-command wrappers (wrapCrashdumpCommand, +// wrapRotateCACommand) capture the original PreRunE first, then +// install a closure that calls this helper BEFORE chaining. +// +// Walk order is stable (init -> control-plane -> worker) so the +// resolved list is deterministic when multiple classes are set. +// Explicit --nodes (CLI or modeline) takes precedence: the gate +// is len(GlobalArgs.Nodes) == 0. +// +// Per-class flag lookups are tolerant: missing flag definitions +// return errors that are silently swallowed, so the helper is +// safe to call on any command that may or may not have all three +// registered. +func populateNodesFromPerClassFlags(cmd *cobra.Command) { + if len(GlobalArgs.Nodes) > 0 { + return + } + + populated := []string{} + + if initNode, err := cmd.Flags().GetString("init-node"); err == nil && initNode != "" { + populated = append(populated, initNode) + } + + if cps, err := cmd.Flags().GetStringSlice("control-plane-nodes"); err == nil { + populated = append(populated, cps...) + } + + if workers, err := cmd.Flags().GetStringSlice("worker-nodes"); err == nil { + populated = append(populated, workers...) + } + + if len(populated) > 0 { + GlobalArgs.Nodes = populated + } +} diff --git a/pkg/commands/dmesg_handler.go b/pkg/commands/dmesg_handler.go new file mode 100644 index 00000000..81e4c70b --- /dev/null +++ b/pkg/commands/dmesg_handler.go @@ -0,0 +1,68 @@ +// Copyright Cozystack Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import ( + "strings" + + "github.com/cockroachdb/errors" + "github.com/spf13/cobra" +) + +// dmesgCmdName labels the wrapped dmesg subcommand in the dispatch. +// Hoisted so a future rename touches one site. +const dmesgCmdName = "dmesg" + +// wrapDmesgCommand installs a FlagErrorFunc that rewrites the +// cryptic ParseBool error from a numeric --tail value into an +// operator-friendly hint. +// +// Upstream talosctl registers --tail as a BoolVarP toggling +// tail-mode for --follow (`Dmesg(ctx, follow, tail bool)` on the +// wire). Operators' first instinct is `tail(1)`-style line count; +// `talm dmesg --tail=3` then surfaces +// `strconv.ParseBool: parsing "3": invalid syntax` with no hint +// at the actual contract. +// +// The original ParseBool error is wrapped (not replaced) so it +// stays reachable via errors.Unwrap and verbose fmt.Sprintf("%+v", +// err) rendering — debugging the underlying pflag failure remains +// possible while the operator-facing top line describes what --tail +// actually does and what to do instead. +func wrapDmesgCommand(wrappedCmd *cobra.Command) { + wrappedCmd.SetFlagErrorFunc(func(_ *cobra.Command, err error) error { + // Substring detection on pflag's error format. Two tokens + // — `--tail` plus `ParseBool` — together are specific + // enough to avoid false positives (pflag only emits + // ParseBool errors for bool flags). Matching on `--tail` + // without surrounding quotes is durable to pflag's + // rendering difference between shorthand-present and + // shorthand-absent flags: today's `BoolVarP(..., "tail", + // "", ...)` emits `"--tail"`, but a future upstream that + // adds a shorthand would emit `-T, --tail` and would + // otherwise silently bypass the cushion. pflag does not + // export a typed error for invalid-argument failures, so + // substring is the only available detection path. + msg := err.Error() + if strings.Contains(msg, "--tail") && strings.Contains(msg, "ParseBool") { + return errors.WithHint( + errors.Wrap(err, "talm dmesg --tail is a boolean toggle (tail-mode for --follow), not a line count"), + "for the last N lines, pipe to tail(1): `talm dmesg --nodes | tail -n N`; to stream only new messages, use `--follow --tail`", + ) + } + + return err + }) +} diff --git a/pkg/commands/kubeconfig_handler.go b/pkg/commands/kubeconfig_handler.go index e3c730f0..dd4e594a 100644 --- a/pkg/commands/kubeconfig_handler.go +++ b/pkg/commands/kubeconfig_handler.go @@ -170,11 +170,18 @@ Otherwise, kubeconfig will be written to PWD.` return nil } - // Set custom Args validation: allow no args by default, but allow args when --login is set + // Set custom Args validation: allow no args by default, but allow + // args when --login is set. The error message describes the + // wrapper's actual contract (default writes to project root; + // --login redirects to the system kubeconfig) instead of the + // previous misleading wording about "--login to pass arguments". wrappedCmd.Args = func(cmd *cobra.Command, args []string) error { loginFlagValue, _ := cmd.Flags().GetBool("login") if !loginFlagValue && len(args) > 0 { - return errors.New("kubeconfig command does not accept arguments (use --login flag to pass arguments)") + return errors.WithHint( + errors.New("talm kubeconfig writes to project root by default; positional path is not consumed in this mode"), + "to write to a specific path, pass --login /your/path (the positional argument is honoured under --login); --login with no positional defaults to ~/.kube/config (the system kubeconfig)", + ) } return nil diff --git a/pkg/commands/rotate_ca_handler.go b/pkg/commands/rotate_ca_handler.go index f7f29c06..4fb5a14d 100644 --- a/pkg/commands/rotate_ca_handler.go +++ b/pkg/commands/rotate_ca_handler.go @@ -57,7 +57,7 @@ The command works by: 3. Gracefully rolling out the new CAs to all nodes 4. Updating local configs (talosconfig, secrets.yaml, kubeconfig) -IMPORTANT: You must specify exactly ONE control-plane node via --endpoints/-e or --nodes/-n +IMPORTANT: You must specify exactly ONE control-plane node via --endpoints/-e or --nodes flags, or through a single config file (-f). The node must be a control-plane node. By default, both Talos API CA and Kubernetes API CA are rotated. Use --talos=false @@ -92,6 +92,18 @@ The command runs in dry-run mode by default. Use --dry-run=false to perform actu originalPreRunE := wrappedCmd.PreRunE wrappedCmd.PreRunE = func(cmd *cobra.Command, args []string) error { + // Populate GlobalArgs.Nodes from upstream's per-class node + // flags (--init-node / --control-plane-nodes / --worker-nodes) + // BEFORE chaining the original PreRunE. See + // populateNodesFromPerClassFlags godoc for the chain-order + // rationale (upstream WithClient guard reads from + // taloscommands.GlobalArgs after the wrapper's sync; populating + // after the sync leaves upstream blind to the per-class lists). + // Upstream rotate-ca's contract is "exactly one CP node" — the + // multi-node guard below catches the case where the populated + // list ends up with more than one entry. + populateNodesFromPerClassFlags(cmd) + // Run original PreRunE first (processes modeline, syncs GlobalArgs, etc.) if originalPreRunE != nil { if err := originalPreRunE(cmd, args); err != nil { @@ -110,7 +122,7 @@ The command runs in dry-run mode by default. Use --dry-run=false to perform actu if len(GlobalArgs.Nodes) > 1 { return errors.WithHint( errors.Newf("rotate-ca requires exactly one control-plane node, but %d nodes were provided", len(GlobalArgs.Nodes)), - "the rotate-ca command coordinates CA rotation across the entire cluster from a single control-plane node; specify only one node via --nodes or a single config file", + "the rotate-ca command coordinates CA rotation across the entire cluster from a single control-plane node; specify only one node via --nodes, --control-plane-nodes with a single IP, or a single config file", ) } diff --git a/pkg/commands/talosctl_wrapper.go b/pkg/commands/talosctl_wrapper.go index 6cc4cf3e..f701f5da 100644 --- a/pkg/commands/talosctl_wrapper.go +++ b/pkg/commands/talosctl_wrapper.go @@ -28,6 +28,106 @@ import ( "k8s.io/client-go/tools/clientcmd" ) +// crashdumpCmdName labels the upstream crashdump subcommand both +// inside the wrapper dispatch and in test fixtures that synthesise +// a wrapped crashdump. Hoisted so a future rename touches one site. +const crashdumpCmdName = "crashdump" + +// rotateCACmdName labels the upstream rotate-ca subcommand in the +// dispatch + tests. Mirrors crashdumpCmdName's hoist rationale. +const rotateCACmdName = "rotate-ca" + +// rootShadowedPersistentFlags is the set of flag names whose +// upstream PersistentFlag registrations must be dropped by the +// propagation pass. Two motivating categories: +// +// 1. **talm-root duplicates.** talm's own `registerRootFlags` +// already registers --talosconfig, --root, --context, --nodes, +// --endpoints, --cluster, --skip-verify, --version bound to +// `commands.GlobalArgs.`. Upstream's `addCommand` pattern +// (see siderolabs/talos cmd/talosctl/cmd/talos/root.go) +// registers the same names as PersistentFlags on every +// top-level command it imports, but bound to +// `taloscommands.GlobalArgs.`. Propagating those would +// shadow talm's bindings — pflag would write to upstream's +// variables, then the wrapper's PreRunE sync +// (`taloscommands.GlobalArgs = commands.GlobalArgs`) would +// OVERWRITE the just-parsed value with talm's (empty) one. +// Operators see "nodes are not set for the command" after +// passing --nodes. +// 2. **Intentional drops.** Some upstream persistent flags bind +// to `taloscommands.GlobalArgs` fields that talm does not +// model (e.g. SideroV1KeysDir). Letting them propagate would +// accept the flag, parse it into the upstream side, then have +// the sync wipe it — silent acceptance with no effect. +// Dropping forces a clean "unknown flag" error so the operator +// knows the option isn't supported in talm. +// +// Either way, the propagation skip is correct. Other upstream +// persistent flags (--namespace on imageCmd, --insecure on +// metaCmd, --overlays on image-talos-bundle, etc.) continue to +// propagate normally — they have no talm-root counterpart, bind +// to upstream's own variables which are read at upstream RunE +// time, and the sync direction doesn't disturb them. +// +//nolint:gochecknoglobals // package-level map keyed by name; cheap O(1) lookup in the propagation hot path. +var rootShadowedPersistentFlags = map[string]struct{}{ + talosconfigName: {}, + "root": {}, + "context": {}, + "nodes": {}, + "endpoints": {}, + "cluster": {}, + "skip-verify": {}, + "version": {}, + // siderov1-keys-dir is registered by upstream addCommand into + // taloscommands.GlobalArgs.SideroV1KeysDir, but talm does not + // model that auth path and never populates the field. If we + // let the upstream registration propagate, the parsed value + // would be wiped by the wrapper PreRunE's struct sync + // (`taloscommands.GlobalArgs = commands.GlobalArgs`) the same + // way --nodes was. Shadowing here means `talm + // --siderov1-keys-dir` errors with "unknown flag" — clean + // refusal instead of silent acceptance + drop. + "siderov1-keys-dir": {}, +} + +// propagatePersistentFlags mirrors the upstream command's persistent +// flags onto the wrapped command's PersistentFlags so cobra's +// mergePersistentFlags walks them through to wrapped children at +// parse time. Skips talm-root-shadowed names so the wrapper PreRunE's +// `taloscommands.GlobalArgs = commands.GlobalArgs` sync direction +// stays correct (see rootShadowedPersistentFlags godoc for the +// failure mode). The -f → -F shorthand rename mirrors the local-flag +// loop's treatment of the same collision class. +func propagatePersistentFlags(cmd, wrappedCmd *cobra.Command) { + cmd.PersistentFlags().VisitAll(func(flag *pflag.Flag) { + if _, shadowed := rootShadowedPersistentFlags[flag.Name]; shadowed { + return + } + + if flag.Shorthand == "f" { + renamed := &pflag.Flag{ + Name: flag.Name, + Usage: flag.Usage, + Value: flag.Value, + DefValue: flag.DefValue, + Changed: flag.Changed, + NoOptDefVal: flag.NoOptDefVal, + Deprecated: flag.Deprecated, + Hidden: flag.Hidden, + Shorthand: "F", + Annotations: flag.Annotations, + } + wrappedCmd.PersistentFlags().AddFlag(renamed) + + return + } + + wrappedCmd.PersistentFlags().AddFlag(flag) + }) +} + // wrapTalosCommand wraps a talosctl command to add --file flag support. // //nolint:gocognit,gocyclo,cyclop,funlen // cobra wrapper for talosctl forward; branching over (--insecure, --talosconfig override, file/template flags, modeline) is each one short branch. @@ -78,6 +178,21 @@ func wrapTalosCommand(cmd *cobra.Command, cmdName string) *cobra.Command { } }) + // Also copy persistent flags so children of this wrapped command + // inherit them through cobra's standard mergePersistentFlags() at + // parse time. cmd.Flags() above enumerates only the LOCAL flag set + // — persistent flags from the upstream parent's PersistentFlags() + // are stored separately and merged at runtime via VisitParents. + // Without this pass the wrapped tree loses every parent-registered + // persistent flag (--namespace on imageCmd, --insecure on metaCmd, + // and the rest of the ~25 flags affected by the dropped chain). + // + // The same -f → -F shorthand rename applies: defensive against a + // future upstream that registers a persistent flag with shorthand + // "f". Today no such upstream flag exists, but the cost is one + // branch and the surface stays uniform with the local-flag loop. + propagatePersistentFlags(cmd, wrappedCmd) + // Add --file flag only if it doesn't already exist in the original command var configFiles []string @@ -173,10 +288,37 @@ func wrapTalosCommand(cmd *cobra.Command, cmdName string) *cobra.Command { } // Special handling for rotate-ca command - if baseCmdName == "rotate-ca" { + if baseCmdName == rotateCACmdName { wrapRotateCACommand(wrappedCmd, originalRunE) } + // Special handling for crashdump: upstream pre-validates + // GlobalArgs.Nodes before its own RunE runs, but crashdump's + // documented shape is `--init-node` / `--control-plane-nodes` / + // `--worker-nodes` rather than `--nodes`. Populate GlobalArgs.Nodes + // from those per-class flags so the upstream guard is satisfied + // and the operator-facing deprecation message can surface. + if baseCmdName == crashdumpCmdName { + wrapCrashdumpCommand(wrappedCmd) + } + + // Special handling for dmesg: rewrite the cryptic ParseBool + // error from a numeric --tail value into a hint that describes + // upstream's actual --tail contract. + if baseCmdName == dmesgCmdName { + wrapDmesgCommand(wrappedCmd) + } + + // Special handling for the interactive-only commands + // (dashboard, edit): refuse non-tty stdin up front so the + // operator gets a clear hint instead of a no-output failure. + // dashboard would panic in tcell teardown; edit would hang in + // the kubectl external-editor helper. See wrapTUICommand + // godoc for the per-command rationale. + if baseCmdName == dashboardCmdName || baseCmdName == editCmdName { + wrapTUICommand(wrappedCmd, baseCmdName) + } + // Copy all subcommands for _, subCmd := range cmd.Commands() { wrappedCmd.AddCommand(wrapTalosCommand(subCmd, subCmd.Name())) diff --git a/pkg/commands/talosctl_wrapper_test.go b/pkg/commands/talosctl_wrapper_test.go new file mode 100644 index 00000000..e8d9e166 --- /dev/null +++ b/pkg/commands/talosctl_wrapper_test.go @@ -0,0 +1,630 @@ +// Copyright Cozystack Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import ( + "strings" + "testing" + + "github.com/cockroachdb/errors" + taloscommands "github.com/siderolabs/talos/cmd/talosctl/cmd/talos" + "github.com/spf13/cobra" +) + +// TestPropagatePersistentFlags_SkipsRootShadowedNames asserts that +// every name in rootShadowedPersistentFlags is dropped from the +// propagation pass. talm's root already registers these flags +// bound to commands.GlobalArgs.; letting upstream's PersistentFlag +// registration propagate would bind the subcommand-level parse to +// taloscommands.GlobalArgs. and then the wrapper PreRunE's +// `taloscommands.GlobalArgs = commands.GlobalArgs` sync would wipe +// it. Table-driven so adding a name to the shadow map automatically +// extends coverage. +func TestPropagatePersistentFlags_SkipsRootShadowedNames(t *testing.T) { + for name := range rootShadowedPersistentFlags { + t.Run(name, func(t *testing.T) { + upstream := &cobra.Command{Use: "upstream"} + upstream.PersistentFlags().String(name, "default", "shadowed persistent flag") + + wrapped := &cobra.Command{Use: "wrapped"} + + propagatePersistentFlags(upstream, wrapped) + + if got := wrapped.PersistentFlags().Lookup(name); got != nil { + t.Errorf("shadowed name %q must not propagate to wrappedCmd.PersistentFlags(); got %+v", name, got) + } + }) + } +} + +// TestPropagatePersistentFlags_NonShadowedPropagates is the +// companion check: a name NOT in the shadow map flows through the +// propagation pass normally. Builds a synthetic flag whose name is +// distinct from anything talm root reserves (e.g. "namespace" is +// the real upstream case but feels load-bearing in production; use +// a synthetic name so the test can't false-positive on a future +// schema collision). +func TestPropagatePersistentFlags_NonShadowedPropagates(t *testing.T) { + upstream := &cobra.Command{Use: "upstream"} + upstream.PersistentFlags().String("definitely-not-shadowed", "v", "regular upstream persistent flag") + + wrapped := &cobra.Command{Use: "wrapped"} + + propagatePersistentFlags(upstream, wrapped) + + if got := wrapped.PersistentFlags().Lookup("definitely-not-shadowed"); got == nil { + t.Fatal("non-shadowed persistent flag must propagate to wrappedCmd.PersistentFlags(); got nil") + } +} + +// TestPropagatePersistentFlags_RenamesShorthandF pins the defensive +// `-f` → `-F` rename branch in propagatePersistentFlags. Today no +// upstream persistent flag carries shorthand `f`, so this branch +// is dead code on production input — exactly the regression-trap +// shape: defensive code without coverage is the kind that gets +// "cleaned up" by a future refactor and silently re-introduces +// the collision. +// +// Build a synthetic upstream parent with --foo / -f as a persistent +// flag; propagate; assert the wrapped command has --foo with +// shorthand F (not f, which would collide with talm's own +// --file / -f flag added by the local-flag loop in wrapTalosCommand). +func TestPropagatePersistentFlags_RenamesShorthandF(t *testing.T) { + upstream := &cobra.Command{Use: "upstream"} + upstream.PersistentFlags().StringP("foo", "f", "default", "shadow shorthand f") + + wrapped := &cobra.Command{Use: "wrapped"} + + propagatePersistentFlags(upstream, wrapped) + + got := wrapped.PersistentFlags().Lookup("foo") + if got == nil { + t.Fatal("propagation must register --foo on wrappedCmd.PersistentFlags(); got nil") + } + + if got.Shorthand != "F" { + t.Errorf("propagation must rename shorthand from 'f' to 'F' to avoid collision with talm's --file / -f flag; got %q", got.Shorthand) + } +} + +// TestWrapTalosCommand_InheritsParentPersistentFlags pins the +// structural contract behind #194: when an upstream parent registers +// a persistent flag, every wrapped child must surface that flag in +// its effective flag set. cobra's mergePersistentFlags walks the +// wrapped parent's PersistentFlags() at parse time; if the wrapper +// only copies LOCAL flags, persistent ones from the upstream parent +// are silently dropped from the wrapped tree. +// +// Synthetic tree (no taloscommands dependency) so the test stays +// hermetic: build a parent with a persistent --foo, a child with a +// local --bar; wrap; assert the wrapped child sees both after +// ParseFlags. +func TestWrapTalosCommand_InheritsParentPersistentFlags(t *testing.T) { + parent := &cobra.Command{Use: "parent"} + parent.PersistentFlags().String("foo", "default", "persistent on parent") + + child := &cobra.Command{Use: "child", Run: func(_ *cobra.Command, _ []string) {}} + child.Flags().String("bar", "default", "local on child") + parent.AddCommand(child) + + wrappedParent := wrapTalosCommand(parent, "parent") + wrappedChild, _, err := wrappedParent.Find([]string{"child"}) + if err != nil { + t.Fatalf("Find child on wrapped parent: %v", err) + } + + if err := wrappedChild.ParseFlags([]string{"--foo=fromparent", "--bar=fromchild"}); err != nil { + t.Fatalf("ParseFlags on wrapped child: %v — persistent --foo from parent was dropped", err) + } + + fooFlag := wrappedChild.Flags().Lookup("foo") + if fooFlag == nil { + t.Fatal("wrapped child must see parent's persistent --foo after merge; got nil") + } + + if fooFlag.Value.String() != "fromparent" { + t.Errorf("--foo value: got %q, want %q", fooFlag.Value.String(), "fromparent") + } + + barFlag := wrappedChild.Flags().Lookup("bar") + if barFlag == nil || barFlag.Value.String() != "fromchild" { + t.Errorf("wrapped child must still see its own --bar; got %v", barFlag) + } +} + +// TestWrapTalosCommand_RealImageListPropagatesNamespace is the +// regression pin against the actual upstream surface. The upstream +// imageCmd.PersistentFlags() registers --namespace; image list is +// its subcommand. Through the talm wrapper, --namespace must remain +// resolvable on `image list`. Empirically broken before the fix +// (talm image list --help showed no --namespace). +func TestWrapTalosCommand_RealImageListPropagatesNamespace(t *testing.T) { + var imageCmd *cobra.Command + + for _, cmd := range taloscommands.Commands { + if cmd.Name() == "image" { + imageCmd = cmd + + break + } + } + + if imageCmd == nil { + t.Skip("upstream taloscommands.Commands has no 'image' command — schema changed") + } + + wrapped := wrapTalosCommand(imageCmd, "image") + + listCmd, _, err := wrapped.Find([]string{"list"}) + if err != nil { + t.Fatalf("Find list under wrapped image: %v", err) + } + + if err := listCmd.ParseFlags([]string{"--namespace", "system"}); err != nil { + t.Fatalf("ParseFlags on wrapped image list: %v — --namespace from imageCmd.PersistentFlags() was dropped", err) + } + + ns := listCmd.Flags().Lookup("namespace") + if ns == nil { + t.Fatal("wrapped image list must see --namespace from parent's PersistentFlags()") + } + + if ns.Value.String() != "system" { + t.Errorf("--namespace value: got %q, want %q", ns.Value.String(), "system") + } +} + +// TestWrapCrashdumpCommand_PrepopulatesGlobalArgsNodes pins the +// contract for #180: when crashdump's per-class node flags +// (--init-node, --control-plane-nodes, --worker-nodes) are set +// and GlobalArgs.Nodes is otherwise empty, the wrapper's PreRunE +// populates GlobalArgs.Nodes from their union so the upstream +// WithClient guard at cmd/talosctl/pkg/talos/global/client.go +// is satisfied. Without this, operators following the documented +// `talm crashdump --control-plane-nodes ` shape hit +// "nodes are not set for the command" before crashdump's own +// deprecation message can surface. +func TestWrapCrashdumpCommand_PrepopulatesGlobalArgsNodes(t *testing.T) { + savedNodes := GlobalArgs.Nodes + + t.Cleanup(func() { GlobalArgs.Nodes = savedNodes }) + + GlobalArgs.Nodes = nil + + cmd := &cobra.Command{Use: crashdumpCmdName} + cmd.Flags().String("init-node", "", "") + cmd.Flags().StringSlice("control-plane-nodes", nil, "") + cmd.Flags().StringSlice("worker-nodes", nil, "") + + wrapCrashdumpCommand(cmd) + + if err := cmd.Flags().Set("control-plane-nodes", "192.0.2.10"); err != nil { + t.Fatalf("set --control-plane-nodes: %v", err) + } + + if err := cmd.Flags().Set("worker-nodes", "192.0.2.11"); err != nil { + t.Fatalf("set --worker-nodes: %v", err) + } + + if err := cmd.PreRunE(cmd, nil); err != nil { + t.Fatalf("PreRunE returned: %v", err) + } + + if len(GlobalArgs.Nodes) != 2 { + t.Fatalf("expected GlobalArgs.Nodes populated from per-class flags; got %v", GlobalArgs.Nodes) + } + + if GlobalArgs.Nodes[0] != "192.0.2.10" || GlobalArgs.Nodes[1] != "192.0.2.11" { + t.Errorf("expected [192.0.2.10, 192.0.2.11], got %v", GlobalArgs.Nodes) + } +} + +// TestWrapCrashdumpCommand_DoesNotShadowExistingNodes pins the +// no-overwrite contract: when GlobalArgs.Nodes is already set (via +// --nodes flag or modeline), the per-class flag values are NOT +// merged in. This keeps the explicit --nodes assignment +// authoritative — same precedence as the rest of talm's wrapper +// (modeline pre-population is also no-overwrite when --nodes is +// explicit). +func TestWrapCrashdumpCommand_DoesNotShadowExistingNodes(t *testing.T) { + savedNodes := GlobalArgs.Nodes + + t.Cleanup(func() { GlobalArgs.Nodes = savedNodes }) + + GlobalArgs.Nodes = []string{"192.0.2.99"} + + cmd := &cobra.Command{Use: crashdumpCmdName} + cmd.Flags().String("init-node", "", "") + cmd.Flags().StringSlice("control-plane-nodes", nil, "") + cmd.Flags().StringSlice("worker-nodes", nil, "") + + wrapCrashdumpCommand(cmd) + + if err := cmd.Flags().Set("control-plane-nodes", "192.0.2.10"); err != nil { + t.Fatalf("set --control-plane-nodes: %v", err) + } + + if err := cmd.PreRunE(cmd, nil); err != nil { + t.Fatalf("PreRunE: %v", err) + } + + if len(GlobalArgs.Nodes) != 1 || GlobalArgs.Nodes[0] != "192.0.2.99" { + t.Errorf("explicit --nodes must take precedence; got %v", GlobalArgs.Nodes) + } +} + +// TestWrapKubeconfigCommand_PositionalPathErrorMessageMatchesContract +// pins the rewritten error message for #193. The previous wording +// claimed `use --login flag to pass arguments`, which conflated two +// distinct things: --login switches the kubeconfig target between +// local and system, it does not pass a positional path. The new +// message describes what the wrapper actually does (default writes +// to project root; --login redirects to system) and lists actual +// alternatives. +func TestWrapKubeconfigCommand_PositionalPathErrorMessageMatchesContract(t *testing.T) { + var kubeconfigCmd *cobra.Command + + for _, cmd := range taloscommands.Commands { + if cmd.Name() == defaultKubeconfigName { + kubeconfigCmd = cmd + + break + } + } + + if kubeconfigCmd == nil { + t.Skipf("upstream taloscommands.Commands has no %q command", defaultKubeconfigName) + } + + wrapped := wrapTalosCommand(kubeconfigCmd, defaultKubeconfigName) + + err := wrapped.Args(wrapped, []string{"/some/positional/path"}) + if err == nil { + t.Fatal("kubeconfig with positional path must error when --login is unset") + } + + body := err.Error() + if strings.Contains(body, "use --login flag to pass arguments") { + t.Errorf("error body still carries the misleading '--login to pass arguments' wording; got: %q", body) + } + + // Body must mention project root (default behaviour) so the + // operator understands what default mode actually does. + if !strings.Contains(strings.ToLower(body), "project root") { + t.Errorf("error body must describe the default destination (project root); got: %q", body) + } + + // cockroachdb/errors.WithHint stores the hint chain separately + // from the body; err.Error() returns only the body. Walk the + // hint chain via GetAllHints to inspect the operator-facing + // guidance. + hints := strings.Join(errors.GetAllHints(err), "\n") + + // Hint must NOT suggest stdout redirection. The kubeconfig is + // written to the filesystem path (not stdout), so + // `talm kubeconfig > /path` would leave the operator with an + // empty /path and an unexpected ./kubeconfig in the project + // root. Pin against the previous misleading advice. + if strings.Contains(strings.ToLower(hints), "redirect stdout") || strings.Contains(hints, "kubeconfig > /") { + t.Errorf("hint must not suggest stdout redirection; kubeconfig is written to filesystem path. got hints: %q", hints) + } + + // Hint must describe the actual --login workflow: positional + // path is honoured under --login, and --login with no + // positional defaults to ~/.kube/config (the system + // kubeconfig). Pinning either "--login /" (suggesting a path + // after --login) or "~/.kube/config" verifies the workflow + // reaches the operator accurately. + if !strings.Contains(hints, "--login /") && !strings.Contains(hints, "~/.kube/config") { + t.Errorf("hint must describe the --login workflow (pass --login /path or rely on the ~/.kube/config default); got hints: %q", hints) + } +} + +// TestWrapDmesgCommand_TailEqualsNumeric_RewritesError pins the +// FlagErrorFunc cushion for #195. Upstream talosctl registers +// --tail as a BoolVarP (toggling tail-mode for --follow), but +// operators' first instinct is tail(1)-style line count. `--tail=3` +// trips pflag's ParseBool and surfaces a cryptic +// 'strconv.ParseBool: parsing "3": invalid syntax' error. +// +// The wrapper installs a FlagErrorFunc that detects this specific +// pattern and rewrites it with a hint pointing at the actual +// upstream contract + a pipe-to-tail(1) suggestion for the +// line-count case operators usually want. +func TestWrapDmesgCommand_TailEqualsNumeric_RewritesError(t *testing.T) { + var dmesgCmd *cobra.Command + + for _, cmd := range taloscommands.Commands { + if cmd.Name() == "dmesg" { + dmesgCmd = cmd + + break + } + } + + if dmesgCmd == nil { + t.Skip("upstream taloscommands.Commands has no 'dmesg' command — schema changed") + } + + wrapped := wrapTalosCommand(dmesgCmd, "dmesg") + + // FlagErrorFunc is invoked by cobra's Execute() lifecycle on a + // ParseFlags failure. Reproduce that path by capturing the + // ParseFlags error and feeding it through the registered hook — + // this is exactly what cobra does internally in command.execute. + rawErr := wrapped.ParseFlags([]string{"--tail=3"}) + if rawErr == nil { + t.Fatal("--tail=3 must error from pflag (upstream --tail is bool); got nil") + } + + hook := wrapped.FlagErrorFunc() + if hook == nil { + t.Fatal("wrapped dmesg must register a FlagErrorFunc to rewrite ParseBool errors") + } + + err := hook(wrapped, rawErr) + + msg := err.Error() + + // Top-level message must describe the actual contract + // (boolean toggle for --follow) so the operator sees it first + // before any inherited ParseBool noise. + if !strings.Contains(strings.ToLower(msg), "boolean") && !strings.Contains(strings.ToLower(msg), "follow") { + t.Errorf("rewritten error must describe --tail's actual contract (boolean toggle for --follow); got: %q", msg) + } + + // Chain must preserve the original ParseBool error via + // errors.Wrap so the underlying pflag failure remains + // reachable for verbose debugging (errors.Unwrap, + // fmt.Sprintf("%+v", err), etc.). Without wrap-not-replace, + // downstream tooling that walks the chain or matches against + // the upstream error type loses information. + if !strings.Contains(msg, "ParseBool") { + t.Errorf("rewritten error must wrap (not replace) the original ParseBool error so the chain stays inspectable; got: %q", msg) + } + + if unwrapped := errors.Unwrap(err); unwrapped == nil { + t.Errorf("rewritten error must be unwrappable to the original; errors.Unwrap returned nil for %q", msg) + } +} + +// TestWrapTUICommand_NonTTY_RefusesWithHint pins the cushion for +// #183 across BOTH wrapped interactive-only commands (dashboard, +// edit). Each has a different upstream failure mechanism: +// dashboard panics in tcell teardown, edit hangs in the kubectl +// external-editor helper. The refusal here is the same shape for +// both — clear cobra-surfaced error with operator-facing hint — +// and the cmdLabel substitution lets the message correlate to +// the command the operator typed. +// +// Table-driven so the dispatch in wrapTalosCommand (which routes +// both dashboardCmdName and editCmdName through wrapTUICommand) +// is exercised symmetrically. A future refactor that hardcodes +// one branch or flips the OR silently breaks one side without +// failing the matrix. +func TestWrapTUICommand_NonTTY_RefusesWithHint(t *testing.T) { + savedStdinIsTTY := stdinIsTTY + + t.Cleanup(func() { stdinIsTTY = savedStdinIsTTY }) + + stdinIsTTY = func() bool { return false } + + tests := []string{dashboardCmdName, editCmdName} + for _, label := range tests { + t.Run(label, func(t *testing.T) { + cmd := &cobra.Command{Use: label} + wrapTUICommand(cmd, label) + + err := cmd.PreRunE(cmd, nil) + if err == nil { + t.Fatalf("non-tty stdin must refuse %s up front; got nil", label) + } + + msg := err.Error() + if !strings.Contains(strings.ToLower(msg), "tty") && !strings.Contains(strings.ToLower(msg), "terminal") { + t.Errorf("refusal must mention tty/terminal so the operator can correlate; got: %q", msg) + } + + // cmdLabel must appear in the message so an operator + // running CI logs can grep for the command name and + // land on the refusal directly. Pins the label- + // substitution contract against a future refactor + // that hardcodes the message. + if !strings.Contains(msg, label) { + t.Errorf("refusal must include the command label %q so the operator can correlate; got: %q", label, msg) + } + }) + } +} + +// TestWrapRotateCACommand_LongDoesNotReferenceDroppedNShorthand +// pins the rotate-ca help text against the -n shorthand drop. The +// Long previously said "specify exactly ONE control-plane node via +// --endpoints/-e or --nodes/-n" — after the shorthand drop the +// reference to `-n` is stale and would teach operators a flag +// shape that errors out at parse time. Catches the same class of +// drift on the next change. +func TestWrapRotateCACommand_LongDoesNotReferenceDroppedNShorthand(t *testing.T) { + cmd := &cobra.Command{Use: rotateCACmdName} + cmd.Flags().Bool("with-docs", true, "") + cmd.Flags().Bool("with-examples", true, "") + cmd.PreRunE = func(_ *cobra.Command, _ []string) error { return nil } + + wrapRotateCACommand(cmd, nil) + + for _, banned := range []string{"--nodes/-n", " -n,", " -n "} { + if strings.Contains(cmd.Long, banned) { + t.Errorf("rotate-ca Long must not reference the dropped -n shorthand; found %q in:\n%s", banned, cmd.Long) + } + } +} + +// TestWrapRotateCACommand_PerClassFlagsPopulateNodes pins the +// rotate-ca extension to the per-class populate logic. Like +// crashdump, rotate-ca's upstream registers --init-node / +// --control-plane-nodes / --worker-nodes (its API surface for +// CA rotation against a heterogeneous cluster), and the upstream +// WithClient guard pre-validates GlobalArgs.Nodes regardless of +// whether the operator used the global --nodes or a per-class +// flag. Without populating, operators following the documented +// `rotate-ca --control-plane-nodes X` shape hit the nodes-not-set +// guard before rotate-ca's RunE runs. +// +// Unlike crashdump (which collects all per-class flags as a +// diagnostic union), rotate-ca's contract is "exactly one CP +// node". The populate puts whatever the operator passed into +// GlobalArgs.Nodes; the existing multi-node guard then catches +// the case where multiple were passed and returns the same hint +// it always has. +func TestWrapRotateCACommand_PerClassFlagsPopulateNodes(t *testing.T) { + savedNodes := GlobalArgs.Nodes + + t.Cleanup(func() { GlobalArgs.Nodes = savedNodes }) + + GlobalArgs.Nodes = nil + + cmd := &cobra.Command{Use: rotateCACmdName} + cmd.Flags().String("init-node", "", "") + cmd.Flags().StringSlice("control-plane-nodes", nil, "") + cmd.Flags().StringSlice("worker-nodes", nil, "") + cmd.PreRunE = func(_ *cobra.Command, _ []string) error { return nil } + + wrapRotateCACommand(cmd, nil) + + if err := cmd.Flags().Set("control-plane-nodes", "192.0.2.10"); err != nil { + t.Fatalf("set --control-plane-nodes: %v", err) + } + + if err := cmd.PreRunE(cmd, nil); err != nil { + t.Fatalf("PreRunE returned: %v", err) + } + + if len(GlobalArgs.Nodes) != 1 || GlobalArgs.Nodes[0] != "192.0.2.10" { + t.Errorf("expected GlobalArgs.Nodes populated from --control-plane-nodes; got %v", GlobalArgs.Nodes) + } +} + +// TestWrapTUICommand_TTY_PassesThrough pins the no-op contract: +// when stdin IS a terminal the wrapper does NOT interfere with +// the command's normal flow. +func TestWrapTUICommand_TTY_PassesThrough(t *testing.T) { + savedStdinIsTTY := stdinIsTTY + + t.Cleanup(func() { stdinIsTTY = savedStdinIsTTY }) + + stdinIsTTY = func() bool { return true } + + cmd := &cobra.Command{Use: "dashboard"} + wrapTUICommand(cmd, "dashboard") + + if err := cmd.PreRunE(cmd, nil); err != nil { + t.Errorf("tty path must pass through; got %v", err) + } +} + +// TestWrapTalosCommand_RealCrashdumpPopulatesNodesFromControlPlane +// is the real-upstream-path companion to the synthetic crashdump +// populate test. Wraps the actual taloscommands.Commands entry +// for `crashdump` and confirms that --control-plane-nodes +// populates GlobalArgs.Nodes via the full wrapper chain. +// +// Matters because the dispatch ordering in wrapTalosCommand +// (wrapCrashdumpCommand installed AFTER wrapTalosCommand's +// PreRunE assignment so populate runs before the sync) is +// untested in isolation. A refactor that flips that order +// silently breaks the cushion — only the real-path test catches +// it. +func TestWrapTalosCommand_RealCrashdumpPopulatesNodesFromControlPlane(t *testing.T) { + savedNodes := GlobalArgs.Nodes + + t.Cleanup(func() { GlobalArgs.Nodes = savedNodes }) + + var crashdumpCmd *cobra.Command + + for _, cmd := range taloscommands.Commands { + if cmd.Name() == crashdumpCmdName { + crashdumpCmd = cmd + + break + } + } + + if crashdumpCmd == nil { + t.Skipf("upstream taloscommands.Commands has no %q command", crashdumpCmdName) + } + + GlobalArgs.Nodes = nil + + wrapped := wrapTalosCommand(crashdumpCmd, crashdumpCmdName) + + if err := wrapped.Flags().Set("control-plane-nodes", "192.0.2.10"); err != nil { + t.Fatalf("set --control-plane-nodes: %v", err) + } + + if err := wrapped.PreRunE(wrapped, nil); err != nil { + t.Fatalf("PreRunE returned: %v", err) + } + + if len(GlobalArgs.Nodes) == 0 { + t.Fatal("real-upstream crashdump must populate GlobalArgs.Nodes from --control-plane-nodes; got empty") + } + + if GlobalArgs.Nodes[0] != "192.0.2.10" { + t.Errorf("populated node value: got %q, want 192.0.2.10", GlobalArgs.Nodes[0]) + } +} + +// TestWrapTalosCommand_RealMetaWritePropagatesInsecure pins the +// persistent-shorthand path: metaCmd.PersistentFlags() registers +// -i / --insecure with shorthand "i". Through the wrapper, both +// the long and short forms must resolve on `meta write`. Pinning +// the shorthand catches a future regression where the wrapper +// might copy the long flag but lose the shorthand attribute. +func TestWrapTalosCommand_RealMetaWritePropagatesInsecure(t *testing.T) { + var metaCmd *cobra.Command + + for _, cmd := range taloscommands.Commands { + if cmd.Name() == "meta" { + metaCmd = cmd + + break + } + } + + if metaCmd == nil { + t.Skip("upstream taloscommands.Commands has no 'meta' command — schema changed") + } + + wrapped := wrapTalosCommand(metaCmd, "meta") + + writeCmd, _, err := wrapped.Find([]string{"write"}) + if err != nil { + t.Fatalf("Find write under wrapped meta: %v", err) + } + + // Long form. + if err := writeCmd.ParseFlags([]string{"--insecure"}); err != nil { + t.Fatalf("ParseFlags --insecure on wrapped meta write: %v", err) + } + + if writeCmd.Flags().Lookup("insecure") == nil { + t.Fatal("wrapped meta write must see --insecure from metaCmd.PersistentFlags()") + } + + // Short form. Re-parse to exercise the -i alias path. + if err := writeCmd.ParseFlags([]string{"-i"}); err != nil { + t.Errorf("ParseFlags -i on wrapped meta write: %v — shorthand attribute lost during copy?", err) + } +} diff --git a/pkg/commands/tui_handler.go b/pkg/commands/tui_handler.go new file mode 100644 index 00000000..73abe972 --- /dev/null +++ b/pkg/commands/tui_handler.go @@ -0,0 +1,92 @@ +// Copyright Cozystack Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package commands + +import ( + "os" + + "github.com/cockroachdb/errors" + "github.com/spf13/cobra" + "golang.org/x/term" +) + +// Names of the TUI-style subcommands the wrapper refuses on +// non-tty stdin. Hoisted so the dispatch + tests share one +// canonical list. +const ( + dashboardCmdName = "dashboard" + editCmdName = "edit" +) + +// stdinIsTTY reports whether stdin is currently attached to a +// terminal. Defined as a package-level var so tests can stub it +// (the term package's IsTerminal touches the real fd and can't be +// faked otherwise). +// +//nolint:gochecknoglobals // function-type indirection for test injection; the same pattern is used by linksDisksReader / machineConfigReader / versionReader elsewhere in the package. +var stdinIsTTY = func() bool { + return term.IsTerminal(int(os.Stdin.Fd())) +} + +// wrapTUICommand installs a PreRunE that refuses the wrapped +// interactive-only command when stdin is not attached to a +// terminal. Two upstream commands take this path with different +// failure mechanisms: +// +// - `dashboard` uses gdamore/tcell directly. Without a tty it +// panics inside `tScreen.finish` with `close of nil channel` +// on teardown — operators running under CI / piped stdin / +// `< /dev/null` see a Go stack trace. +// - `edit` shells out to the kubectl external-editor helper +// (`k8s.io/kubectl/pkg/cmd/util/editor`). Without a tty it +// hangs allocating an editor session — operators see no +// output and have to ^C. +// +// Both manifest as no-actionable-signal failures the operator +// can't diagnose; the refusal here surfaces a clear hint instead. +// +// The cmdLabel parameter ("dashboard" / "edit") customises the +// refusal copy so the operator's correlation between the command +// they ran and the error they got is immediate. +// +// The PreRunE chains the original PreRunE first (modeline, +// GlobalArgs sync). If stdin is a terminal, behaviour is unchanged +// from upstream. +func wrapTUICommand(wrappedCmd *cobra.Command, cmdLabel string) { + originalPreRunE := wrappedCmd.PreRunE + + wrappedCmd.PreRunE = func(cmd *cobra.Command, args []string) error { + // TTY check runs BEFORE chaining the original PreRunE. + // Inverting the obvious chain order is deliberate: + // originalPreRunE is wrapTalosCommand's closure, which + // reads modeline files and mutates package-level + // GlobalArgs. For a non-tty invocation we know the + // command will be refused — there's no point parsing + // modeline files or mutating shared state. Refuse fast, + // touch nothing. + if !stdinIsTTY() { + return errors.WithHint( + errors.Newf("talm %s requires an interactive terminal; stdin is not a tty", cmdLabel), + "run from a regular terminal (not a CI job, piped stdin, or `< /dev/null`); for non-interactive resource inspection use `talm get` instead", + ) + } + + if originalPreRunE != nil { + return originalPreRunE(cmd, args) + } + + return nil + } +} From 4ecd2df58cb584695bce629cea403b85cfd37f54 Mon Sep 17 00:00:00 2001 From: Aleksei Sviridkin Date: Tue, 12 May 2026 16:51:54 +0300 Subject: [PATCH 2/2] fix(commands): preserve ShorthandDeprecated in flag clones and dedupe via helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address review feedback from gemini-code-assist on pkg/commands/talosctl_wrapper.go:121: the manual pflag.Flag clone used for the -f -> -F shorthand rename was missing the ShorthandDeprecated field. Without it, an upstream flag that marks its short form as deprecated would have that deprecation lost on the wrapped clone, and pflag's MarkShorthandDeprecated check (flag.go:719) would re-emit the shorthand in help output. Extract a renameFlagShorthand helper that mirrors every pflag.Flag metadata field, and use it from both the local-flag and persistent-flag copy loops. Value is intentionally shared (not deep-copied) — the parser must write to upstream's variable so the wrapped RunE (assigned from upstream) can still read its own state via cmd.Flags().Changed/.Value lookups against the wrapped command instance cobra passes at runtime. Assisted-By: Claude Signed-off-by: Aleksei Sviridkin --- pkg/commands/talosctl_wrapper.go | 53 ++++++++++++++++---------------- 1 file changed, 26 insertions(+), 27 deletions(-) diff --git a/pkg/commands/talosctl_wrapper.go b/pkg/commands/talosctl_wrapper.go index f701f5da..4b0f3ea9 100644 --- a/pkg/commands/talosctl_wrapper.go +++ b/pkg/commands/talosctl_wrapper.go @@ -92,6 +92,30 @@ var rootShadowedPersistentFlags = map[string]struct{}{ "siderov1-keys-dir": {}, } +// renameFlagShorthand clones an upstream flag with its shorthand +// replaced by `newShorthand`. Every pflag.Flag metadata field — +// including ShorthandDeprecated, which controls upstream's deprecation +// warning behaviour — is mirrored so downstream consumers that +// introspect the wrapped flag observe the same shape as the original. +// Value is intentionally NOT deep-copied: the parser must write to +// upstream's variable so the wrapped RunE (assigned from upstream) +// can still read its own state. +func renameFlagShorthand(flag *pflag.Flag, newShorthand string) *pflag.Flag { + return &pflag.Flag{ + Name: flag.Name, + Usage: flag.Usage, + Value: flag.Value, + DefValue: flag.DefValue, + Changed: flag.Changed, + NoOptDefVal: flag.NoOptDefVal, + Deprecated: flag.Deprecated, + Hidden: flag.Hidden, + Shorthand: newShorthand, + ShorthandDeprecated: flag.ShorthandDeprecated, + Annotations: flag.Annotations, + } +} + // propagatePersistentFlags mirrors the upstream command's persistent // flags onto the wrapped command's PersistentFlags so cobra's // mergePersistentFlags walks them through to wrapped children at @@ -107,19 +131,7 @@ func propagatePersistentFlags(cmd, wrappedCmd *cobra.Command) { } if flag.Shorthand == "f" { - renamed := &pflag.Flag{ - Name: flag.Name, - Usage: flag.Usage, - Value: flag.Value, - DefValue: flag.DefValue, - Changed: flag.Changed, - NoOptDefVal: flag.NoOptDefVal, - Deprecated: flag.Deprecated, - Hidden: flag.Hidden, - Shorthand: "F", - Annotations: flag.Annotations, - } - wrappedCmd.PersistentFlags().AddFlag(renamed) + wrappedCmd.PersistentFlags().AddFlag(renameFlagShorthand(flag, "F")) return } @@ -159,20 +171,7 @@ func wrapTalosCommand(cmd *cobra.Command, cmdName string) *cobra.Command { // If this flag has shorthand 'f', we need to change it to 'F' if flag.Shorthand == "f" { - // Create a copy with new shorthand - newFlag := &pflag.Flag{ - Name: flag.Name, - Usage: flag.Usage, - Value: flag.Value, - DefValue: flag.DefValue, - Changed: flag.Changed, - NoOptDefVal: flag.NoOptDefVal, // Important for bool flags to work without '=' - Deprecated: flag.Deprecated, - Hidden: flag.Hidden, - Shorthand: "F", // Change shorthand from 'f' to 'F' - Annotations: flag.Annotations, - } - wrappedCmd.Flags().AddFlag(newFlag) + wrappedCmd.Flags().AddFlag(renameFlagShorthand(flag, "F")) } else { wrappedCmd.Flags().AddFlag(flag) }