diff --git a/go.mod b/go.mod index 445feaf4..e65912ff 100644 --- a/go.mod +++ b/go.mod @@ -29,7 +29,7 @@ require ( github.com/mitchellh/go-ps v1.0.0 github.com/nais/device v0.0.0-20250703090236-08bd8b276591 github.com/nais/liberator v0.0.0-20250703075635-7da81032e1ae - github.com/nais/naistrix v0.3.1 + github.com/nais/naistrix v0.4.0 github.com/pterm/pterm v0.12.81 github.com/savioxavier/termlink v1.4.3 github.com/sethvargo/go-retry v0.3.0 @@ -70,6 +70,7 @@ require ( dario.cat/mergo v1.0.2 // indirect github.com/AlekSi/pointer v1.2.0 // indirect github.com/BurntSushi/toml v1.4.1-0.20240526193622-a339e1f7089c // indirect + github.com/MakeNowJust/heredoc v1.0.0 // indirect github.com/Masterminds/goutils v1.1.1 // indirect github.com/Masterminds/semver/v3 v3.3.1 // indirect github.com/Masterminds/sprig/v3 v3.3.0 // indirect diff --git a/go.sum b/go.sum index 83904782..251ecb69 100644 --- a/go.sum +++ b/go.sum @@ -40,6 +40,8 @@ github.com/GoogleCloudPlatform/cloudsql-proxy v1.37.7 h1:+ugXZyYXIb2NFWaXleLbouy github.com/GoogleCloudPlatform/cloudsql-proxy v1.37.7/go.mod h1:9iAgV6bpVWBq2BpG4J691fNCdKYd4JqR5ey9m4odfHE= github.com/Khan/genqlient v0.8.1 h1:wtOCc8N9rNynRLXN3k3CnfzheCUNKBcvXmVv5zt6WCs= github.com/Khan/genqlient v0.8.1/go.mod h1:R2G6DzjBvCbhjsEajfRjbWdVglSH/73kSivC9TLWVjU= +github.com/MakeNowJust/heredoc v1.0.0 h1:cXCdzVdstXyiTqTvfqk9SDHpKNjxuom+DOlyEeQ4pzQ= +github.com/MakeNowJust/heredoc v1.0.0/go.mod h1:mG5amYoWBHf8vpLOuehzbGGw0EHxpZZ6lCpQ4fNJ8LE= github.com/MakeNowJust/heredoc/v2 v2.0.1 h1:rlCHh70XXXv7toz95ajQWOWQnN4WNLt0TdpZYIR/J6A= github.com/MakeNowJust/heredoc/v2 v2.0.1/go.mod h1:6/2Abh5s+hc3g9nbWLe9ObDIOhaRrqsyY9MWy+4JdRM= github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs= @@ -414,8 +416,8 @@ github.com/nais/device v0.0.0-20250703090236-08bd8b276591 h1:MgFQR6KwI6nB8Dl8c6I github.com/nais/device v0.0.0-20250703090236-08bd8b276591/go.mod h1:5EJDz4LiIliyvsUZb8gRDwpqihR7r4J+T4pJxUjBc9I= github.com/nais/liberator 
v0.0.0-20250703075635-7da81032e1ae h1:aZqDazMXxDdL1qSG9SpTMm+WZYY0RJp0yTrs8TSzJ+0= github.com/nais/liberator v0.0.0-20250703075635-7da81032e1ae/go.mod h1:Z0ycpT5Ug9Edacd5173bShlh9vCtogCwoLCbjv/pz18= -github.com/nais/naistrix v0.3.1 h1:9/S297620cAtbFRN51105kJNKKVBXasa2ALmWbrITlM= -github.com/nais/naistrix v0.3.1/go.mod h1:lLHnRUy/wzrjr79pH1PqdzUDc3N7hp9Ex+H76RytWAw= +github.com/nais/naistrix v0.4.0 h1:Pf3bdnhWSnVDqwoJ496ZyupXvv5Gf4uu6BKXhFmj0CA= +github.com/nais/naistrix v0.4.0/go.mod h1:KQj7//htD3ZW/93v+6eyHjL/x6w93sc5dUBFCC/QdKY= github.com/onsi/ginkgo/v2 v2.23.4 h1:ktYTpKJAVZnDT4VjxSbiBenUjmlL/5QkBEocaWXiQus= github.com/onsi/ginkgo/v2 v2.23.4/go.mod h1:Bt66ApGPBFzHyR+JO10Zbt0Gsp4uWxu5mIOTusL46e8= github.com/onsi/gomega v1.37.0 h1:CdEG8g0S133B4OswTDC/5XPSzE1OeP29QOioj2PID2Y= diff --git a/internal/application/application.go b/internal/application/application.go index 47c92ef6..43f1f2e0 100644 --- a/internal/application/application.go +++ b/internal/application/application.go @@ -44,7 +44,7 @@ func newApplication(flags *root.Flags) *naistrix.Application { func Run(ctx context.Context, w io.Writer) error { flags := &root.Flags{} app := newApplication(flags) - executedCommand, err := app.Run(ctx, naistrix.NewWriter(w), os.Args[1:]) + err := app.Run(naistrix.RunWithContext(ctx), naistrix.RunWithOutput(naistrix.NewWriter(w))) autoComplete := slices.Contains(os.Args[1:], "__complete") if !autoComplete { @@ -57,7 +57,7 @@ func Run(ctx context.Context, w io.Writer) error { }() } - if !autoComplete && executedCommand != nil { + if executedCommand := app.ExecutedCommand(); !autoComplete && executedCommand != nil { collectCommandHistogram(ctx, executedCommand, err) } diff --git a/internal/application/application_test.go b/internal/application/application_test.go index 3b20847a..baaaebed 100644 --- a/internal/application/application_test.go +++ b/internal/application/application_test.go @@ -30,7 +30,7 @@ func runCommand(t *testing.T, ctx context.Context, cmd *naistrix.Command, 
parent t.Fatalf("failed to run command %q: %v", strings.Join(helpCmd, " "), err) } }() - _, err := newApplication(&root.Flags{}).Run(ctx, naistrix.Discard(), helpCmd) + err := newApplication(&root.Flags{}).Run(naistrix.RunWithContext(ctx), naistrix.RunWithOutput(naistrix.Discard()), naistrix.RunWithArgs(helpCmd)) if err != nil { t.Fatalf("failed to run command %s: %v", strings.Join(helpCmd, " "), err) } diff --git a/internal/debug/command.go b/internal/debug/command.go index 9bd412e6..4fc8d913 100644 --- a/internal/debug/command.go +++ b/internal/debug/command.go @@ -1,6 +1,7 @@ package debug import ( + "context" "fmt" "github.com/nais/cli/internal/debug/command/flag" @@ -8,23 +9,25 @@ import ( "k8s.io/client-go/kubernetes" ) -const debugImageDefault = "europe-north1-docker.pkg.dev/nais-io/nais/images/debug:latest" - -func Run(workloadName string, flags *flag.Debug) error { - clientSet, err := SetupClient(flags.DebugSticky, flags.Context) +func Run(ctx context.Context, workloadName string, flags *flag.Debug) error { + clientSet, err := SetupClient(flags, flags.Context) if err != nil { return err } - dg := Setup(clientSet, flags.DebugSticky, workloadName, debugImageDefault, flags.ByPod) - if err := dg.Debug(); err != nil { + dg := &Debug{ + podsClient: clientSet.CoreV1().Pods(flags.Namespace), + flags: flags, + workloadName: workloadName, + } + if err := dg.Debug(ctx); err != nil { return fmt.Errorf("debugging instance: %w", err) } return nil } -func SetupClient(flags *flag.DebugSticky, cluster flag.Context) (kubernetes.Interface, error) { +func SetupClient(flags *flag.Debug, cluster flag.Context) (kubernetes.Interface, error) { client := k8s.SetupControllerRuntimeClient(k8s.WithKubeContext(string(cluster))) if flags.Namespace == "" { @@ -32,7 +35,7 @@ func SetupClient(flags *flag.DebugSticky, cluster flag.Context) (kubernetes.Inte } if cluster != "" { - flags.Context = flag.Context(cluster) + flags.Context = cluster } clientSet, err := 
k8s.SetupClientGo(string(cluster)) diff --git a/internal/debug/command/debug.go b/internal/debug/command/debug.go index c33d01d2..64d7bdc0 100644 --- a/internal/debug/command/debug.go +++ b/internal/debug/command/debug.go @@ -2,11 +2,11 @@ package command import ( "context" + "time" "github.com/MakeNowJust/heredoc/v2" "github.com/nais/cli/internal/debug" "github.com/nais/cli/internal/debug/command/flag" - "github.com/nais/cli/internal/debug/tidy" "github.com/nais/cli/internal/k8s" "github.com/nais/cli/internal/root" "github.com/nais/naistrix" @@ -14,58 +14,28 @@ import ( func Debug(parentFlags *root.Flags) *naistrix.Command { defaultContext, defaultNamespace := k8s.GetDefaultContextAndNamespace() - stickyFlags := &flag.DebugSticky{ + flags := &flag.Debug{ Flags: parentFlags, Context: flag.Context(defaultContext), Namespace: defaultNamespace, - } - - debugFlags := &flag.Debug{ - DebugSticky: stickyFlags, + TTL: 24 * time.Hour, + Timeout: 30 * time.Second, } return &naistrix.Command{ Name: "debug", Title: "Create and attach to a debug container.", Description: heredoc.Doc(` - When flag "--copy" is set, the command can be used to debug a copy of the original pod, allowing you to troubleshoot without affecting the live pod. + When "--copy" is used the command can be used to debug a copy of the original pod, allowing you to troubleshoot without affecting the live pod. To debug a live pod, run the command without the "--copy" flag. - - You can only reconnect to the debug session if the pod is running. 
- `), - Args: []naistrix.Argument{ - {Name: "app_name"}, - }, - Flags: debugFlags, - StickyFlags: stickyFlags, - RunFunc: func(ctx context.Context, out naistrix.Output, args []string) error { - return debug.Run(args[0], debugFlags) - }, - SubCommands: []*naistrix.Command{ - tidyCommand(stickyFlags), - }, - } -} - -func tidyCommand(parentFlags *flag.DebugSticky) *naistrix.Command { - flags := &flag.DebugTidy{ - DebugSticky: parentFlags, - } - return &naistrix.Command{ - Name: "tidy", - Title: "Clean up debug containers and debug pods.", - Description: heredoc.Doc(` - Remove debug containers created by the "nais debug" command. - - Set the "--copy" flag to delete copy pods. `), Args: []naistrix.Argument{ {Name: "app_name"}, }, Flags: flags, RunFunc: func(ctx context.Context, out naistrix.Output, args []string) error { - return tidy.Run(args[0], flags) + return debug.Run(ctx, args[0], flags) }, } } diff --git a/internal/debug/command/flag/flag.go b/internal/debug/command/flag/flag.go index a3ac355e..86a03179 100644 --- a/internal/debug/command/flag/flag.go +++ b/internal/debug/command/flag/flag.go @@ -1,24 +1,18 @@ package flag import ( + "time" + "github.com/nais/cli/internal/root" ) -type ( - Context string - DebugSticky struct { - *root.Flags - Context Context `name:"context" short:"c" usage:"The kubeconfig |CONTEXT| to use. Defaults to current context."` - Namespace string `name:"namespace" short:"n" usage:"The kubernetes |NAMESPACE| to use. Defaults to current namespace."` - Copy bool `name:"copy" usage:"Create a copy of the pod with a debug container. The original pod remains running and unaffected."` - } -) +type Context string type Debug struct { - *DebugSticky - ByPod bool `name:"by-pod" short:"b" usage:"Attach to a specific |BY-POD| in a workload."` -} - -type DebugTidy struct { - *DebugSticky + *root.Flags + Context Context `short:"c" usage:"The kubeconfig |context| to use. 
Defaults to current context."` + Namespace string `short:"n" usage:"The kubernetes |namespace| to use. Defaults to current namespace."` + Copy bool `usage:"Create a copy of the pod with a debug container. The original pod remains running and unaffected."` + TTL time.Duration `usage:"|Duration| the debug pod remains after exit. Only has effect when --copy is specified."` + Timeout time.Duration `usage:"|Duration| to wait for each remote interaction this command does. Usually the default is sufficient."` } diff --git a/internal/debug/debug.go b/internal/debug/debug.go index 0b4ab1e3..759e9513 100644 --- a/internal/debug/debug.go +++ b/internal/debug/debug.go @@ -9,225 +9,316 @@ import ( "time" "github.com/nais/cli/internal/debug/command/flag" + "github.com/nais/cli/internal/task" "github.com/pterm/pterm" - core_v1 "k8s.io/api/core/v1" + corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/client-go/kubernetes" + v1 "k8s.io/client-go/kubernetes/typed/core/v1" ) const ( - debuggerSuffix = "nais-debugger" - debuggerContainerDefaultName = "debugger" + // debugImage is the image used for the debug container. + debugImage = "europe-north1-docker.pkg.dev/nais-io/nais/images/debug:latest" + + // debugPodSuffix will be appended to the pod name when creating a debug pod. + debugPodSuffix = "nais-debugger" + + // debugPodContainerName is the name of the container that will be created in the debug pod. This name is not used + // when creating ephemeral debug containers. 
+ debugPodContainerName = "debugger" ) type Debug struct { - ctx context.Context - client kubernetes.Interface - flags *flag.DebugSticky + podsClient v1.PodInterface + flags *flag.Debug workloadName string - debugImage string - byPod bool } -func Setup(client kubernetes.Interface, flags *flag.DebugSticky, workloadName, debugImage string, byPod bool) *Debug { - return &Debug{ - ctx: context.Background(), - client: client, - flags: flags, - workloadName: workloadName, - debugImage: debugImage, - byPod: byPod, +func (d *Debug) Debug(ctx context.Context) error { + pods, err := task.Timed(ctx, d.flags.Timeout, "Fetching pods for workload", func(ctx context.Context) (*corev1.PodList, error) { + return d.getPodsForWorkload(ctx) + }) + if err != nil { + pterm.Error.Println("Failed to get pods for workload") + return err + } + + if len(pods.Items) == 0 { + pterm.Info.Println("No pods found.") + return nil + } + + pod, err := interactiveSelectPod(pods.Items) + if err != nil { + pterm.Error.Println("Failed to select pod") + return err + } + + if err := d.debugPod(ctx, *pod); err != nil { + pterm.Error.Println("Failed to debug pod") + return err } + + return nil } -func (d *Debug) getPodsForWorkload() (*core_v1.PodList, error) { - pterm.Info.Println("Fetching workload...") - var podList *core_v1.PodList - var err error - podList, err = d.client.CoreV1().Pods(d.flags.Namespace).List(d.ctx, metav1.ListOptions{ - LabelSelector: fmt.Sprintf("app.kubernetes.io/name=%s", d.workloadName), - }) - if len(podList.Items) == 0 { - podList, err = d.client.CoreV1().Pods(d.flags.Namespace).List(d.ctx, metav1.ListOptions{ - LabelSelector: fmt.Sprintf("app=%s", d.workloadName), - }) +func (d *Debug) getPodsForWorkload(ctx context.Context) (*corev1.PodList, error) { + podList, err := d.podsClient.List(ctx, labelSelector("app.kubernetes.io/name", d.workloadName)) + if err != nil { + return nil, fmt.Errorf("failed to get pods: %w", err) + } + + if len(podList.Items) > 0 { + return podList, nil } 
+ + podList, err = d.podsClient.List(ctx, labelSelector("app", d.workloadName)) if err != nil { return nil, fmt.Errorf("failed to get pods: %w", err) } + return podList, nil } -func debuggerContainerName(podName string) string { - return fmt.Sprintf("%s-%s", podName, debuggerSuffix) +func (d *Debug) podExists(name string) func(context.Context) (bool, error) { + return func(ctx context.Context) (bool, error) { + if _, err := d.podsClient.Get(ctx, name, metav1.GetOptions{}); err == nil { + return true, nil + } else if k8serrors.IsNotFound(err) { + return false, nil + } else { + return false, err + } + } } -func (d *Debug) debugPod(podName string) error { - const maxRetries = 6 - const pollInterval = 5 +func (d *Debug) debugPod(ctx context.Context, pod corev1.Pod) error { + args := []string{ + "debug", + "pod/" + pod.Name, + "--namespace", d.flags.Namespace, + "--context", string(d.flags.Context), + "--stdin", + "--tty", + "--profile=restricted", + "--image", debugImage, + "--quiet", + } if d.flags.Copy { - pN := debuggerContainerName(podName) - _, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, pN, metav1.GetOptions{}) - if err == nil { - pterm.Info.Printf("%s already exists, trying to attach...\n", pN) - - // Polling loop to check if the debugger container is running - for i := 0; i < maxRetries; i++ { - pterm.Info.Printf("Attempt %d/%d: Time remaining: %d seconds\n", i+1, maxRetries, (maxRetries-i)*pollInterval) - pod, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, pN, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to get debug pod copy %s: %v", pN, err) - } - - for _, c := range pod.Status.ContainerStatuses { - if c.Name == debuggerContainerDefaultName && c.State.Running != nil { - pterm.Success.Println("Container is running. 
Attaching...") - return d.attachToExistingDebugContainer(pN) - } - } - time.Sleep(time.Duration(pollInterval) * time.Second) - } + return d.createDebugPod(ctx, args, pod) + } - // If the loop finishes without finding the running container - return fmt.Errorf("container did not start within the expected time") - } else if !k8serrors.IsNotFound(err) { - return fmt.Errorf("failed to check for existing debug pod copy %s: %v", pN, err) - } - } else { - pod, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, podName, metav1.GetOptions{}) - if err != nil { - return fmt.Errorf("failed to get pod %s: %v", podName, err) - } + return d.createDebugContainer(ctx, args) +} - if len(pod.Spec.EphemeralContainers) > 0 { - pterm.Warning.Printf("The container %s already has %d terminated debug containers.\n", podName, len(pod.Spec.EphemeralContainers)) - pterm.Info.Printf("Please consider using 'nais debug tidy %s' to clean up\n", d.workloadName) - } +func (d *Debug) createDebugContainer(ctx context.Context, commonArgs []string) error { + args := append(commonArgs, "--target", d.workloadName) // workloadName is the same as container name for nais apps + + _, err := task.Timed(ctx, d.flags.Timeout, "Creating ephemeral debug container", func(ctx context.Context) (*any, error) { + return nil, d.kubectl(ctx, true, args...) 
+ }) + if err != nil { + pterm.Error.Println("Failed to create ephemeral debug container") + return err } - return d.createDebugPod(podName) + pterm.Info.Println("Remember to restart the pod to remove the debug container") + return nil } -func (d *Debug) attachToExistingDebugContainer(podName string) error { - cmd := exec.Command( - "kubectl", - "attach", - "-n", d.flags.Namespace, - fmt.Sprintf("pod/%s", podName), - "-c", debuggerContainerDefaultName, - "-i", - "-t", - ) +func (d *Debug) createDebugPod(ctx context.Context, commonArgs []string, pod corev1.Pod) error { + debugPodName := createDebugPodName(pod.Name) - if d.flags.Context != "" { - cmd.Args = append(cmd.Args, "--context", string(d.flags.Context)) + exists, err := task.Timed(ctx, d.flags.Timeout, "Check for existing debug pod", d.podExists(debugPodName)) + if err != nil { + return fmt.Errorf("failed to check for existing debug pod: %w", err) + } else if exists { + return d.attach(ctx, debugPodName) } - cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr + args := append(commonArgs, + "--copy-to", debugPodName, + "--container", debugPodContainerName, + "--keep-annotations", + "--keep-liveness", + "--keep-readiness", + "--keep-startup", + "--attach=false", + ) + _, err = task.Timed(ctx, d.flags.Timeout, "Create debug pod", func(ctx context.Context) (*any, error) { + return nil, d.kubectl(ctx, false, args...) 
+ }) + if err != nil { + return fmt.Errorf("failed to create debug pod: %v", err) + } - if err := cmd.Start(); err != nil { - return fmt.Errorf("failed to start attach command: %v", err) + _, err = task.Timed(ctx, d.flags.Timeout, "Annotate debug pod", func(ctx context.Context) (*any, error) { + return nil, d.annotateAndLabelDebugPod(ctx, debugPodName, pod.Labels) + }) + if err != nil { + return fmt.Errorf("failed to annotate and label debug pod: %w", err) } - pterm.Success.Printf("Attached to pod %s\n", podName) - if err := cmd.Wait(); err != nil { - return fmt.Errorf("attach command failed: %v", err) + if err := d.attach(ctx, debugPodName); err != nil { + return fmt.Errorf("failed to attach to debug pod %q: %w", debugPodName, err) } + // TODO ask if the user wants to delete the debug pod after attaching + pterm.Info.Printf("Debug pod will self-destruct in %s\n", d.flags.TTL) return nil } -func (d *Debug) createDebugPod(podName string) error { +func (d *Debug) annotateAndLabelDebugPod(ctx context.Context, debugPodName string, existingLabels map[string]string) error { args := []string{ - "debug", - "-n", d.flags.Namespace, - fmt.Sprintf("pod/%s", podName), - "-it", - "--stdin", - "--tty", - "--profile=restricted", - "-q", - "--image", d.debugImage, + "label", + "pod/" + debugPodName, + "cli.nais.io/debug=true", + "euthanaisa.nais.io/enabled=true", } - if d.flags.Context != "" { - args = append(args, "--context", string(d.flags.Context)) + delete(existingLabels, "pod-template-hash") + for label, value := range existingLabels { + args = append(args, fmt.Sprintf("%s=%s", label, value)) } - if d.flags.Copy { - args = append(args, - "--copy-to", debuggerContainerName(podName), - "-c", "debugger", - ) - } else { - args = append(args, - "--target", d.workloadName) + if err := d.kubectl( + ctx, + false, + args..., + ); err != nil { + return fmt.Errorf("unable to label debug pod: %w", err) } - cmd := exec.Command("kubectl", args...) 
- cmd.Stdin = os.Stdin - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr + killAfter := time.Now().Add(d.flags.TTL).Format(time.RFC3339) - if err := cmd.Start(); err != nil { - return fmt.Errorf("failed to start debug command: %v", err) + if err := d.kubectl( + ctx, + false, + "annotate", + "pod/"+debugPodName, + "euthanaisa.nais.io/kill-after="+killAfter, + ); err != nil { + return fmt.Errorf("unable to annotate debug pod: %w", err) } - if d.flags.Copy { - pterm.Info.Printf("Debugging pod copy created, enable process namespace sharing in %s\n", debuggerContainerName(podName)) - } else { - pterm.Info.Println("Debugging container created...") - } - pterm.Info.Printf("Using debugger image %s\n", d.debugImage) + return nil +} - if err := cmd.Wait(); err != nil { - if strings.Contains(err.Error(), "exit status 1") { - pterm.Info.Println("Debugging container exited") - return nil +func interactiveSelectPod(pods []corev1.Pod) (*corev1.Pod, error) { + if len(pods) > 1 { + var podNames []string + for _, p := range pods { + podNames = append(podNames, p.Name) } - return fmt.Errorf("debug command failed: %v", err) + + result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).WithDefaultText(pterm.Normal("Please select a pod")).Show() + if err != nil { + pterm.Error.Println("Prompt failed") + return nil, err + } + + for _, p := range pods { + if p.Name == result { + return &p, nil + } + } + } else if len(pods) == 1 { + return &pods[0], nil } - if d.flags.Copy { - pterm.Info.Printf("Run 'nais debug -cp %s' command to attach to the debug pod\n", d.workloadName) + return nil, fmt.Errorf("no pod selected or found") +} + +func labelSelector(key, value string) metav1.ListOptions { + excludeDebugPods := "cli.nais.io/debug!=true" + return metav1.ListOptions{ + LabelSelector: strings.Join([]string{excludeDebugPods, key + "=" + value}, ","), } +} - return nil +// debugPodName generates a name for the debug pod copy given a pod name. 
+func createDebugPodName(podName string) string { + return podName + "-" + debugPodSuffix +} + +func (d *Debug) debugContainerIsReady(podName string) func(ctx context.Context) (*corev1.Pod, error) { + return func(ctx context.Context) (*corev1.Pod, error) { + pod, err := d.podsClient.Get(ctx, podName, metav1.GetOptions{}) + if err != nil { + return nil, err + } + + for _, c := range pod.Status.ContainerStatuses { + if c.Name == debugPodContainerName && c.State.Running != nil { + return pod, nil + } + } + + return nil, fmt.Errorf("no ready debug container with name %q found in pod %q", debugPodContainerName, podName) + } } -func (d *Debug) Debug() error { - pods, err := d.getPodsForWorkload() +func (d *Debug) attach(ctx context.Context, podName string) error { + _, err := task.Timed(ctx, d.flags.Timeout, "Attaching to container", func(ctx context.Context) (*any, error) { + _, err := withRetryOnErr(d.debugContainerIsReady(podName))(ctx) + return nil, err + }) if err != nil { - return err + return fmt.Errorf("debug container did not start: %w", err) } - var podNames []string - for _, pod := range pods.Items { - podNames = append(podNames, pod.Name) + pterm.Info.Printf("You are now typing in the debug container in %q. 
Type exit to exit.\n", podName) + return d.kubectl(ctx, true, "attach", "pod/"+podName, "--container", debugPodContainerName, "--stdin", "--tty", "--quiet") +} + +func (d *Debug) kubectl(ctx context.Context, attach bool, args ...string) error { + cmd := exec.CommandContext(ctx, + "kubectl", + append(args, + "--namespace", d.flags.Namespace, + "--context", string(d.flags.Context), + )..., + ) + + if d.flags.IsDebug() { + pterm.Info.Println("Running command:", strings.Join(cmd.Args, " ")) } - if len(podNames) == 0 { - pterm.Info.Println("No pods found.") - return nil + if attach { + cmd.Stdin = os.Stdin + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + return cmd.Run() } - podName := podNames[0] - if d.byPod { - result, err := pterm.DefaultInteractiveSelect.WithOptions(podNames).Show() - if err != nil { - pterm.Error.Printf("Prompt failed: %v\n", err) - return err - } - podName = result + out, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("kubectl command failed: %w\nOutput: %s", err, string(out)) } - if err := d.debugPod(podName); err != nil { - pterm.Error.Printf("Failed to debug pod %s: %v\n", podName, err) + if d.flags.IsVerbose() { + pterm.Info.Println("Command output:", string(out)) } return nil } + +// withRetryOnErr retries the function until it returns nil error, or context is done. 
+func withRetryOnErr[T any](f func(context.Context) (*T, error)) func(context.Context) (*T, error) { + return func(ctx context.Context) (*T, error) { + ret, err := f(ctx) + for err != nil { + select { + case <-ctx.Done(): + return nil, err + + default: + ret, err = f(ctx) + } + } + + return ret, err + } +} diff --git a/internal/debug/tidy.go b/internal/debug/tidy.go deleted file mode 100644 index ba1bbc78..00000000 --- a/internal/debug/tidy.go +++ /dev/null @@ -1,65 +0,0 @@ -package debug - -import ( - "fmt" - - "github.com/pterm/pterm" - k8serrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -func (d *Debug) Tidy() error { - pods, err := d.getPodsForWorkload() - if err != nil { - return err - } - - var podNames []string - for _, pod := range pods.Items { - podNames = append(podNames, pod.Name) - } - - if len(podNames) == 0 { - pterm.Info.Println("No pods found") - return nil - } - - for _, pod := range pods.Items { - podName := pod.Name - if d.flags.Copy { - podName = debuggerContainerName(pod.Name) - } - - if !d.flags.Copy && len(pod.Spec.EphemeralContainers) == 0 { - pterm.Info.Printf("No debug container found for: %s\n", pod.Name) - continue - } - - _, err := d.client.CoreV1().Pods(d.flags.Namespace).Get(d.ctx, podName, metav1.GetOptions{}) - if err != nil { - if k8serrors.IsNotFound(err) { - pterm.Info.Printf("No debug pod found for: %s\n", pod.Name) - continue - } - pterm.Error.Printf("Failed to get pod %s: %v\n", podName, err) - return err - } - - confirm, _ := pterm.DefaultInteractiveConfirm. - WithDefaultText(fmt.Sprintf("Pod '%s' with debug container, do you want to clean up?", podName)). 
- Show() - - if !confirm { - pterm.Info.Printf("Skipping deletion for pod: %s\n", podName) - continue - } - - // Delete pod if user confirms - if err := d.client.CoreV1().Pods(d.flags.Namespace).Delete(d.ctx, podName, metav1.DeleteOptions{}); err != nil { - pterm.Error.Printf("Failed to delete pod %s: %v\n", podName, err) - } else { - pterm.Success.Printf("Deleted pod: %s\n", podName) - } - } - return nil -} diff --git a/internal/debug/tidy/command.go b/internal/debug/tidy/command.go deleted file mode 100644 index 8c4e48fe..00000000 --- a/internal/debug/tidy/command.go +++ /dev/null @@ -1,22 +0,0 @@ -package tidy - -import ( - "fmt" - - "github.com/nais/cli/internal/debug" - "github.com/nais/cli/internal/debug/command/flag" -) - -func Run(workloadName string, flags *flag.DebugTidy) error { - clientSet, err := debug.SetupClient(flags.DebugSticky, flags.Context) - if err != nil { - return err - } - - dg := debug.Setup(clientSet, flags.DebugSticky, workloadName, "", false) - if err := dg.Tidy(); err != nil { - return fmt.Errorf("debugging instance: %w", err) - } - - return nil -} diff --git a/internal/metric/otel.go b/internal/metric/otel.go index 661f0578..f79fa59f 100644 --- a/internal/metric/otel.go +++ b/internal/metric/otel.go @@ -43,7 +43,7 @@ func Initialize() func(verbose bool) { ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second) defer cancel() err := provider.Shutdown(ctx) - if err != nil { + if err != nil && verbose { fmt.Printf("Failed up upload metrics: %v\n", err) } } diff --git a/internal/task/task.go b/internal/task/task.go new file mode 100644 index 00000000..d02109a6 --- /dev/null +++ b/internal/task/task.go @@ -0,0 +1,83 @@ +package task + +// template +// _, _ = runTimedTask(d.ctx, d.flags.Timeout, "desc", func(ctx context.Context) (*any, error) { +// return nil, nil +// }) + +import ( + "context" + "sync" + "time" + + "github.com/pterm/pterm" +) + +func Timed[T any](parentCtx context.Context, timeout time.Duration, description 
string, f func(ctx context.Context) (T, error)) (T, error) { + ctx, cancel := context.WithTimeout(parentCtx, timeout) + defer cancel() + + done := make(chan string, 1) + lock := &sync.Mutex{} + + lock.Lock() + go start(ctx, description, done, lock) + ret, err := f(ctx) + result := pterm.Green("done") + if err != nil { + result = pterm.Red("err") + } + stop(done, result, lock) + return ret, err +} + +func stop(done chan string, status string, lock *sync.Mutex) { + if done != nil { + select { + case done <- status: + default: + } + lock.Lock() // Ensure we wait for the status area to finish before returning + } else { + close(done) + done = nil + } +} + +func start(ctx context.Context, description string, done chan string, lock *sync.Mutex) { + defer lock.Unlock() + statusArea, err := (&pterm.AreaPrinter{}).Start() + if err != nil { + pterm.Error.Printf("failed to create status area: %v\n", err) + return + } + defer statusArea.Stop() + + setStatus := func(status string) { + statusArea.Update(pterm.Sprintf("%s [%v]", description, status)) + } + + for { + select { + case status := <-done: + setStatus(status) + pterm.Println() + return + case <-ctx.Done(): + switch err := ctx.Err(); err { + case context.DeadlineExceeded: + setStatus(pterm.Yellow("timeout")) + case context.Canceled: + setStatus(pterm.Red("cancelled")) + } + pterm.Println() + return + case <-time.After(50 * time.Millisecond): + if deadline, ok := ctx.Deadline(); ok { + setStatus(time.Until(deadline).Round(time.Second).String()) + } else { + setStatus("?") + } + } + } +}