diff --git a/cli/command/telemetry_docker.go b/cli/command/telemetry_docker.go
index dea7a979e6..45497aad9c 100644
--- a/cli/command/telemetry_docker.go
+++ b/cli/command/telemetry_docker.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"net/url"
+	"os"
 	"path"
 
 	"github.com/pkg/errors"
@@ -14,7 +15,11 @@ import (
 	sdktrace "go.opentelemetry.io/otel/sdk/trace"
 )
 
-const otelContextFieldName = "otel"
+const (
+	otelContextFieldName     string = "otel"
+	otelExporterOTLPEndpoint string = "OTEL_EXPORTER_OTLP_ENDPOINT"
+	debugEnvVarPrefix        string = "DOCKER_CLI_"
+)
 
 // dockerExporterOTLPEndpoint retrieves the OTLP endpoint used for the docker reporter
 // from the current context.
@@ -49,8 +54,15 @@ func dockerExporterOTLPEndpoint(cli Cli) (endpoint string, secure bool) {
 	}
 
 	// keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/
-	endpoint, ok = otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string)
-	if !ok {
+	endpoint, _ = otelMap[otelExporterOTLPEndpoint].(string)
+
+	// Override with env var value if it exists AND IS SET
+	// (ignore otel defaults for this override when the key exists but is empty)
+	if override := os.Getenv(debugEnvVarPrefix + otelExporterOTLPEndpoint); override != "" {
+		endpoint = override
+	}
+
+	if endpoint == "" {
 		return "", false
 	}
 
diff --git a/cli/command/telemetry_utils.go b/cli/command/telemetry_utils.go
new file mode 100644
index 0000000000..034fa1f00a
--- /dev/null
+++ b/cli/command/telemetry_utils.go
@@ -0,0 +1,179 @@
+package command
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/docker/cli/cli/version"
+	"github.com/pkg/errors"
+	"github.com/spf13/cobra"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/metric"
+)
+
+// BaseMetricAttributes returns an attribute.Set containing attributes to attach to metrics/traces
+func BaseMetricAttributes(cmd *cobra.Command) attribute.Set {
+	attrList := []attribute.KeyValue{
+		attribute.String("command.name", getCommandName(cmd)),
+	}
+	return attribute.NewSet(attrList...)
+}
+
+// InstrumentCobraCommands wraps all cobra commands' RunE funcs to set a command duration metric using otel.
+//
+// Note: this should be the last func to wrap/modify the PersistentRunE/RunE funcs before command execution.
+//
+// can also be used for spans!
+func InstrumentCobraCommands(cmd *cobra.Command, mp metric.MeterProvider) {
+	meter := getDefaultMeter(mp)
+	// If PersistentPreRunE is nil, make it execute PersistentPreRun and return nil by default
+	ogPersistentPreRunE := cmd.PersistentPreRunE
+	if ogPersistentPreRunE == nil {
+		ogPersistentPreRun := cmd.PersistentPreRun
+		//nolint:unparam // necessary because error will always be nil here
+		ogPersistentPreRunE = func(cmd *cobra.Command, args []string) error {
+			// Neither hook may have been set; calling a nil func would panic.
+			if ogPersistentPreRun != nil {
+				ogPersistentPreRun(cmd, args)
+			}
+			return nil
+		}
+		cmd.PersistentPreRun = nil
+	}
+
+	// wrap RunE in PersistentPreRunE so that this operation gets executed on all children commands
+	cmd.PersistentPreRunE = func(cmd *cobra.Command, args []string) error {
+		// If RunE is nil, make it execute Run and return nil by default
+		ogRunE := cmd.RunE
+		if ogRunE == nil {
+			ogRun := cmd.Run
+			//nolint:unparam // necessary because error will always be nil here
+			ogRunE = func(cmd *cobra.Command, args []string) error {
+				// Guard against a command that defines neither Run nor RunE.
+				if ogRun != nil {
+					ogRun(cmd, args)
+				}
+				return nil
+			}
+			cmd.Run = nil
+		}
+		cmd.RunE = func(cmd *cobra.Command, args []string) error {
+			// start the timer as the first step of every cobra command
+			stopCobraCmdTimer := startCobraCommandTimer(cmd, meter)
+			cmdErr := ogRunE(cmd, args)
+			stopCobraCmdTimer(cmdErr)
+			return cmdErr
+		}
+
+		return ogPersistentPreRunE(cmd, args)
+	}
+}
+
+// startCobraCommandTimer records the current time and returns a func that, when
+// called with the command's resulting error, adds the elapsed time in
+// milliseconds to the "command.time" counter.
+func startCobraCommandTimer(cmd *cobra.Command, meter metric.Meter) func(err error) {
+	ctx := cmd.Context()
+	baseAttrs := BaseMetricAttributes(cmd)
+	// NOTE(review): the instrument creation error is discarded here; confirm the
+	// returned counter is always usable when the error is non-nil.
+	durationCounter, _ := meter.Float64Counter(
+		"command.time",
+		metric.WithDescription("Measures the duration of the cobra command"),
+		metric.WithUnit("ms"),
+	)
+	start := time.Now()
+
+	return func(err error) {
+		duration := float64(time.Since(start)) / float64(time.Millisecond)
+		cmdStatusAttrs := attributesFromError(err)
+		durationCounter.Add(ctx, duration,
+			metric.WithAttributeSet(baseAttrs),
+			metric.WithAttributeSet(attribute.NewSet(cmdStatusAttrs...)),
+		)
+	}
+}
+
+// attributesFromError returns the status attributes for a finished command:
+// "command.status.code" is always present, and "command.error.type" is added
+// when err is non-nil.
+func attributesFromError(err error) []attribute.KeyValue {
+	attrs := []attribute.KeyValue{}
+	exitCode := 0
+	if err != nil {
+		exitCode = 1
+		// errors.As, unlike a type assertion, also matches a statusError that
+		// was wrapped with additional context further up the call chain.
+		var stderr statusError
+		if errors.As(err, &stderr) {
+			// StatusError should only be used for errors, and all errors should
+			// have a non-zero exit status, so only set this here if this value isn't 0
+			if stderr.StatusCode != 0 {
+				exitCode = stderr.StatusCode
+			}
+		}
+		attrs = append(attrs, attribute.String("command.error.type", otelErrorType(err)))
+	}
+	attrs = append(attrs, attribute.String("command.status.code", strconv.Itoa(exitCode)))
+
+	return attrs
+}
+
+// otelErrorType returns an attribute for the error type based on the error category.
+func otelErrorType(err error) string {
+	name := "generic"
+	if errors.Is(err, context.Canceled) {
+		name = "canceled"
+	}
+	return name
+}
+
+// statusError reports an unsuccessful exit by a command.
+//
+// NOTE(review): this is a package-local type, so the cli.StatusError values
+// handled in cmd/docker will not match it in attributesFromError; confirm intent.
+type statusError struct {
+	Status     string
+	StatusCode int
+}
+
+// Error implements the error interface.
+func (e statusError) Error() string {
+	return fmt.Sprintf("Status: %s, Code: %d", e.Status, e.StatusCode)
+}
+
+// getCommandName gets the cobra command name in the format
+// `... parentCommandName commandName` by traversing its parent commands recursively
+// until the root command is reached.
+//
+// Note: The root command's name is excluded. If cmd is the root cmd, return ""
+func getCommandName(cmd *cobra.Command) string {
+	fullCmdName := getFullCommandName(cmd)
+	i := strings.Index(fullCmdName, " ")
+	if i == -1 {
+		return ""
+	}
+	return fullCmdName[i+1:]
+}
+
+// getFullCommandName gets the full cobra command name in the format
+// `... parentCommandName commandName` by traversing its parent commands recursively
+// until the root command is reached.
+func getFullCommandName(cmd *cobra.Command) string {
+	if cmd.HasParent() {
+		return fmt.Sprintf("%s %s", getFullCommandName(cmd.Parent()), cmd.Name())
+	}
+	return cmd.Name()
+}
+
+// getDefaultMeter gets the default metric.Meter for the application
+// using the given metric.MeterProvider
+func getDefaultMeter(mp metric.MeterProvider) metric.Meter {
+	return mp.Meter(
+		"github.com/docker/cli",
+		metric.WithInstrumentationVersion(version.Version),
+	)
+}
diff --git a/cli/command/telemetry_utils_test.go b/cli/command/telemetry_utils_test.go
new file mode 100644
index 0000000000..b70ed0f69c
--- /dev/null
+++ b/cli/command/telemetry_utils_test.go
@@ -0,0 +1,94 @@
+package command
+
+import (
+	"testing"
+
+	"github.com/spf13/cobra"
+	"gotest.tools/v3/assert"
+)
+
+func setupCobraCommands() (*cobra.Command, *cobra.Command, *cobra.Command) {
+	rootCmd := &cobra.Command{
+		Use: "root [OPTIONS] COMMAND [ARG...]",
+	}
+	childCmd := &cobra.Command{
+		Use: "child [OPTIONS] COMMAND [ARG...]",
+	}
+	grandchildCmd := &cobra.Command{
+		Use: "grandchild [OPTIONS] COMMAND [ARG...]",
+	}
+	childCmd.AddCommand(grandchildCmd)
+	rootCmd.AddCommand(childCmd)
+
+	return rootCmd, childCmd, grandchildCmd
+}
+
+func TestGetFullCommandName(t *testing.T) {
+	rootCmd, childCmd, grandchildCmd := setupCobraCommands()
+
+	t.Parallel()
+
+	for _, tc := range []struct {
+		testName string
+		cmd      *cobra.Command
+		expected string
+	}{
+		{
+			testName: "rootCmd",
+			cmd:      rootCmd,
+			expected: "root",
+		},
+		{
+			testName: "childCmd",
+			cmd:      childCmd,
+			expected: "root child",
+		},
+		{
+			testName: "grandChild",
+			cmd:      grandchildCmd,
+			expected: "root child grandchild",
+		},
+	} {
+		tc := tc
+		t.Run(tc.testName, func(t *testing.T) {
+			t.Parallel()
+			actual := getFullCommandName(tc.cmd)
+			assert.Equal(t, actual, tc.expected)
+		})
+	}
+}
+
+func TestGetCommandName(t *testing.T) {
+	rootCmd, childCmd, grandchildCmd := setupCobraCommands()
+
+	t.Parallel()
+
+	for _, tc := range []struct {
+		testName string
+		cmd      *cobra.Command
+		expected string
+	}{
+		{
+			testName: "rootCmd",
+			cmd:      rootCmd,
+			expected: "",
+		},
+		{
+			testName: "childCmd",
+			cmd:      childCmd,
+			expected: "child",
+		},
+		{
+			testName: "grandchildCmd",
+			cmd:      grandchildCmd,
+			expected: "child grandchild",
+		},
+	} {
+		tc := tc
+		t.Run(tc.testName, func(t *testing.T) {
+			t.Parallel()
+			actual := getCommandName(tc.cmd)
+			assert.Equal(t, actual, tc.expected)
+		})
+	}
+}
diff --git a/cmd/docker/docker.go b/cmd/docker/docker.go
index c79bfaaa0d..1b871289ee 100644
--- a/cmd/docker/docker.go
+++ b/cmd/docker/docker.go
@@ -1,6 +1,7 @@
 package main
 
 import (
+	"context"
 	"fmt"
 	"os"
 	"os/exec"
@@ -21,17 +22,19 @@ import (
 	"github.com/sirupsen/logrus"
 	"github.com/spf13/cobra"
 	"github.com/spf13/pflag"
+	"go.opentelemetry.io/otel"
 )
 
 func main() {
-	dockerCli, err := command.NewDockerCli()
+	ctx := context.Background()
+	dockerCli, err := command.NewDockerCli(command.WithBaseContext(ctx))
 	if err != nil {
 		fmt.Fprintln(os.Stderr, err)
 		os.Exit(1)
 	}
 	logrus.SetOutput(dockerCli.Err())
 
-	if err := runDocker(dockerCli); err != nil {
+	if err := runDocker(ctx, dockerCli); err != nil {
 		if sterr, ok := err.(cli.StatusError); ok {
 			if sterr.Status != "" {
 				fmt.Fprintln(dockerCli.Err(), sterr.Status)
@@ -286,7 +289,7 @@ func tryPluginRun(dockerCli command.Cli, cmd *cobra.Command, subcommand string,
 }
 
 //nolint:gocyclo
-func runDocker(dockerCli *command.DockerCli) error {
+func runDocker(ctx context.Context, dockerCli *command.DockerCli) error {
 	tcmd := newDockerCommand(dockerCli)
 
 	cmd, args, err := tcmd.HandleGlobalFlags()
@@ -298,6 +301,11 @@ func runDocker(dockerCli *command.DockerCli) error {
 		return err
 	}
 
+	mp := dockerCli.MeterProvider(ctx)
+	defer mp.Shutdown(ctx)
+	otel.SetMeterProvider(mp)
+	command.InstrumentCobraCommands(cmd, mp)
+
 	var envs []string
 	args, os.Args, envs, err = processAliases(dockerCli, cmd, args, os.Args)
 	if err != nil {