-
Notifications
You must be signed in to change notification settings - Fork 159
/
Copy pathmetrics.go
198 lines (166 loc) · 5.85 KB
/
metrics.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
/*
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
	"errors"
	"fmt"
	"net/http"
	"os"
	"time"

	"google.golang.org/grpc/codes"
	"k8s.io/component-base/metrics"
	"k8s.io/klog/v2"
	"k8s.io/mount-utils"
	"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
)
const (
// envGKEPDCSIVersion is an environment variable set in the PDCSI controller manifest
// with the current version of the GKE component.
envGKEPDCSIVersion = "GKE_PDCSI_VERSION"
// pdcsiDriverName is the value reported in the driver_name label of the
// operation errors metric.
pdcsiDriverName = "pd.csi.storage.gke.io"
// The Default* values below are exported placeholder strings. They are not
// referenced in the visible part of this file; presumably callers pass them
// as label values when the real disk type / confidential-compute /
// storage-pools setting is unknown — confirm against callers.
DefaultDiskTypeForMetric = "unknownDiskType"
DefaultEnableConfidentialCompute = "unknownConfidentialMode"
DefaultEnableStoragePools = "unknownStoragePools"
)
var (
// This metric is exposed only from the controller driver component when GKE_PDCSI_VERSION env variable is set.
// It is a gauge with a single component_version label; recordComponentVersionMetric
// sets it to 1 for the version read from the environment.
gkeComponentVersion = metrics.NewGaugeVec(&metrics.GaugeOpts{
Name: "component_version",
Help: "Metric to expose the version of the PDCSI GKE component.",
}, []string{"component_version"})
// pdcsiOperationErrorsMetric is a counter incremented once per call to
// RecordOperationErrorMetrics. Its labels identify the driver, the CSI
// method, the resulting gRPC status code (including OK for a nil error),
// and the disk type / confidential storage / storage pools settings
// supplied by the caller.
pdcsiOperationErrorsMetric = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: "csidriver",
Name: "operation_errors",
Help: "CSI server side error metrics",
StabilityLevel: metrics.ALPHA,
},
[]string{"driver_name", "method_name", "grpc_status_code", "disk_type", "enable_confidential_storage", "enable_storage_pools"})
// mountErrorMetric is a counter of mount errors, labelled by the mount
// error's type. It is incremented by RecordMountErrorMetric.
mountErrorMetric = metrics.NewCounterVec(&metrics.CounterOpts{
Subsystem: "node",
Name: "mount_errors",
Help: "Node server file system mounting errors",
StabilityLevel: metrics.ALPHA,
},
[]string{"error_type"},
)
)
// MetricsManager holds the metrics registry that this package's metrics are
// registered into and that is served over HTTP by InitializeHttpHandler.
type MetricsManager struct {
// registry is the registry the Register*/Emit* methods register metrics with.
registry metrics.KubeRegistry
}
// NewMetricsManager returns a MetricsManager backed by a freshly created
// KubeRegistry. No metrics are registered yet; use the Register*/Emit*
// methods for that.
func NewMetricsManager() MetricsManager {
	return MetricsManager{registry: metrics.NewKubeRegistry()}
}
// GetRegistry returns the registry held by this MetricsManager.
func (mm *MetricsManager) GetRegistry() metrics.KubeRegistry {
return mm.registry
}
// registerComponentVersionMetric registers the component_version gauge with
// this manager's registry.
// NOTE(review): MustRegister presumably panics if registration fails (e.g. on
// duplicate registration) — confirm against k8s.io/component-base/metrics.
func (mm *MetricsManager) registerComponentVersionMetric() {
mm.registry.MustRegister(gkeComponentVersion)
}
// RegisterPDCSIMetric registers the CSI operation errors counter with this
// manager's registry.
// NOTE(review): MustRegister presumably panics if registration fails (e.g. on
// duplicate registration) — confirm against k8s.io/component-base/metrics.
func (mm *MetricsManager) RegisterPDCSIMetric() {
mm.registry.MustRegister(pdcsiOperationErrorsMetric)
}
// RegisterMountMetric registers the node mount errors counter with this
// manager's registry.
// NOTE(review): MustRegister presumably panics if registration fails (e.g. on
// duplicate registration) — confirm against k8s.io/component-base/metrics.
func (mm *MetricsManager) RegisterMountMetric() {
mm.registry.MustRegister(mountErrorMetric)
}
// recordComponentVersionMetric publishes the GKE component version read from
// the GKE_PDCSI_VERSION environment variable by setting the component_version
// gauge to 1 with that version as its label value.
//
// It returns an error, and records nothing, when the variable is unset or
// empty.
func (mm *MetricsManager) recordComponentVersionMetric() error {
	if version := getEnvVar(envGKEPDCSIVersion); version != "" {
		gkeComponentVersion.WithLabelValues(version).Set(1.0)
		klog.Infof("Recorded GKE component version : %v", version)
		return nil
	}

	// No version available: note the skip and report it to the caller.
	klog.V(2).Info("Skip emitting component version metric")
	return fmt.Errorf("Failed to register GKE component version metric, env variable %v not defined", envGKEPDCSIVersion)
}
// RecordOperationErrorMetrics increments the CSI operation errors counter for
// one completed operation.
//
// fullMethodName is used as the method_name label, and operationErr is
// translated to the grpc_status_code label via errorCodeLabelValue (a nil
// error is recorded as OK). diskType, enableConfidentialStorage and
// enableStoragePools are passed through unchanged as label values.
func (mm *MetricsManager) RecordOperationErrorMetrics(
	fullMethodName string,
	operationErr error,
	diskType string,
	enableConfidentialStorage string,
	enableStoragePools string) {
	statusCode := errorCodeLabelValue(operationErr)

	// Order must match the label names declared on pdcsiOperationErrorsMetric.
	labelValues := []string{
		pdcsiDriverName,
		fullMethodName,
		statusCode,
		diskType,
		enableConfidentialStorage,
		enableStoragePools,
	}
	pdcsiOperationErrorsMetric.WithLabelValues(labelValues...).Inc()

	klog.Infof("Recorded PDCSI operation error code: %q", statusCode)
}
// RecordMountErrorMetric increments the node mount errors counter, labelled
// with the error's type, when err is or wraps a mount.MountError.
//
// Any other error, including nil, is ignored: no metric is recorded and, so
// that the log never claims a recording that did not happen, nothing is
// logged either.
func (mm *MetricsManager) RecordMountErrorMetric(err error) {
	var mntErr mount.MountError
	if !errors.As(err, &mntErr) {
		// Not a mount error; there is no error type to attribute it to.
		return
	}

	mountErrorMetric.WithLabelValues(string(mntErr.Type)).Inc()
	klog.Infof("Recorded mount error type: %q", mntErr.Type)
}
// EmmitProcessStartTime registers the process start time metric by calling
// metrics.RegisterProcessStartTime with this manager's registry Register
// function, and returns whatever error that call reports.
//
// The spelling "Emmit" is part of the exported method name and is kept as is
// so existing callers continue to compile.
func (mm *MetricsManager) EmmitProcessStartTime() error {
return metrics.RegisterProcessStartTime(mm.registry.Register)
}
// EmitGKEComponentVersion registers the component_version gauge and then
// records the current GKE component version on it.
//
// The gauge is registered even when recording fails; the returned error is
// the one from recordComponentVersionMetric, or nil on success.
func (mm *MetricsManager) EmitGKEComponentVersion() error {
	mm.registerComponentVersionMetric()
	return mm.recordComponentVersionMetric()
}
// Server represents any type that could serve HTTP requests for the metrics
// endpoint.
//
// InitializeHttpHandler passes the result of http.NewServeMux as a Server.
type Server interface {
// Handle registers handler to serve requests matching pattern.
Handle(pattern string, handler http.Handler)
}
// registerToServer installs, on the given server and at metricsPath, an HTTP
// handler that serves the metrics in this manager's registry. The handler is
// created with the ContinueOnError error-handling option.
func (mm *MetricsManager) registerToServer(s Server, metricsPath string) {
	opts := metrics.HandlerOpts{ErrorHandling: metrics.ContinueOnError}
	handler := metrics.HandlerFor(mm.GetRegistry(), opts)
	s.Handle(metricsPath, handler)
}
// InitializeHttpHandler sets up a server and creates a handler for metrics.
//
// The metrics handler is mounted at path on a new ServeMux, and an HTTP
// server listening on address is started in a background goroutine; this
// method returns immediately. If the server stops with an error (for example
// because the address cannot be bound), the process exits via klog.Fatalf.
func (mm *MetricsManager) InitializeHttpHandler(address, path string) {
	// readHeaderTimeout bounds how long a client may take to send its request
	// headers, so a stalled or malicious client cannot hold a connection open
	// indefinitely (the package-level http.ListenAndServe sets no timeouts).
	const readHeaderTimeout = 10 * time.Second

	mux := http.NewServeMux()
	mm.registerToServer(mux, path)

	server := &http.Server{
		Addr:              address,
		Handler:           mux,
		ReadHeaderTimeout: readHeaderTimeout,
	}

	go func() {
		klog.Infof("Metric server listening at %q", address)
		if err := server.ListenAndServe(); err != nil {
			klog.Fatalf("Failed to start metric server at specified address (%q) and path (%q): %v", address, path, err)
		}
	}()
}
// getEnvVar returns the value of the named environment variable. When the
// variable is not set at all it logs a warning and returns the empty string;
// a variable that is set to the empty string is returned as is, without a
// warning.
func getEnvVar(envVarName string) string {
	if value, found := os.LookupEnv(envVarName); found {
		return value
	}

	klog.Warningf("%q env not set", envVarName)
	return ""
}
// IsGKEComponentVersionAvailable reports whether the GKE_PDCSI_VERSION
// environment variable is set to a non-empty value.
func IsGKEComponentVersionAvailable() bool {
	return getEnvVar(envGKEPDCSIVersion) != ""
}
// errorCodeLabelValue converts an operation error into the string used as the
// grpc_status_code label value. A nil error maps to the string form of
// codes.OK; any other error maps to the string form of the code chosen by
// common.CodeForError.
//
// It is kept as a standalone helper so it can be unit tested directly.
func errorCodeLabelValue(operationErr error) string {
	if operationErr == nil {
		return codes.OK.String()
	}

	// A TemporaryError is unwrapped first so that CodeForError is given the
	// underlying error rather than the temporary wrapper.
	var temporary *common.TemporaryError
	if errors.As(operationErr, &temporary) {
		operationErr = temporary.Unwrap()
	}

	return common.CodeForError(operationErr).String()
}