Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,3 +367,24 @@ func (c *metricsCollector) emitMetricFamily(metricFamily *dto.MetricFamily, ch c
}
}
}

// CombineMetricFamilies writes combined metric families to the response writer.
func CombineMetricFamilies(w http.ResponseWriter, families ...map[string]*dto.MetricFamily) error {
allFamilies := make(map[string]*dto.MetricFamily)
for _, familyMap := range families {
for name, family := range familyMap {
if _, ok := allFamilies[name]; !ok {
allFamilies[name] = family
} else {
allFamilies[name].Metric = append(allFamilies[name].Metric, family.Metric...)
}
}
}

for _, family := range allFamilies {
if _, err := expfmt.MetricFamilyToText(w, family); err != nil {
return fmt.Errorf("failed to write metric family %q to text: %w", family.GetName(), err)
}
}
return nil
}
58 changes: 46 additions & 12 deletions pkg/sidecar_mounter/sidecar_mounter.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/cloud_provider/auth"
cpmeta "github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/cloud_provider/metadata"
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/cloud_provider/storage"
"github.com/prometheus/client_golang/prometheus"
dto "github.com/prometheus/client_model/go"

"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/cloud_provider/clientset"
"github.com/googlecloudplatform/gcs-fuse-csi-driver/pkg/metrics"
Expand All @@ -55,6 +57,17 @@

const metricEndpointFmt = "http://localhost:%v/metrics"

var (
sidecarMounterRegistry = prometheus.NewRegistry()

bucketAccessCheckRetries = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "gcsfuse_sidecar_bucket_access_check_retries",
Help: "Number of times the bucket access check has been retried, emitted every 10 retries.",
},
)
)

// Mounter will be used in the sidecar container to invoke gcsfuse.
type Mounter struct {
mounterPath string
Expand Down Expand Up @@ -283,6 +296,8 @@
// exposing a unix domain socket for CSI driver to connect.
func collectMetrics(ctx context.Context, port, tempDir string) {
metricEndpoint := fmt.Sprintf(metricEndpointFmt, port)
// Register the sidecar mounter's own metrics.
sidecarMounterRegistry.MustRegister(bucketAccessCheckRetries)

// Create a unix domain socket and listen for incoming connections.
socketPath := filepath.Join(tempDir, metrics.SocketName)
Expand All @@ -308,8 +323,20 @@
timeoutCtx, cancel := context.WithTimeout(ctx, 5*time.Second)
defer cancel()

if err := scrapeMetrics(timeoutCtx, metricEndpoint, w); err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
gcsfuseMetrics, err := scrapeGcsfuseMetrics(timeoutCtx, metricEndpoint)
if err != nil {
http.Error(w, fmt.Sprintf("failed to scrape gcsfuse metrics: %v", err), http.StatusInternalServerError)
return
}

sidecarMetrics, err := sidecarMounterRegistry.Gather()
if err != nil {
http.Error(w, fmt.Sprintf("failed to gather sidecar mounter metrics: %v", err), http.StatusInternalServerError)
return
}

if err := metrics.CombineMetricFamilies(w, gcsfuseMetrics, sidecarMetrics); err != nil {

Check failure on line 338 in pkg/sidecar_mounter/sidecar_mounter.go

View workflow job for this annotation

GitHub Actions / build

cannot use sidecarMetrics (variable of type []*io_prometheus_client.MetricFamily) as map[string]*io_prometheus_client.MetricFamily value in argument to metrics.CombineMetricFamilies

Check failure on line 338 in pkg/sidecar_mounter/sidecar_mounter.go

View workflow job for this annotation

GitHub Actions / unit-test

cannot use sidecarMetrics (variable of type []*io_prometheus_client.MetricFamily) as map[string]*io_prometheus_client.MetricFamily value in argument to metrics.CombineMetricFamilies

Check failure on line 338 in pkg/sidecar_mounter/sidecar_mounter.go

View workflow job for this annotation

GitHub Actions / verify

cannot use sidecarMetrics (variable of type []*io_prometheus_client.MetricFamily) as map[string]*io_prometheus_client.MetricFamily value in argument to metrics.CombineMetricFamilies

Check failure on line 338 in pkg/sidecar_mounter/sidecar_mounter.go

View workflow job for this annotation

GitHub Actions / Analyze (go)

cannot use sidecarMetrics (variable of type []*io_prometheus_client.MetricFamily) as map[string]*io_prometheus_client.MetricFamily value in argument to metrics.CombineMetricFamilies
klog.Errorf("failed to combine and write metrics: %v", err)
}
})

Expand All @@ -323,29 +350,29 @@
}
}

// scrapeMetrics connects to the metrics endpoint and scrapes latest metrics sample.
// The response is written to a new http.ResponseWriter.
func scrapeMetrics(ctx context.Context, metricEndpoint string, w http.ResponseWriter) error {
// scrapeGcsfuseMetrics connects to the gcsfuse metrics endpoint and scrapes the latest metrics.
func scrapeGcsfuseMetrics(ctx context.Context, metricEndpoint string) (map[string]*dto.MetricFamily, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, metricEndpoint, nil)
if err != nil {
return fmt.Errorf("failed to create HTTP request to %q: %w", metricEndpoint, err)
return nil, fmt.Errorf("failed to create HTTP request to %q: %w", metricEndpoint, err)
}

resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("failed to make HTTP request to %q: %w", metricEndpoint, err)
return nil, fmt.Errorf("failed to make HTTP request to %q: %w", metricEndpoint, err)
}
defer resp.Body.Close()

if resp.StatusCode != http.StatusOK {
return fmt.Errorf("unexpected HTTP status: %v", resp.Status)
return nil, fmt.Errorf("unexpected HTTP status: %v", resp.Status)
}

if _, err := io.Copy(w, resp.Body); err != nil {
return fmt.Errorf("failed to copy response: %w", err)
metricFamilies, err := metrics.ProcessMetricsData(resp.Body)
if err != nil {
return nil, fmt.Errorf("failed to process metrics data: %w", err)
}

return nil
return metricFamilies, nil
}

func StartTokenServer(ctx context.Context, tokenURLBasePath, tokenSocketName string, identityProvider string) {
Expand Down Expand Up @@ -422,11 +449,18 @@
Jitter: mc.SidecarRetryConfig.Jitter, // Adds randomness, this will give +/- 10% of the current delay
}

var retryCount int
ssCreateAndBucketCheckFunc := func(ctx context.Context) (bool, error) {
retryCount++
// After the first attempt, increment the metric every 10 retries.
if retryCount > 1 && (retryCount-1)%10 == 0 {
bucketAccessCheckRetries.Inc()
klog.Infof("Bucket access check has retried %d times, incrementing metric.", retryCount-1)
}
ss, err := m.StorageServiceManager.SetupStorageServiceForSidecar(ctx, tokenSource)
if err != nil {
klog.Errorf("Failed to setup storage service got error %q, retrying...", err)
return false, nil
return false, nil // Returning false, nil indicates to the backoff manager to retry.
}
klog.V(4).Infof("Created storage service %v", ss)
if bucketName != "_" {
Expand Down
Loading