@@ -24,7 +24,6 @@ import (
2424 "github.com/GoogleContainerTools/config-sync/pkg/api/configsync"
2525 "github.com/GoogleContainerTools/config-sync/pkg/core"
2626 "github.com/GoogleContainerTools/config-sync/pkg/kinds"
27- "github.com/GoogleContainerTools/config-sync/pkg/metrics"
2827 ocmetrics "github.com/GoogleContainerTools/config-sync/pkg/metrics"
2928 "github.com/GoogleContainerTools/config-sync/pkg/util/log"
3029 prometheusapi "github.com/prometheus/client_golang/api"
@@ -184,14 +183,15 @@ func ValidateStandardMetricsForSync(nt *NT, syncKind testmetrics.SyncKind, syncL
184183 ReconcilerSyncSuccess (nt , syncLabels , commitHash ),
185184 ReconcilerSourceMetrics (nt , syncLabels , commitHash , count ),
186185 ReconcilerSyncMetrics (nt , syncLabels , commitHash ),
186+ ReconcilerParserMetrics (nt , syncLabels , commitHash ),
187187 ReconcilerOperationsMetrics (nt , syncLabels , ops ... ),
188188 ReconcilerErrorMetrics (nt , syncLabels , commitHash , summary .Errors ))
189189}
190190
191191// ReconcilerManagerMetrics returns a MetricsPredicate that validates the
192192// ReconcileDurationView metric.
193193func ReconcilerManagerMetrics (nt * NT ) MetricsPredicate {
194- nt .Logger .Debugf ("[METRICS] Expecting reconciler-manager reconciling status: %s" , metrics .StatusSuccess )
194+ nt .Logger .Debugf ("[METRICS] Expecting reconciler-manager reconciling status: %s" , ocmetrics .StatusSuccess )
195195 return func (ctx context.Context , v1api prometheusv1.API ) error {
196196 metricName := ocmetrics .ReconcileDurationName
197197 // ReconcileDurationView is a distribution. Query count to aggregate.
@@ -219,15 +219,32 @@ func ReconcilerSourceMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, commit
219219// ReconcilerSyncMetrics returns a MetricsPredicate that validates the
220220// LastApplyTimestampView, ApplyDurationView, and LastSyncTimestampView metrics.
221221func ReconcilerSyncMetrics (nt * NT , syncLabels prometheusmodel.LabelSet , commitHash string ) MetricsPredicate {
222- nt .Logger .Debugf ("[METRICS] Expecting last apply & sync status (commit: %s): %s" , commitHash , metrics .StatusSuccess )
222+ nt .Logger .Debugf ("[METRICS] Expecting last apply & sync status (commit: %s): %s" , commitHash , ocmetrics .StatusSuccess )
223223 return func (ctx context.Context , v1api prometheusv1.API ) error {
224224 var err error
225225 err = multierr .Append (err , metricLastApplyTimestampHasStatus (ctx , nt , v1api ,
226- syncLabels , commitHash , metrics .StatusSuccess ))
226+ syncLabels , commitHash , ocmetrics .StatusSuccess ))
227227 err = multierr .Append (err , metricApplyDurationViewHasStatus (ctx , nt , v1api ,
228- syncLabels , commitHash , metrics .StatusSuccess ))
228+ syncLabels , commitHash , ocmetrics .StatusSuccess ))
229229 err = multierr .Append (err , metricLastSyncTimestampHasStatus (ctx , nt , v1api ,
230- syncLabels , commitHash , metrics .StatusSuccess ))
230+ syncLabels , commitHash , ocmetrics .StatusSuccess ))
231+ return err
232+ }
233+ }
234+
235+ // ReconcilerParserMetrics returns a MetricsPredicate that validates the
236+ // ParserDurationView metric for all parser sources (read, parse, update).
237+ func ReconcilerParserMetrics (nt * NT , syncLabels prometheusmodel.LabelSet , commitHash string ) MetricsPredicate {
238+ nt .Logger .Debugf ("[METRICS] Expecting parser duration status (commit: %s): %s" , commitHash , ocmetrics .StatusSuccess )
239+ return func (ctx context.Context , v1api prometheusv1.API ) error {
240+ var err error
241+ // Check parser duration for all three sources: read, parse, update
242+ err = multierr .Append (err , metricParserDurationViewHasStatusAndSource (ctx , nt , v1api ,
243+ syncLabels , "read" , ocmetrics .StatusSuccess ))
244+ err = multierr .Append (err , metricParserDurationViewHasStatusAndSource (ctx , nt , v1api ,
245+ syncLabels , "parse" , ocmetrics .StatusSuccess ))
246+ err = multierr .Append (err , metricParserDurationViewHasStatusAndSource (ctx , nt , v1api ,
247+ syncLabels , "update" , ocmetrics .StatusSuccess ))
231248 return err
232249 }
233250}
@@ -268,8 +285,10 @@ func reconcilerOperationMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, op
268285// - ResourceConflictsView
269286// - InternalErrorsView
270287// - ReconcilerErrorsView
288+ // - PipelineErrorView
271289func ReconcilerErrorMetrics (nt * NT , syncLabels prometheusmodel.LabelSet , commitHash string , summary testmetrics.ErrorSummary ) MetricsPredicate {
272290 nt .Logger .Debugf ("[METRICS] Expecting reconciler errors: %s" , log .AsJSON (summary ))
291+ nt .Logger .Debugf ("[METRICS] Expecting pipeline error metrics: source=%d, rendering=%d, sync=%d" , summary .Source , summary .Rendering , summary .Sync )
273292
274293 var predicates []MetricsPredicate
275294 // Metrics aggregated by total count
@@ -280,6 +299,10 @@ func ReconcilerErrorMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, commitH
280299 predicates = append (predicates , metricReconcilerErrorsHasValue (nt , syncLabels , componentRendering , summary .Rendering ))
281300 predicates = append (predicates , metricReconcilerErrorsHasValue (nt , syncLabels , componentSource , summary .Source ))
282301 predicates = append (predicates , metricReconcilerErrorsHasValue (nt , syncLabels , componentSync , summary .Sync ))
302+ // Pipeline error metrics
303+ predicates = append (predicates , metricPipelineErrorViewHasValue (nt , syncLabels , "source" , summary .Source ))
304+ predicates = append (predicates , metricPipelineErrorViewHasValue (nt , syncLabels , "rendering" , summary .Rendering ))
305+ predicates = append (predicates , metricPipelineErrorViewHasValue (nt , syncLabels , "sync" , summary .Sync ))
283306
284307 return func (ctx context.Context , v1api prometheusv1.API ) error {
285308 var err error
@@ -293,20 +316,39 @@ func ReconcilerErrorMetrics(nt *NT, syncLabels prometheusmodel.LabelSet, commitH
293316// ReconcilerSyncSuccess returns a MetricsPredicate that validates that the
294317// latest commit synced successfully for the specified reconciler and commit.
295318func ReconcilerSyncSuccess (nt * NT , syncLabels prometheusmodel.LabelSet , commitHash string ) MetricsPredicate {
296- nt .Logger .Debugf ("[METRICS] Expecting last sync status (commit: %s): %s" , commitHash , metrics .StatusSuccess )
319+ nt .Logger .Debugf ("[METRICS] Expecting last sync status (commit: %s): %s" , commitHash , ocmetrics .StatusSuccess )
297320 return func (ctx context.Context , v1api prometheusv1.API ) error {
298321 return metricLastSyncTimestampHasStatus (ctx , nt , v1api ,
299- syncLabels , commitHash , metrics .StatusSuccess )
322+ syncLabels , commitHash , ocmetrics .StatusSuccess )
300323 }
301324}
302325
303326// ReconcilerSyncError returns a MetricsPredicate that validates that the
304327// latest commit sync errored for the specified reconciler and commit.
305328func ReconcilerSyncError (nt * NT , syncLabels prometheusmodel.LabelSet , commitHash string ) MetricsPredicate {
306- nt .Logger .Debugf ("[METRICS] Expecting last sync status (commit: %s): %s" , commitHash , metrics .StatusError )
329+ nt .Logger .Debugf ("[METRICS] Expecting last sync status (commit: %s): %s" , commitHash , ocmetrics .StatusError )
307330 return func (ctx context.Context , v1api prometheusv1.API ) error {
308331 return metricLastSyncTimestampHasStatus (ctx , nt , v1api ,
309- syncLabels , commitHash , metrics .StatusError )
332+ syncLabels , commitHash , ocmetrics .StatusError )
333+ }
334+ }
335+
336+ // ReconcilerParserDuration returns a MetricsPredicate that validates the
337+ // ParserDurationView metric for the specified reconciler, commit, source, and status.
338+ func ReconcilerParserDuration (nt * NT , syncLabels prometheusmodel.LabelSet , commitHash , source , status string ) MetricsPredicate {
339+ nt .Logger .Debugf ("[METRICS] Expecting parser duration (commit: %s, source: %s, status: %s)" , commitHash , source , status )
340+ return func (ctx context.Context , v1api prometheusv1.API ) error {
341+ return metricParserDurationViewHasStatusAndSource (ctx , nt , v1api ,
342+ syncLabels , source , status )
343+ }
344+ }
345+
346+ // ReconcilerPipelineError returns a MetricsPredicate that validates the
347+ // PipelineErrorView metric for the specified reconciler and component.
348+ func ReconcilerPipelineError (nt * NT , syncLabels prometheusmodel.LabelSet , component string , value int ) MetricsPredicate {
349+ nt .Logger .Debugf ("[METRICS] Expecting pipeline error (component: %s, value: %d)" , component , value )
350+ return func (ctx context.Context , v1api prometheusv1.API ) error {
351+ return metricPipelineErrorViewHasValue (nt , syncLabels , component , value )(ctx , v1api )
310352 }
311353}
312354
@@ -398,6 +440,30 @@ func metricInternalErrorsHasValueAtLeast(nt *NT, syncLabels prometheusmodel.Labe
398440 }
399441}
400442
443+ // metricPipelineErrorViewHasValue returns a MetricsPredicate that validates that
444+ // the latest pod for the specified reconciler has emitted a pipeline error
445+ // metric with the specified component and value.
446+ // If the expected value is zero, the metric being not found is also acceptable.
447+ // Expected components: "source", "rendering", or "sync".
448+ func metricPipelineErrorViewHasValue (nt * NT , syncLabels prometheusmodel.LabelSet , componentName string , value int ) MetricsPredicate {
449+ return func (ctx context.Context , v1api prometheusv1.API ) error {
450+ metricName := ocmetrics .PipelineErrorName
451+ metricName = fmt .Sprintf ("%s%s" , prometheusConfigSyncMetricPrefix , metricName )
452+ labels := prometheusmodel.LabelSet {
453+ prometheusmodel .LabelName (string (ocmetrics .KeyComponent )): prometheusmodel .LabelValue (componentName ),
454+ }.Merge (syncLabels )
455+ // PipelineErrorView only keeps the LastValue, so we don't need to aggregate
456+ query := fmt .Sprintf ("%s%s" , metricName , labels )
457+
458+ if value == 0 {
459+ // When there's no error, the metric may not be recorded.
460+ // So tolerate missing metrics when expecting a zero value.
461+ return metricExistsWithValueOrDoesNotExist (ctx , nt , v1api , query , 0 )
462+ }
463+ return metricExistsWithValue (ctx , nt , v1api , query , 1 )
464+ }
465+ }
466+
401467func metricLastSyncTimestampHasStatus (ctx context.Context , nt * NT , v1api prometheusv1.API , syncLabels prometheusmodel.LabelSet , commitHash , status string ) error {
402468 metricName := ocmetrics .LastSyncName
403469 metricName = fmt .Sprintf ("%s%s" , prometheusConfigSyncMetricPrefix , metricName )
@@ -437,6 +503,19 @@ func metricApplyDurationViewHasStatus(ctx context.Context, nt *NT, v1api prometh
437503 return metricExists (ctx , nt , v1api , query )
438504}
439505
506+ func metricParserDurationViewHasStatusAndSource (ctx context.Context , nt * NT , v1api prometheusv1.API , syncLabels prometheusmodel.LabelSet , source , status string ) error {
507+ metricName := ocmetrics .ParserDurationName
508+ // ParserDurationView is a distribution. Query count to aggregate.
509+ metricName = fmt .Sprintf ("%s%s%s" , prometheusConfigSyncMetricPrefix , metricName , prometheusDistributionCountSuffix )
510+ labels := prometheusmodel.LabelSet {
511+ prometheusmodel .LabelName (string (ocmetrics .KeyParserSource )): prometheusmodel .LabelValue (source ),
512+ prometheusmodel .LabelName (string (ocmetrics .KeyStatus )): prometheusmodel .LabelValue (status ),
513+ }.Merge (syncLabels )
514+ query := fmt .Sprintf ("%s%s" , metricName , labels )
515+
516+ return metricExists (ctx , nt , v1api , query )
517+ }
518+
440519func metricDeclaredResourcesViewHasValue (ctx context.Context , nt * NT , v1api prometheusv1.API , syncLabels prometheusmodel.LabelSet , commitHash string , numResources int ) error {
441520 metricName := ocmetrics .DeclaredResourcesName
442521 metricName = fmt .Sprintf ("%s%s" , prometheusConfigSyncMetricPrefix , metricName )
@@ -502,6 +581,7 @@ func metricQueryNow(ctx context.Context, nt *NT, v1api prometheusv1.API, query s
502581 if len (warnings ) > 0 {
503582 nt .T .Logf ("prometheus warnings: %v" , warnings )
504583 }
584+
505585 return response , nil
506586}
507587
0 commit comments