From 8175828e0ad69e8962d7236b85f08fcdb84a8bc9 Mon Sep 17 00:00:00 2001
From: Sean Liao
Date: Fri, 5 Jan 2024 21:50:48 +0000
Subject: [PATCH] pkg/metrics: add 100/1000s buckets for workqueue histograms

Controllers making many external requests in large clusters may have
normal operating latency on the order of ~100s. Add buckets that cover
the range, allowing metric backends to interpolate percentile estimates
properly.

Fixes #2625
---
 pkg/metrics/workqueue.go | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pkg/metrics/workqueue.go b/pkg/metrics/workqueue.go
index 277b878810..cff1de4c1c 100644
--- a/pkg/metrics/workqueue.go
+++ b/pkg/metrics/workqueue.go
@@ -54,14 +54,14 @@ var (
 		Subsystem: WorkQueueSubsystem,
 		Name:      QueueLatencyKey,
 		Help:      "How long in seconds an item stays in workqueue before being requested",
-		Buckets:   prometheus.ExponentialBuckets(10e-9, 10, 10),
+		Buckets:   prometheus.ExponentialBuckets(10e-9, 10, 12),
 	}, []string{"name"})
 
 	workDuration = prometheus.NewHistogramVec(prometheus.HistogramOpts{
 		Subsystem: WorkQueueSubsystem,
 		Name:      WorkDurationKey,
 		Help:      "How long in seconds processing an item from workqueue takes.",
-		Buckets:   prometheus.ExponentialBuckets(10e-9, 10, 10),
+		Buckets:   prometheus.ExponentialBuckets(10e-9, 10, 12),
 	}, []string{"name"})
 
 	unfinished = prometheus.NewGaugeVec(prometheus.GaugeOpts{