From d10ae95a3583b4b75a682ec3288cf970f9449a5a Mon Sep 17 00:00:00 2001 From: Alvaro Aleman Date: Sat, 13 May 2023 17:58:59 -0400 Subject: [PATCH] :sparkling: Add terminal error This change adds a terminal error into the reconcile package. The purpose of that error is to still be logged and used in metrics but avoid retrying in situations where it is known that that will not help. It also adds a new metric for just terminal errors, as they often indicate that some kind of human intervention is needed. --- pkg/controller/controllertest/testing.go | 16 +++++++---- pkg/handler/eventhandler_test.go | 2 +- pkg/internal/controller/controller.go | 6 +++- pkg/internal/controller/controller_test.go | 25 ++++++++++++++++- pkg/internal/controller/metrics/metrics.go | 8 ++++++ pkg/internal/source/internal_test.go | 32 +++++++++++----------- pkg/reconcile/reconcile.go | 24 ++++++++++++++++ pkg/reconcile/reconcile_test.go | 8 ++++++ 8 files changed, 97 insertions(+), 24 deletions(-) diff --git a/pkg/controller/controllertest/testing.go b/pkg/controller/controllertest/testing.go index b9f97d5289..627915f94b 100644 --- a/pkg/controller/controllertest/testing.go +++ b/pkg/controller/controllertest/testing.go @@ -17,6 +17,7 @@ limitations under the License. package controllertest import ( + "sync" "time" "k8s.io/apimachinery/pkg/runtime" @@ -35,28 +36,33 @@ func (ErrorType) GetObjectKind() schema.ObjectKind { return nil } // DeepCopyObject implements runtime.Object. func (ErrorType) DeepCopyObject() runtime.Object { return nil } -var _ workqueue.RateLimitingInterface = Queue{} +var _ workqueue.RateLimitingInterface = &Queue{} // Queue implements a RateLimiting queue as a non-ratelimited queue for testing. // This helps testing by having functions that use a RateLimiting queue synchronously add items to the queue. type Queue struct { workqueue.Interface + AddedRateLimitedLock sync.Mutex + AddedRatelimited []any } // AddAfter implements RateLimitingInterface. -func (q Queue) AddAfter(item interface{}, duration time.Duration) { +func (q *Queue) AddAfter(item interface{}, duration time.Duration) { q.Add(item) } // AddRateLimited implements RateLimitingInterface. TODO(community): Implement this. -func (q Queue) AddRateLimited(item interface{}) { +func (q *Queue) AddRateLimited(item interface{}) { + q.AddedRateLimitedLock.Lock() + q.AddedRatelimited = append(q.AddedRatelimited, item) + q.AddedRateLimitedLock.Unlock() q.Add(item) } // Forget implements RateLimitingInterface. TODO(community): Implement this. -func (q Queue) Forget(item interface{}) {} +func (q *Queue) Forget(item interface{}) {} // NumRequeues implements RateLimitingInterface. TODO(community): Implement this. -func (q Queue) NumRequeues(item interface{}) int { +func (q *Queue) NumRequeues(item interface{}) int { return 0 } diff --git a/pkg/handler/eventhandler_test.go b/pkg/handler/eventhandler_test.go index 17f975ea7d..6ae87b0988 100644 --- a/pkg/handler/eventhandler_test.go +++ b/pkg/handler/eventhandler_test.go @@ -46,7 +46,7 @@ var _ = Describe("Eventhandler", func() { var pod *corev1.Pod var mapper meta.RESTMapper BeforeEach(func() { - q = controllertest.Queue{Interface: workqueue.New()} + q = &controllertest.Queue{Interface: workqueue.New()} pod = &corev1.Pod{ ObjectMeta: metav1.ObjectMeta{Namespace: "biz", Name: "baz"}, } diff --git a/pkg/internal/controller/controller.go b/pkg/internal/controller/controller.go index 969eeeb7d2..83aba28cb7 100644 --- a/pkg/internal/controller/controller.go +++ b/pkg/internal/controller/controller.go @@ -314,7 +314,11 @@ func (c *Controller) reconcileHandler(ctx context.Context, obj interface{}) { result, err := c.Reconcile(ctx, req) switch { case err != nil: - c.Queue.AddRateLimited(req) + if errors.Is(err, reconcile.TerminalError(nil)) { + ctrlmetrics.TerminalReconcileErrors.WithLabelValues(c.Name).Inc() + } else { + c.Queue.AddRateLimited(req) + } ctrlmetrics.ReconcileErrors.WithLabelValues(c.Name).Inc() ctrlmetrics.ReconcileTotal.WithLabelValues(c.Name, labelError).Inc() log.Error(err, "Reconciler error") diff --git a/pkg/internal/controller/controller_test.go b/pkg/internal/controller/controller_test.go index 9024556fd0..d669b1acf0 100644 --- a/pkg/internal/controller/controller_test.go +++ b/pkg/internal/controller/controller_test.go @@ -359,7 +359,6 @@ var _ = Describe("controller", func() { }) It("should requeue a Request if there is an error and continue processing items", func() { - ctx, cancel := context.WithCancel(context.Background()) defer cancel() go func() { @@ -372,6 +371,9 @@ var _ = Describe("controller", func() { By("Invoking Reconciler which will give an error") fakeReconcile.AddResult(reconcile.Result{}, fmt.Errorf("expected error: reconcile")) Expect(<-reconciled).To(Equal(request)) + queue.AddedRateLimitedLock.Lock() + Expect(queue.AddedRatelimited).To(Equal([]any{request})) + queue.AddedRateLimitedLock.Unlock() By("Invoking Reconciler a second time without error") fakeReconcile.AddResult(reconcile.Result{}, nil) @@ -382,6 +384,27 @@ var _ = Describe("controller", func() { Eventually(func() int { return queue.NumRequeues(request) }, 1.0).Should(Equal(0)) }) + It("should not requeue a Request if there is a terminal error", func() { + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + go func() { + defer GinkgoRecover() + Expect(ctrl.Start(ctx)).NotTo(HaveOccurred()) + }() + + queue.Add(request) + + By("Invoking Reconciler which will give an error") + fakeReconcile.AddResult(reconcile.Result{}, reconcile.TerminalError(fmt.Errorf("expected error: reconcile"))) + Expect(<-reconciled).To(Equal(request)) + + queue.AddedRateLimitedLock.Lock() + Expect(queue.AddedRatelimited).To(BeEmpty()) + queue.AddedRateLimitedLock.Unlock() + + Expect(queue.Len()).Should(Equal(0)) + }) + // TODO(directxman12): we should ensure that backoff occurrs with error requeue It("should not reset backoff until there's a non-error result", func() { diff --git a/pkg/internal/controller/metrics/metrics.go b/pkg/internal/controller/metrics/metrics.go index baec669277..b74ce062be 100644 --- a/pkg/internal/controller/metrics/metrics.go +++ b/pkg/internal/controller/metrics/metrics.go @@ -39,6 +39,13 @@ var ( Help: "Total number of reconciliation errors per controller", }, []string{"controller"}) + // TerminalReconcileErrors is a prometheus counter metrics which holds the total + // number of terminal errors from the Reconciler. + TerminalReconcileErrors = prometheus.NewCounterVec(prometheus.CounterOpts{ + Name: "controller_runtime_terminal_reconcile_errors_total", + Help: "Total number of terminal reconciliation errors per controller", + }, []string{"controller"}) + // ReconcileTime is a prometheus metric which keeps track of the duration // of reconciliations. ReconcileTime = prometheus.NewHistogramVec(prometheus.HistogramOpts{ @@ -67,6 +74,7 @@ func init() { metrics.Registry.MustRegister( ReconcileTotal, ReconcileErrors, + TerminalReconcileErrors, ReconcileTime, WorkerCount, ActiveWorkers, diff --git a/pkg/internal/source/internal_test.go b/pkg/internal/source/internal_test.go index 067753b9e0..0574f7180e 100644 --- a/pkg/internal/source/internal_test.go +++ b/pkg/internal/source/internal_test.go @@ -74,7 +74,7 @@ var _ = Describe("Internal", func() { set = true }, } - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, funcs, nil) + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, funcs, nil) }) Describe("EventHandler", func() { @@ -99,7 +99,7 @@ var _ = Describe("Internal", func() { }) It("should used Predicates to filter CreateEvents", func() { - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return false }}, }) set = false @@ -107,14 +107,14 @@ var _ = Describe("Internal", func() { Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }}, }) instance.OnAdd(pod) Expect(set).To(BeTrue()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }}, predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return false }}, }) @@ -122,7 +122,7 @@ var _ = Describe("Internal", func() { Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return false }}, predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }}, }) @@ -130,7 +130,7 @@ var _ = Describe("Internal", func() { Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }}, predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }}, }) @@ -157,21 +157,21 @@ var _ = Describe("Internal", func() { It("should used Predicates to filter UpdateEvents", func() { set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{UpdateFunc: func(updateEvent event.UpdateEvent) bool { return false }}, }) instance.OnUpdate(pod, newPod) Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return true }}, }) instance.OnUpdate(pod, newPod) Expect(set).To(BeTrue()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return true }}, predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return false }}, }) @@ -179,7 +179,7 @@ var _ = Describe("Internal", func() { Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return false }}, predicate.Funcs{UpdateFunc: func(event.UpdateEvent) bool { return true }}, }) @@ -187,7 +187,7 @@ var _ = Describe("Internal", func() { Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }}, predicate.Funcs{CreateFunc: func(event.CreateEvent) bool { return true }}, }) @@ -215,21 +215,21 @@ var _ = Describe("Internal", func() { It("should used Predicates to filter DeleteEvents", func() { set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return false }}, }) instance.OnDelete(pod) Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }}, }) instance.OnDelete(pod) Expect(set).To(BeTrue()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }}, predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return false }}, }) @@ -237,7 +237,7 @@ var _ = Describe("Internal", func() { Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return false }}, predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }}, }) @@ -245,7 +245,7 @@ var _ = Describe("Internal", func() { Expect(set).To(BeFalse()) set = false - instance = internal.NewEventHandler(ctx, controllertest.Queue{}, setfuncs, []predicate.Predicate{ + instance = internal.NewEventHandler(ctx, &controllertest.Queue{}, setfuncs, []predicate.Predicate{ predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }}, predicate.Funcs{DeleteFunc: func(event.DeleteEvent) bool { return true }}, }) diff --git a/pkg/reconcile/reconcile.go b/pkg/reconcile/reconcile.go index 8285e2ca9b..d51cfc34ab 100644 --- a/pkg/reconcile/reconcile.go +++ b/pkg/reconcile/reconcile.go @@ -18,6 +18,7 @@ package reconcile import ( "context" + "errors" "time" "k8s.io/apimachinery/pkg/types" @@ -100,3 +101,26 @@ var _ Reconciler = Func(nil) // Reconcile implements Reconciler. func (r Func) Reconcile(ctx context.Context, o Request) (Result, error) { return r(ctx, o) } + +// TerminalError is an error that will not be retried but still be logged +// and recorded in metrics. +func TerminalError(wrapped error) error { + return &terminalError{err: wrapped} +} + +type terminalError struct { + err error +} + +func (te *terminalError) Unwrap() error { + return te.err +} + +func (te *terminalError) Error() string { + return "terminal error: " + te.err.Error() +} + +func (te *terminalError) Is(target error) bool { + tp := &terminalError{} + return errors.As(target, &tp) +} diff --git a/pkg/reconcile/reconcile_test.go b/pkg/reconcile/reconcile_test.go index 125b80936d..b5660f1b4f 100644 --- a/pkg/reconcile/reconcile_test.go +++ b/pkg/reconcile/reconcile_test.go @@ -23,6 +23,7 @@ import ( . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/reconcile" ) @@ -88,5 +89,12 @@ var _ = Describe("reconcile", func() { Expect(actualResult).To(Equal(result)) Expect(actualErr).To(Equal(err)) }) + + It("should allow unwrapping inner error from terminal error", func() { + inner := apierrors.NewGone("") + terminalError := reconcile.TerminalError(inner) + + Expect(apierrors.IsGone(terminalError)).To(BeTrue()) + }) }) })