Add new "alertmanager_provider_alerts" metric
see #1439

Signed-off-by: Patrick Harböck <patrick.harboeck@tngtech.com>
Pharb committed Jun 17, 2019
1 parent fe4760c commit 091062b
Showing 4 changed files with 92 additions and 14 deletions.
2 changes: 1 addition & 1 deletion cmd/alertmanager/main.go
@@ -239,7 +239,7 @@ func run() int {
 		go peer.Settle(ctx, *gossipInterval*10)
 	}
 
-	alerts, err := mem.NewAlerts(context.Background(), marker, *alertGCInterval, logger)
+	alerts, err := mem.NewAlerts(context.Background(), marker, *alertGCInterval, logger, prometheus.DefaultRegisterer)
 	if err != nil {
 		level.Error(logger).Log("err", err)
 		return 1
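Note: passing prometheus.DefaultRegisterer here puts the new gauge into the default registry, which is what Alertmanager's /metrics endpoint gathers. A minimal sketch of that wiring, assuming a plain promhttp handler rather than Alertmanager's actual HTTP setup (the port, handler path and fixed gauge value below are illustrative):

package main

import (
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)

func main() {
	// Collectors registered with the default registerer are served by the
	// default gatherer behind promhttp.Handler().
	gauge := prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Name:        "alertmanager_provider_alerts",
		Help:        "How many alerts for provider by state.",
		ConstLabels: prometheus.Labels{"state": "firing"},
	}, func() float64 { return 2 }) // fixed value standing in for countPending
	prometheus.MustRegister(gauge)

	http.Handle("/metrics", promhttp.Handler())
	log.Fatal(http.ListenAndServe(":9093", nil)) // illustrative port
}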
5 changes: 3 additions & 2 deletions dispatch/dispatch_test.go
@@ -335,8 +335,9 @@ func TestGroups(t *testing.T) {
 		t.Fatal(err)
 	}
 	route := NewRoute(conf.Route, nil)
-	marker := types.NewMarker(prometheus.NewRegistry())
-	alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, logger)
+	r := prometheus.NewRegistry()
+	marker := types.NewMarker(r)
+	alerts, err := mem.NewAlerts(context.Background(), marker, time.Hour, logger, r)
 	if err != nil {
 		t.Fatal(err)
 	}
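The test now builds one registry and hands it to both the marker and the alert provider. A fresh registry per test matters because registering the same fully qualified metric name with identical labels twice on one registerer fails; a small standalone sketch of that failure mode (the gauge stands in for the provider metric and is not Alertmanager code):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	newGauge := func() prometheus.Gauge {
		return prometheus.NewGauge(prometheus.GaugeOpts{
			Name:        "alertmanager_provider_alerts",
			Help:        "How many alerts for provider by state.",
			ConstLabels: prometheus.Labels{"state": "firing"},
		})
	}

	reg := prometheus.NewRegistry()
	fmt.Println(reg.Register(newGauge())) // <nil>: first registration succeeds
	fmt.Println(reg.Register(newGauge())) // AlreadyRegisteredError: duplicate descriptor
}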
34 changes: 33 additions & 1 deletion provider/mem/mem.go
@@ -15,6 +15,7 @@ package mem
 
 import (
 	"context"
+	"github.com/prometheus/client_golang/prometheus"
 	"sync"
 	"time"
 
@@ -47,7 +48,7 @@ type listeningAlerts struct {
 }
 
 // NewAlerts returns a new alert provider.
-func NewAlerts(ctx context.Context, m types.Marker, intervalGC time.Duration, l log.Logger) (*Alerts, error) {
+func NewAlerts(ctx context.Context, m types.Marker, intervalGC time.Duration, l log.Logger, r prometheus.Registerer) (*Alerts, error) {
 	ctx, cancel := context.WithCancel(ctx)
 	a := &Alerts{
 		alerts: store.NewAlerts(intervalGC),
@@ -76,11 +77,31 @@ func NewAlerts(ctx context.Context, m types.Marker, intervalGC time.Duration, l
 		}
 		a.mtx.Unlock()
 	})
+	a.registerMetrics(r)
 	a.alerts.Run(ctx)
 
 	return a, nil
 }
 
+func (a *Alerts) registerMetrics(r prometheus.Registerer) {
+	newAlertMetricByState := func(st model.AlertStatus) prometheus.GaugeFunc {
+		return prometheus.NewGaugeFunc(
+			prometheus.GaugeOpts{
+				Name:        "alertmanager_provider_alerts",
+				Help:        "How many alerts for provider by state.",
+				ConstLabels: prometheus.Labels{"state": string(st)},
+			},
+			func() float64 {
+				return float64(a.countPending(st))
+			},
+		)
+	}
+
+	alertsFiring := newAlertMetricByState(model.AlertFiring)
+
+	r.MustRegister(alertsFiring)
+}
+
 // Close the alert provider.
 func (a *Alerts) Close() {
 	if a.cancel != nil {
@@ -141,6 +162,17 @@ func (a *Alerts) GetPending() provider.AlertIterator {
 	return provider.NewAlertIterator(ch, done, nil)
 }
 
+func (a *Alerts) countPending(status model.AlertStatus) int {
+	count := 0
+
+	for _, a := range a.alerts.List() {
+		if a.Status() == status {
+			count++
+		}
+	}
+	return count
+}
+
 // Get returns the alert for a given fingerprint.
 func (a *Alerts) Get(fp model.Fingerprint) (*types.Alert, error) {
 	return a.alerts.Get(fp)
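registerMetrics relies on prometheus.NewGaugeFunc, so the exported value is never stored: the callback runs on every gather and recomputes the count via countPending. A self-contained sketch of that pattern with the alert store replaced by a plain slice (names and sample data are illustrative, not Alertmanager code):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// Stand-in for the in-memory alert store: one entry per alert state.
	states := []string{"firing", "firing", "resolved"}

	reg := prometheus.NewRegistry()
	for _, st := range []string{"firing", "resolved"} {
		st := st // capture the loop variable for the closure
		reg.MustRegister(prometheus.NewGaugeFunc(
			prometheus.GaugeOpts{
				Name:        "alertmanager_provider_alerts",
				Help:        "How many alerts for provider by state.",
				ConstLabels: prometheus.Labels{"state": st},
			},
			func() float64 { // evaluated lazily on every Gather/scrape
				n := 0
				for _, s := range states {
					if s == st {
						n++
					}
				}
				return float64(n)
			},
		))
	}

	// Gathering runs the callbacks; no explicit Set or Inc calls are needed.
	mfs, err := reg.Gather()
	if err != nil {
		panic(err)
	}
	for _, mf := range mfs {
		for _, m := range mf.GetMetric() {
			fmt.Printf("%s%v = %v\n", mf.GetName(), m.GetLabel(), m.GetGauge().GetValue())
		}
	}
}

The commit itself only registers the firing variant (newAlertMetricByState(model.AlertFiring)); the resolved gauge above is just there to show the per-state ConstLabels pattern.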
65 changes: 55 additions & 10 deletions provider/mem/mem_test.go
@@ -84,8 +84,9 @@ func init() {
 // If the channel of a listener is at its limit, `alerts.Lock` is blocked, whereby
 // a listener can not unsubscribe as the lock is hold by `alerts.Lock`.
 func TestAlertsSubscribePutStarvation(t *testing.T) {
-	marker := types.NewMarker(prometheus.NewRegistry())
-	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
+	r := prometheus.NewRegistry()
+	marker := types.NewMarker(r)
+	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -135,8 +136,9 @@ func TestAlertsSubscribePutStarvation(t *testing.T) {
 }
 
 func TestAlertsPut(t *testing.T) {
-	marker := types.NewMarker(prometheus.NewRegistry())
-	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
+	r := prometheus.NewRegistry()
+	marker := types.NewMarker(r)
+	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -160,11 +162,12 @@ func TestAlertsPut(t *testing.T) {
 }
 
 func TestAlertsSubscribe(t *testing.T) {
-	marker := types.NewMarker(prometheus.NewRegistry())
+	r := prometheus.NewRegistry()
+	marker := types.NewMarker(r)
 
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
-	alerts, err := NewAlerts(ctx, marker, 30*time.Minute, log.NewNopLogger())
+	alerts, err := NewAlerts(ctx, marker, 30*time.Minute, log.NewNopLogger(), r)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -240,8 +243,9 @@ func TestAlertsSubscribe(t *testing.T) {
 }
 
 func TestAlertsGetPending(t *testing.T) {
-	marker := types.NewMarker(prometheus.NewRegistry())
-	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger())
+	r := prometheus.NewRegistry()
+	marker := types.NewMarker(r)
+	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -282,9 +286,50 @@ func TestAlertsGetPending(t *testing.T) {
 	}
 }
 
+func TestAlertsCountPending(t *testing.T) {
+	r := prometheus.NewRegistry()
+	marker := types.NewMarker(r)
+	alerts, err := NewAlerts(context.Background(), marker, 30*time.Minute, log.NewNopLogger(), r)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	checkCount := func(status model.AlertStatus, expected int) {
+		count := alerts.countPending(status)
+		if !(count == expected) {
+			t.Errorf("Unexpected alert count %d instead of %d for status '%s'", count, expected, status)
+		}
+	}
+
+	tResolved := time.Now().Add(-time.Second)
+	tFiring := time.Now().Add(time.Second)
+
+	a1 := types.Alert(*alert1)
+	a2 := types.Alert(*alert2)
+
+	a1.EndsAt = tFiring
+	a2.EndsAt = tFiring
+
+	checkCount(model.AlertResolved, 0)
+	checkCount(model.AlertFiring, 0)
+
+	if err := alerts.Put(&a1, &a2); err != nil {
+		t.Fatalf("Insert failed: %s", err)
+	}
+
+	checkCount(model.AlertResolved, 0)
+	checkCount(model.AlertFiring, 2)
+
+	a1.EndsAt = tResolved
+
+	checkCount(model.AlertResolved, 1)
+	checkCount(model.AlertFiring, 1)
+}
+
 func TestAlertsGC(t *testing.T) {
-	marker := types.NewMarker(prometheus.NewRegistry())
-	alerts, err := NewAlerts(context.Background(), marker, 200*time.Millisecond, log.NewNopLogger())
+	r := prometheus.NewRegistry()
+	marker := types.NewMarker(r)
+	alerts, err := NewAlerts(context.Background(), marker, 200*time.Millisecond, log.NewNopLogger(), r)
 	if err != nil {
 		t.Fatal(err)
 	}
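TestAlertsCountPending changes a1.EndsAt after the Put and expects the counts to move without inserting the alert again. That works because the provider keeps the alert pointers it was given, and the firing/resolved status that countPending compares is derived from EndsAt relative to the current time each time Status() is called. A small sketch of that behaviour using model.Alert directly, assuming its Status() semantics (labels and timestamps are made up):

package main

import (
	"fmt"
	"time"

	"github.com/prometheus/common/model"
)

func main() {
	a := model.Alert{
		Labels:   model.LabelSet{"alertname": "example"},
		StartsAt: time.Now().Add(-time.Minute),
		EndsAt:   time.Now().Add(time.Second), // end time still in the future
	}
	fmt.Println(a.Status()) // firing

	a.EndsAt = time.Now().Add(-time.Second) // move the end time into the past
	fmt.Println(a.Status()) // resolved, without any further Put
}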
