From 9498abb679802f46d944d52e5a262fae82ed2b06 Mon Sep 17 00:00:00 2001 From: hc-github-team-secure-vault-core Date: Wed, 16 Aug 2023 13:11:32 -0400 Subject: [PATCH 1/4] Prevent a deadlock that occurs when Restore grabs lease lock then pending lock, while a revocationJob's OnFailure grabs those lock in the reverse order. --- vault/expiration.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vault/expiration.go b/vault/expiration.go index a7110949c8f95..1183d8c6e7899 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -240,12 +240,12 @@ func (r *revocationJob) OnFailure(err error) { r.m.core.metricSink.IncrCounterWithLabels([]string{"expire", "lease_expiration", "error"}, 1, []metrics.Label{metricsutil.NamespaceLabel(r.ns)}) r.m.pendingLock.Lock() - defer r.m.pendingLock.Unlock() pendingRaw, ok := r.m.pending.Load(r.leaseID) if !ok { r.m.logger.Warn("failed to find lease in pending map for revocation retry", "lease_id", r.leaseID) return } + r.m.pendingLock.Unlock() pending := pendingRaw.(pendingInfo) pending.revokesAttempted++ @@ -277,7 +277,9 @@ func (r *revocationJob) OnFailure(err error) { } pending.timer.Reset(newTimer) + r.m.pendingLock.Lock() r.m.pending.Store(r.leaseID, pending) + r.m.pendingLock.Unlock() } func expireLeaseStrategyFairsharing(ctx context.Context, m *ExpirationManager, leaseID string, ns *namespace.Namespace) { From be9ed68a9dba338d6778a22ec9e40c9d2e29b1f9 Mon Sep 17 00:00:00 2001 From: hc-github-team-secure-vault-core Date: Wed, 16 Aug 2023 14:48:03 -0400 Subject: [PATCH 2/4] Fix lock --- vault/expiration.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vault/expiration.go b/vault/expiration.go index 1183d8c6e7899..b2a2758236a5a 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -241,11 +241,11 @@ func (r *revocationJob) OnFailure(err error) { r.m.pendingLock.Lock() pendingRaw, ok := r.m.pending.Load(r.leaseID) + r.m.pendingLock.Unlock() if !ok { r.m.logger.Warn("failed to find lease in pending map for revocation retry", "lease_id", r.leaseID) return } - r.m.pendingLock.Unlock() pending := pendingRaw.(pendingInfo) pending.revokesAttempted++ From eaf84d9415c7bb68fbd991ef62a4af97135d6150 Mon Sep 17 00:00:00 2001 From: hc-github-team-secure-vault-core Date: Wed, 16 Aug 2023 14:55:53 -0400 Subject: [PATCH 3/4] Add CL --- changelog/22374.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog/22374.txt diff --git a/changelog/22374.txt b/changelog/22374.txt new file mode 100644 index 0000000000000..2f744c5c3386c --- /dev/null +++ b/changelog/22374.txt @@ -0,0 +1,3 @@ +```release-note:bug +expiration: Fix a deadlock that could occur when a revocation failure happens while restoring leases on startup. +``` From 04ed2e423128d8abf2ccfa33017fd0f55603a0ff Mon Sep 17 00:00:00 2001 From: hc-github-team-secure-vault-core Date: Wed, 16 Aug 2023 15:22:54 -0400 Subject: [PATCH 4/4] Grab lock before markLeaseIrrevocable --- vault/expiration.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vault/expiration.go b/vault/expiration.go index b2a2758236a5a..245ccadae6f49 100644 --- a/vault/expiration.go +++ b/vault/expiration.go @@ -269,7 +269,9 @@ func (r *revocationJob) OnFailure(err error) { return } + r.m.pendingLock.Lock() r.m.markLeaseIrrevocable(r.nsCtx, le, err) + r.m.pendingLock.Unlock() return } else { r.m.logger.Error("failed to revoke lease", "lease_id", r.leaseID, "error", err,