Skip to content

Commit

Permalink
Ignore errors from rollback manager invocations (#22235) (#22238)
Browse files Browse the repository at this point in the history
* Ignore errors from rollback manager invocations

During reload and mount move operations, we want to ensure that errors
created by the final Rollback are not fatal (which risk failing
replication in Enterprise when the core/mounts table gets invalidated).
This mirrors the behavior of the periodic rollback manager, which
only logs the error.

This updates the noop backend to allow failing just rollback operations,
which we can use in tests to verify this behavior and ensure the core
operations (plugin reload, plugin move, and seal/unseal) are not broken
by this. Note that most of these operations were asynchronous from the
client's PoV and thus did not fail anyways prior to this change.



* Add changelog entry



* Update vault/external_tests/router/router_ext_test.go



---------

Signed-off-by: Alexander Scheel <alex.scheel@hashicorp.com>
Co-authored-by: Alexander Scheel <alex.scheel@hashicorp.com>
Co-authored-by: Nick Cabatoff <ncabatoff@hashicorp.com>
  • Loading branch information
3 people committed Aug 16, 2023
1 parent 7d1f7fb commit c11bc72
Show file tree
Hide file tree
Showing 4 changed files with 63 additions and 4 deletions.
3 changes: 3 additions & 0 deletions changelog/22235.txt
@@ -0,0 +1,3 @@
```release-note:improvement
core: Log rollback manager failures during unmount, remount to prevent replication failures on secondary clusters.
```
38 changes: 38 additions & 0 deletions vault/external_tests/router/router_ext_test.go
Expand Up @@ -68,3 +68,41 @@ func testRouter_MountSubpath(t *testing.T, mountPoints []string) {
cluster.UnsealCores(t)
t.Logf("Done: %#v", mountPoints)
}

func TestRouter_UnmountRollbackIsntFatal(t *testing.T) {
coreConfig := &vault.CoreConfig{
LogicalBackends: map[string]logical.Factory{
"noop": vault.NoopBackendRollbackErrFactory,
},
}
cluster := vault.NewTestCluster(t, coreConfig, &vault.TestClusterOptions{
HandlerFunc: vaulthttp.Handler,
})
cluster.Start()
defer cluster.Cleanup()

vault.TestWaitActive(t, cluster.Cores[0].Core)
client := cluster.Cores[0].Client

if err := client.Sys().Mount("noop", &api.MountInput{
Type: "noop",
}); err != nil {
t.Fatalf("failed to mount PKI: %v", err)
}

if _, err := client.Logical().Write("sys/plugins/reload/backend", map[string]interface{}{
"mounts": "noop",
}); err != nil {
t.Fatalf("expected reload of noop with broken periodic func to succeed; got err=%v", err)
}

if _, err := client.Logical().Write("sys/remount", map[string]interface{}{
"from": "noop",
"to": "noop-to",
}); err != nil {
t.Fatalf("expected remount of noop with broken periodic func to succeed; got err=%v", err)
}

cluster.EnsureCoresSealed(t)
cluster.UnsealCores(t)
}
16 changes: 12 additions & 4 deletions vault/mount.go
Expand Up @@ -874,9 +874,13 @@ func (c *Core) unmountInternal(ctx context.Context, path string, updateStorage b

rCtx := namespace.ContextWithNamespace(c.activeContext, ns)
if backend != nil && c.rollback != nil {
// Invoke the rollback manager a final time
// Invoke the rollback manager a final time. This is not fatal as
// various periodic funcs (e.g., PKI) can legitimately error; the
// periodic rollback manager logs these errors rather than failing
// replication like returning this error would do.
if err := c.rollback.Rollback(rCtx, path); err != nil {
return err
c.logger.Error("ignoring rollback error during unmount", "error", err, "path", path)
err = nil
}
}
if backend != nil && c.expiration != nil && updateStorage {
Expand Down Expand Up @@ -1142,11 +1146,15 @@ func (c *Core) remountSecretsEngine(ctx context.Context, src, dst namespace.Moun
}

if !c.IsDRSecondary() {
// Invoke the rollback manager a final time
// Invoke the rollback manager a final time. This is not fatal as
// various periodic funcs (e.g., PKI) can legitimately error; the
// periodic rollback manager logs these errors rather than failing
// replication like returning this error would do.
rCtx := namespace.ContextWithNamespace(c.activeContext, ns)
if c.rollback != nil && c.router.MatchingBackend(ctx, srcRelativePath) != nil {
if err := c.rollback.Rollback(rCtx, srcRelativePath); err != nil {
return err
c.logger.Error("ignoring rollback error during remount", "error", err, "path", src.Namespace.Path+src.MountPath)
err = nil
}
}

Expand Down
10 changes: 10 additions & 0 deletions vault/router_testing.go
Expand Up @@ -29,17 +29,27 @@ type NoopBackend struct {
DefaultLeaseTTL time.Duration
MaxLeaseTTL time.Duration
BackendType logical.BackendType

RollbackErrs bool
}

func NoopBackendFactory(_ context.Context, _ *logical.BackendConfig) (logical.Backend, error) {
return &NoopBackend{}, nil
}

func NoopBackendRollbackErrFactory(_ context.Context, _ *logical.BackendConfig) (logical.Backend, error) {
return &NoopBackend{RollbackErrs: true}, nil
}

func (n *NoopBackend) HandleRequest(ctx context.Context, req *logical.Request) (*logical.Response, error) {
if req.TokenEntry() != nil {
panic("got a non-nil TokenEntry")
}

if n.RollbackErrs && req.Operation == "rollback" {
return nil, fmt.Errorf("no-op backend rollback has erred out")
}

var err error
resp := n.Response
if n.RequestHandler != nil {
Expand Down

0 comments on commit c11bc72

Please sign in to comment.