From 3ffa0c00594205c2c623aa9493847217982b558d Mon Sep 17 00:00:00 2001 From: Prashant Varanasi Date: Fri, 12 Apr 2024 19:59:00 -0700 Subject: [PATCH] Fix deadlock when Stop and flush race (#1430) Fixes #1428. Stop signals the flush loop to end, but if the flush ticker has fired after we took the lock, then it tries to `Sync`, and waits for the same lock that `Stop` is holding. This causes a deadlock, as `Stop` holds the lock waiting for flush to end. Fix by waiting for the flush loop to end outside of the critical section. We only need to wait (and call Sync) if the write syncer has been initialized and stopped. --- zapcore/buffered_write_syncer.go | 23 ++++++++++++----------- zapcore/buffered_write_syncer_test.go | 8 ++++++++ 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/zapcore/buffered_write_syncer.go b/zapcore/buffered_write_syncer.go index a40e93b3e..4b426a564 100644 --- a/zapcore/buffered_write_syncer.go +++ b/zapcore/buffered_write_syncer.go @@ -188,32 +188,33 @@ func (s *BufferedWriteSyncer) flushLoop() { // Stop closes the buffer, cleans up background goroutines, and flushes // remaining unwritten data. func (s *BufferedWriteSyncer) Stop() (err error) { - var stopped bool - // Critical section. - func() { + stopped := func() bool { s.mu.Lock() defer s.mu.Unlock() if !s.initialized { - return + return false } - stopped = s.stopped - if stopped { - return + if s.stopped { + return false } s.stopped = true s.ticker.Stop() close(s.stop) // tell flushLoop to stop - <-s.done // and wait until it has + return true }() - // Don't call Sync on consecutive Stops. + // Not initialized, or already stopped, no need for any cleanup. if !stopped { - err = s.Sync() + return } - return err + // Wait for flushLoop to end outside of the lock, as it may need the lock to complete. + // See https://github.com/uber-go/zap/issues/1428 for details. + <-s.done + + return s.Sync() } diff --git a/zapcore/buffered_write_syncer_test.go b/zapcore/buffered_write_syncer_test.go index d0f6037af..297a6c20b 100644 --- a/zapcore/buffered_write_syncer_test.go +++ b/zapcore/buffered_write_syncer_test.go @@ -53,6 +53,14 @@ func TestBufferWriter(t *testing.T) { assert.Equal(t, "foo", buf.String(), "Unexpected log string") }) + t.Run("stop race with flush", func(t *testing.T) { + buf := &bytes.Buffer{} + ws := &BufferedWriteSyncer{WS: AddSync(buf), FlushInterval: 1} + requireWriteWorks(t, ws) + assert.NoError(t, ws.Stop()) + assert.Equal(t, "foo", buf.String(), "Unexpected log string") + }) + t.Run("stop twice", func(t *testing.T) { ws := &BufferedWriteSyncer{WS: &ztest.FailWriter{}} _, err := ws.Write([]byte("foo"))