From 8d72b10a875dc3cd06695ffe9d7112fe48d62fbb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Jare=C5=A1?= Date: Mon, 24 Apr 2023 10:32:44 +0200 Subject: [PATCH] Fix hangdump running into crashdump (#4378) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Jakub Jareš Co-authored-by: Amaury Levé --- .../BlameCollector.cs | 12 +- .../NetClientCrashDumper.cs | 2 +- .../ProcDumpDumper.cs | 110 +++++++++++++++--- .../BlameCollectorTests.cs | 12 +- 4 files changed, 107 insertions(+), 29 deletions(-) diff --git a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/BlameCollector.cs b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/BlameCollector.cs index 02c9e97215..a8dd7014cc 100644 --- a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/BlameCollector.cs +++ b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/BlameCollector.cs @@ -245,22 +245,21 @@ private void CollectDumpAndAbortTesthost() if (_collectProcessDumpOnCrash) { - // Detach procdump from the testhost process to prevent testhost process from crashing - // if/when we try to kill the existing proc dump process. - // And also prevent collecting dump on exit of the process. + // Detach the dumper from the testhost process to prevent crashing testhost process. When the dumper is procdump.exe + // it must be detached before we try to dump the process, and simply killing it would take down the testhost process. + // + // Detaching also prevents creating an extra dump at the exit of the testhost process. _processDumpUtility.DetachFromTargetProcess(_testHostProcessId); } // Skip creating the dump if the option is set to none, and just kill the process. if ((_hangDumpType ?? HangDumpType.Full) != HangDumpType.None) { - var hangDumpSuccess = false; try { Action logWarning = m => _logger.LogWarning(_context.SessionDataCollectionContext, m); var dumpDirectory = GetDumpDirectory(); _processDumpUtility.StartHangBasedProcessDump(_testHostProcessId, dumpDirectory, _hangDumpType == HangDumpType.Full, _targetFramework!, logWarning); - hangDumpSuccess = true; } catch (Exception ex) { @@ -271,7 +270,8 @@ private void CollectDumpAndAbortTesthost() { try { - var dumpFiles = _processDumpUtility.GetDumpFiles(true, /* if we killed it by hang dumper, we already have our dump, otherwise it might have crashed, and we want all dumps */ !hangDumpSuccess); + var dumpFiles = _processDumpUtility.GetDumpFiles(true, + true /* Get all dumps that there are, if crashdump was created before we hangdumped, get it. It probably has interesting info. */); foreach (var dumpFile in dumpFiles) { try diff --git a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientCrashDumper.cs b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientCrashDumper.cs index 0fe0358e29..333a123a21 100644 --- a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientCrashDumper.cs +++ b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/NetClientCrashDumper.cs @@ -31,7 +31,7 @@ public void DetachFromTargetProcess(int processId) // here we might consider renaming the files to have timestamp } - public IEnumerable GetDumpFiles(bool processCrashed) + public IEnumerable GetDumpFiles(bool _) { return _fileHelper.DirectoryExists(_outputDirectory) ? _fileHelper.GetFiles(_outputDirectory, "*_crashdump*.dmp", SearchOption.AllDirectories) diff --git a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/ProcDumpDumper.cs b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/ProcDumpDumper.cs index 4436f79ff1..8aef1bdd46 100644 --- a/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/ProcDumpDumper.cs +++ b/src/Microsoft.TestPlatform.Extensions.BlameDataCollector/ProcDumpDumper.cs @@ -4,10 +4,11 @@ using System; using System.Collections.Generic; using System.Diagnostics; -using System.Diagnostics.CodeAnalysis; using System.Globalization; using System.IO; using System.Linq; +using System.Threading; +using System.Threading.Tasks; using Microsoft.VisualStudio.TestPlatform.CoreUtilities.Helpers; using Microsoft.VisualStudio.TestPlatform.Execution; @@ -32,6 +33,7 @@ public class ProcDumpDumper : ICrashDumper, IHangDumper private readonly IFileHelper _fileHelper; private readonly IEnvironment _environment; private readonly IEnvironmentVariableHelper _environmentVariableHelper; + private string? _procDumpPath; private Process? _procDumpProcess; private string? _tempDirectory; private string? _dumpFileName; @@ -39,6 +41,8 @@ public class ProcDumpDumper : ICrashDumper, IHangDumper private string? _outputDirectory; private Process? _process; private string? _outputFilePrefix; + private bool _isCrashDumpInProgress; + private readonly int _timeout = EnvironmentHelper.GetConnectionTimeout() * 1000; private readonly ProcDumpExecutableHelper _procDumpExecutableHelper; public ProcDumpDumper() @@ -63,25 +67,47 @@ internal ProcDumpDumper(IProcessHelper processHelper, IFileHelper fileHelper, IE _procDumpExecutableHelper = new ProcDumpExecutableHelper(processHelper, fileHelper, environment, environmentVariableHelper); } - [SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Part of the public API")] protected Action OutputReceivedCallback => (process, data) => - // useful for visibility when debugging this tool - // Console.ForegroundColor = ConsoleColor.Cyan; - // Console.WriteLine(data); - // Console.ForegroundColor = ConsoleColor.White; - // Log all standard output message of procdump in diag files. - // Otherwise they end up coming on console in pipleine. + { EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process: {data ?? ""}"); + // This is what procdump writes to the output when it is creating a crash dump. When hangdump triggers while we are writing a crash dump + // we probably don't want to cancel, because that crashdump probably has the more interesting info. + // [16:06:59] Dump 1 initiated: + // [16:07:00] Dump 1 writing: Estimated dump file size is 11034 MB. + // [16:07:09] Dump 1 complete: 11034 MB written in 10.1 seconds + // We also want to know when we completed writing a dump (and not just set _isCrashDumpInProgress once), because dumpcount larger than 1 + // can be provided externally and then the first dump would prevent hangdump forever from stopping the process, but the not every dump is crashing the process + // so we would run forever. + // + // Yes the two ifs below depend on the content being in english, and containg those words (which is the case for procdump from 2017 till 2023 at least), + // if we get different language it should not break us, we will just cancel more aggressively (unfortunately). + if (data != null && data.Contains("initiated")) + { + EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process contains 'initiated', crashdump is being written. Don't cancel procdump right now."); + _isCrashDumpInProgress = true; + } + + if (data != null && data.Contains("complete")) + { + EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process contains 'complete' dump is finished, you can cancel procdump if you need."); + _isCrashDumpInProgress = false; + } + }; + + internal static Action ErrorReceivedCallback => (process, data) => + EqtTrace.Error($"ProcDumpDumper.ErrorReceivedCallback: Error received from procdump process: {data ?? ""}"); + /// public void WaitForDumpToFinish() { - if (_processHelper == null) + if (_procDumpProcess == null) { EqtTrace.Info($"ProcDumpDumper.WaitForDumpToFinish: ProcDump was not previously attached, this might indicate error during setup, look for ProcDumpDumper.AttachToTargetProcess."); + return; } - _processHelper?.WaitForProcessExit(_procDumpProcess); + _processHelper.WaitForProcessExit(_procDumpProcess); } /// @@ -118,12 +144,13 @@ public void AttachToTargetProcess(int processId, string outputDirectory, DumpTyp isFullDump: dumpType == DumpTypeOption.Full); EqtTrace.Info($"ProcDumpDumper.AttachToTargetProcess: Running ProcDump with arguments: '{procDumpArgs}'."); + _procDumpPath = procDumpPath; _procDumpProcess = (Process)_processHelper.LaunchProcess( procDumpPath, procDumpArgs, _tempDirectory, null, - null, + ErrorReceivedCallback, null, OutputReceivedCallback); @@ -133,27 +160,78 @@ public void AttachToTargetProcess(int processId, string outputDirectory, DumpTyp /// public void DetachFromTargetProcess(int targetProcessId) { - if (_procDumpProcess == null) + if (_procDumpProcess == null || _procDumpPath == null) { EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump was not previously attached, this might indicate error during setup, look for ProcDumpDumper.AttachToTargetProcess."); return; } + if (_procDumpProcess.HasExited) + { + EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump already exited, returning."); + return; + } + try { + if (_isCrashDumpInProgress) + { + EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump is currently dumping process '{targetProcessId}', wait at most {_timeout} ms for it to finish so we get the crashdump."); + var procDumpExit = Task.Run(() => _procDumpProcess.WaitForExit(_timeout)); + // Could do this better with completion source, but we have nothing better to do in this process anyway, + // than wait for the crashdump to finish. + while (_isCrashDumpInProgress && !procDumpExit.IsCompleted) + { + // The timeout is driven by VSTEST_CONNECTION_TIMEOUT which is specified in seconds so it cannot be less than 1000ms. + // (Technically it can be 0, but that will fail way before we ever reach here.) + Thread.Sleep(500); + EqtTrace.Verbose($"ProcDumpDumper.DetachFromTargetProcess: Waiting for procdump to finish dumping the process."); + } + + if (procDumpExit.IsCompleted && procDumpExit.Result == false) + { + EqtTrace.Verbose($"ProcDumpDumper.DetachFromTargetProcess: Procdump did not exit after {_timeout} ms."); + } + } + + if (_procDumpProcess.HasExited) + { + EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump already exited, returning."); + return; + } + EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump detaching from target process '{targetProcessId}'."); + // Alternative to sending this event is calling Procdump -cancel (the dumped process id, not the existing Procdump.exe process id). + // But not all versions of procdump have that parameter (definitely not the one we are getting from the Procdump 0.0.1 nuget package), and it works reliably. + // What was not reliable before was that we sent the message and immediately killed procdump, that caused testhost to crash occasionally, because procdump was not detached, + // and killing the process when it is not detached takes the observed process with it. new Win32NamedEvent($"Procdump-{targetProcessId}").Set(); + EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: Cancel event was sent to Procdump."); + + var sw = Stopwatch.StartNew(); + var exited = _procDumpProcess.WaitForExit(_timeout); + if (exited) + { + EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump cancelled after {sw.ElapsedMilliseconds} ms."); + } + else + { + EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump cancellation timed out, after {sw.ElapsedMilliseconds} ms."); + } } finally { try { - EqtTrace.Info("ProcDumpDumper.DetachFromTargetProcess: Attempting to kill proc dump process."); - _processHelper.TerminateProcess(_procDumpProcess); + if (!_procDumpProcess.HasExited) + { + EqtTrace.Info("ProcDumpDumper.DetachFromTargetProcess: Procdump process is still running after cancellation, force killing it. This will probably take down the process it is attached to as well."); + _processHelper.TerminateProcess(_procDumpProcess); + } } catch (Exception e) { - EqtTrace.Warning($"ProcDumpDumper.DetachFromTargetProcess: Failed to kill proc dump process with exception {e}"); + EqtTrace.Warning($"ProcDumpDumper.DetachFromTargetProcess: Failed to kill procdump process with exception {e}"); } } } @@ -241,7 +319,7 @@ public void Dump(int processId, string outputDirectory, DumpTypeOption dumpType) procDumpArgs, tempDirectory, null, - null, + ErrorReceivedCallback, null, OutputReceivedCallback); diff --git a/test/Microsoft.TestPlatform.Extensions.BlameDataCollector.UnitTests/BlameCollectorTests.cs b/test/Microsoft.TestPlatform.Extensions.BlameDataCollector.UnitTests/BlameCollectorTests.cs index 3e7b6d8003..753b705c13 100644 --- a/test/Microsoft.TestPlatform.Extensions.BlameDataCollector.UnitTests/BlameCollectorTests.cs +++ b/test/Microsoft.TestPlatform.Extensions.BlameDataCollector.UnitTests/BlameCollectorTests.cs @@ -175,7 +175,7 @@ public void InitializeWithDumpForHangShouldCaptureADumpOnTimeout() _mockFileHelper.Setup(x => x.Exists(It.Is(y => y == "abc_hang.dmp"))).Returns(true); _mockFileHelper.Setup(x => x.GetFullPath(It.Is(y => y == "abc_hang.dmp"))).Returns("abc_hang.dmp"); _mockProcessDumpUtility.Setup(x => x.StartHangBasedProcessDump(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>())); - _mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, false)).Returns(new[] { dumpFile }); + _mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, It.IsAny())).Returns(new[] { dumpFile }); _mockDataCollectionSink.Setup(x => x.SendFileAsync(It.IsAny())).Callback(() => hangBasedDumpcollected.Set()); _blameDataCollector.Initialize( @@ -187,7 +187,7 @@ public void InitializeWithDumpForHangShouldCaptureADumpOnTimeout() hangBasedDumpcollected.Wait(1000); _mockProcessDumpUtility.Verify(x => x.StartHangBasedProcessDump(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>()), Times.Once); - _mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, false), Times.Once); + _mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, It.IsAny()), Times.Once); _mockDataCollectionSink.Verify(x => x.SendFileAsync(It.Is(y => y.Path == dumpFile)), Times.Once); } @@ -210,7 +210,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfGet _mockFileHelper.Setup(x => x.Exists(It.Is(y => y == "abc_hang.dmp"))).Returns(true); _mockFileHelper.Setup(x => x.GetFullPath(It.Is(y => y == "abc_hang.dmp"))).Returns("abc_hang.dmp"); _mockProcessDumpUtility.Setup(x => x.StartHangBasedProcessDump(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>())); - _mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, false)).Callback(() => hangBasedDumpcollected.Set()).Throws(new Exception("Some exception")); + _mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, It.IsAny())).Callback(() => hangBasedDumpcollected.Set()).Throws(new Exception("Some exception")); _blameDataCollector.Initialize( GetDumpConfigurationElement(false, false, true, 0), @@ -221,7 +221,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfGet hangBasedDumpcollected.Wait(1000); _mockProcessDumpUtility.Verify(x => x.StartHangBasedProcessDump(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>()), Times.Once); - _mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, false), Times.Once); + _mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, It.IsAny()), Times.Once); } /// @@ -244,7 +244,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfAtt _mockFileHelper.Setup(x => x.Exists(It.Is(y => y == "abc_hang.dmp"))).Returns(true); _mockFileHelper.Setup(x => x.GetFullPath(It.Is(y => y == "abc_hang.dmp"))).Returns("abc_hang.dmp"); _mockProcessDumpUtility.Setup(x => x.StartHangBasedProcessDump(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>())); - _mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, false)).Returns(new[] { dumpFile }); + _mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, It.IsAny())).Returns(new[] { dumpFile }); _mockDataCollectionSink.Setup(x => x.SendFileAsync(It.IsAny())).Callback(() => hangBasedDumpcollected.Set()).Throws(new Exception("Some other exception")); _blameDataCollector.Initialize( @@ -256,7 +256,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfAtt hangBasedDumpcollected.Wait(1000); _mockProcessDumpUtility.Verify(x => x.StartHangBasedProcessDump(It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny(), It.IsAny>()), Times.Once); - _mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, false), Times.Once); + _mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, It.IsAny()), Times.Once); _mockDataCollectionSink.Verify(x => x.SendFileAsync(It.Is(y => y.Path == dumpFile)), Times.Once); }