Skip to content

Commit

Permalink
Fix hangdump running into crashdump (microsoft#4378)
Browse files Browse the repository at this point in the history
Co-authored-by: Jakub Jareš <me@jakubjares.com>
Co-authored-by: Amaury Levé <amaury.leve@gmail.com>
  • Loading branch information
nohwnd and Evangelink committed May 25, 2023
1 parent 0e34e21 commit fc10885
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 29 deletions.
Expand Up @@ -245,22 +245,21 @@ private void CollectDumpAndAbortTesthost()

if (_collectProcessDumpOnCrash)
{
// Detach procdump from the testhost process to prevent testhost process from crashing
// if/when we try to kill the existing proc dump process.
// And also prevent collecting dump on exit of the process.
// Detach the dumper from the testhost process to prevent crashing testhost process. When the dumper is procdump.exe
// it must be detached before we try to dump the process, and simply killing it would take down the testhost process.
//
// Detaching also prevents creating an extra dump at the exit of the testhost process.
_processDumpUtility.DetachFromTargetProcess(_testHostProcessId);
}

// Skip creating the dump if the option is set to none, and just kill the process.
if ((_hangDumpType ?? HangDumpType.Full) != HangDumpType.None)
{
var hangDumpSuccess = false;
try
{
Action<string> logWarning = m => _logger.LogWarning(_context.SessionDataCollectionContext, m);
var dumpDirectory = GetDumpDirectory();
_processDumpUtility.StartHangBasedProcessDump(_testHostProcessId, dumpDirectory, _hangDumpType == HangDumpType.Full, _targetFramework!, logWarning);
hangDumpSuccess = true;
}
catch (Exception ex)
{
Expand All @@ -271,7 +270,8 @@ private void CollectDumpAndAbortTesthost()
{
try
{
var dumpFiles = _processDumpUtility.GetDumpFiles(true, /* if we killed it by hang dumper, we already have our dump, otherwise it might have crashed, and we want all dumps */ !hangDumpSuccess);
var dumpFiles = _processDumpUtility.GetDumpFiles(true,
true /* Get all dumps that there are, if crashdump was created before we hangdumped, get it. It probably has interesting info. */);
foreach (var dumpFile in dumpFiles)
{
try
Expand Down
Expand Up @@ -31,7 +31,7 @@ public void DetachFromTargetProcess(int processId)
// here we might consider renaming the files to have timestamp
}

public IEnumerable<string> GetDumpFiles(bool processCrashed)
public IEnumerable<string> GetDumpFiles(bool _)
{
return _fileHelper.DirectoryExists(_outputDirectory)
? _fileHelper.GetFiles(_outputDirectory, "*_crashdump*.dmp", SearchOption.AllDirectories)
Expand Down
Expand Up @@ -4,10 +4,11 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

using Microsoft.VisualStudio.TestPlatform.CoreUtilities.Helpers;
using Microsoft.VisualStudio.TestPlatform.Execution;
Expand All @@ -32,13 +33,16 @@ public class ProcDumpDumper : ICrashDumper, IHangDumper
private readonly IFileHelper _fileHelper;
private readonly IEnvironment _environment;
private readonly IEnvironmentVariableHelper _environmentVariableHelper;
private string? _procDumpPath;
private Process? _procDumpProcess;
private string? _tempDirectory;
private string? _dumpFileName;
private bool _collectAlways;
private string? _outputDirectory;
private Process? _process;
private string? _outputFilePrefix;
private bool _isCrashDumpInProgress;
private readonly int _timeout = EnvironmentHelper.GetConnectionTimeout() * 1000;
private readonly ProcDumpExecutableHelper _procDumpExecutableHelper;

public ProcDumpDumper()
Expand All @@ -63,25 +67,47 @@ internal ProcDumpDumper(IProcessHelper processHelper, IFileHelper fileHelper, IE
_procDumpExecutableHelper = new ProcDumpExecutableHelper(processHelper, fileHelper, environment, environmentVariableHelper);
}

[SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Part of the public API")]
protected Action<object?, string?> OutputReceivedCallback => (process, data) =>
// useful for visibility when debugging this tool
// Console.ForegroundColor = ConsoleColor.Cyan;
// Console.WriteLine(data);
// Console.ForegroundColor = ConsoleColor.White;
// Log all standard output message of procdump in diag files.
// Otherwise they end up coming on console in pipleine.
{
EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process: {data ?? "<null>"}");
// This is what procdump writes to the output when it is creating a crash dump. When hangdump triggers while we are writing a crash dump
// we probably don't want to cancel, because that crashdump probably has the more interesting info.
// [16:06:59] Dump 1 initiated: <path>
// [16:07:00] Dump 1 writing: Estimated dump file size is 11034 MB.
// [16:07:09] Dump 1 complete: 11034 MB written in 10.1 seconds
// We also want to know when we completed writing a dump (and not just set _isCrashDumpInProgress once), because dumpcount larger than 1
// can be provided externally and then the first dump would prevent hangdump forever from stopping the process, but the not every dump is crashing the process
// so we would run forever.
//
// Yes the two ifs below depend on the content being in english, and containg those words (which is the case for procdump from 2017 till 2023 at least),
// if we get different language it should not break us, we will just cancel more aggressively (unfortunately).
if (data != null && data.Contains("initiated"))
{
EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process contains 'initiated', crashdump is being written. Don't cancel procdump right now.");
_isCrashDumpInProgress = true;
}
if (data != null && data.Contains("complete"))
{
EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process contains 'complete' dump is finished, you can cancel procdump if you need.");
_isCrashDumpInProgress = false;
}
};

internal static Action<object?, string?> ErrorReceivedCallback => (process, data) =>
EqtTrace.Error($"ProcDumpDumper.ErrorReceivedCallback: Error received from procdump process: {data ?? "<null>"}");

/// <inheritdoc/>
public void WaitForDumpToFinish()
{
if (_processHelper == null)
if (_procDumpProcess == null)
{
EqtTrace.Info($"ProcDumpDumper.WaitForDumpToFinish: ProcDump was not previously attached, this might indicate error during setup, look for ProcDumpDumper.AttachToTargetProcess.");
return;
}

_processHelper?.WaitForProcessExit(_procDumpProcess);
_processHelper.WaitForProcessExit(_procDumpProcess);
}

/// <inheritdoc/>
Expand Down Expand Up @@ -118,12 +144,13 @@ public void AttachToTargetProcess(int processId, string outputDirectory, DumpTyp
isFullDump: dumpType == DumpTypeOption.Full);

EqtTrace.Info($"ProcDumpDumper.AttachToTargetProcess: Running ProcDump with arguments: '{procDumpArgs}'.");
_procDumpPath = procDumpPath;
_procDumpProcess = (Process)_processHelper.LaunchProcess(
procDumpPath,
procDumpArgs,
_tempDirectory,
null,
null,
ErrorReceivedCallback,
null,
OutputReceivedCallback);

Expand All @@ -133,27 +160,78 @@ public void AttachToTargetProcess(int processId, string outputDirectory, DumpTyp
/// <inheritdoc/>
public void DetachFromTargetProcess(int targetProcessId)
{
if (_procDumpProcess == null)
if (_procDumpProcess == null || _procDumpPath == null)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump was not previously attached, this might indicate error during setup, look for ProcDumpDumper.AttachToTargetProcess.");
return;
}

if (_procDumpProcess.HasExited)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump already exited, returning.");
return;
}

try
{
if (_isCrashDumpInProgress)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump is currently dumping process '{targetProcessId}', wait at most {_timeout} ms for it to finish so we get the crashdump.");
var procDumpExit = Task.Run(() => _procDumpProcess.WaitForExit(_timeout));
// Could do this better with completion source, but we have nothing better to do in this process anyway,
// than wait for the crashdump to finish.
while (_isCrashDumpInProgress && !procDumpExit.IsCompleted)
{
// The timeout is driven by VSTEST_CONNECTION_TIMEOUT which is specified in seconds so it cannot be less than 1000ms.
// (Technically it can be 0, but that will fail way before we ever reach here.)
Thread.Sleep(500);
EqtTrace.Verbose($"ProcDumpDumper.DetachFromTargetProcess: Waiting for procdump to finish dumping the process.");
}

if (procDumpExit.IsCompleted && procDumpExit.Result == false)
{
EqtTrace.Verbose($"ProcDumpDumper.DetachFromTargetProcess: Procdump did not exit after {_timeout} ms.");
}
}

if (_procDumpProcess.HasExited)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump already exited, returning.");
return;
}

EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump detaching from target process '{targetProcessId}'.");
// Alternative to sending this event is calling Procdump -cancel <targetProcessId> (the dumped process id, not the existing Procdump.exe process id).
// But not all versions of procdump have that parameter (definitely not the one we are getting from the Procdump 0.0.1 nuget package), and it works reliably.
// What was not reliable before was that we sent the message and immediately killed procdump, that caused testhost to crash occasionally, because procdump was not detached,
// and killing the process when it is not detached takes the observed process with it.
new Win32NamedEvent($"Procdump-{targetProcessId}").Set();
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: Cancel event was sent to Procdump.");

var sw = Stopwatch.StartNew();
var exited = _procDumpProcess.WaitForExit(_timeout);
if (exited)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump cancelled after {sw.ElapsedMilliseconds} ms.");
}
else
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump cancellation timed out, after {sw.ElapsedMilliseconds} ms.");
}
}
finally
{
try
{
EqtTrace.Info("ProcDumpDumper.DetachFromTargetProcess: Attempting to kill proc dump process.");
_processHelper.TerminateProcess(_procDumpProcess);
if (!_procDumpProcess.HasExited)
{
EqtTrace.Info("ProcDumpDumper.DetachFromTargetProcess: Procdump process is still running after cancellation, force killing it. This will probably take down the process it is attached to as well.");
_processHelper.TerminateProcess(_procDumpProcess);
}
}
catch (Exception e)
{
EqtTrace.Warning($"ProcDumpDumper.DetachFromTargetProcess: Failed to kill proc dump process with exception {e}");
EqtTrace.Warning($"ProcDumpDumper.DetachFromTargetProcess: Failed to kill procdump process with exception {e}");
}
}
}
Expand Down Expand Up @@ -241,7 +319,7 @@ public void Dump(int processId, string outputDirectory, DumpTypeOption dumpType)
procDumpArgs,
tempDirectory,
null,
null,
ErrorReceivedCallback,
null,
OutputReceivedCallback);

Expand Down
Expand Up @@ -175,7 +175,7 @@ public void InitializeWithDumpForHangShouldCaptureADumpOnTimeout()
_mockFileHelper.Setup(x => x.Exists(It.Is<string>(y => y == "abc_hang.dmp"))).Returns(true);
_mockFileHelper.Setup(x => x.GetFullPath(It.Is<string>(y => y == "abc_hang.dmp"))).Returns("abc_hang.dmp");
_mockProcessDumpUtility.Setup(x => x.StartHangBasedProcessDump(It.IsAny<int>(), It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<Action<string>>()));
_mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, false)).Returns(new[] { dumpFile });
_mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, It.IsAny<bool>())).Returns(new[] { dumpFile });
_mockDataCollectionSink.Setup(x => x.SendFileAsync(It.IsAny<FileTransferInformation>())).Callback(() => hangBasedDumpcollected.Set());

_blameDataCollector.Initialize(
Expand All @@ -187,7 +187,7 @@ public void InitializeWithDumpForHangShouldCaptureADumpOnTimeout()

hangBasedDumpcollected.Wait(1000);
_mockProcessDumpUtility.Verify(x => x.StartHangBasedProcessDump(It.IsAny<int>(), It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<Action<string>>()), Times.Once);
_mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, false), Times.Once);
_mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, It.IsAny<bool>()), Times.Once);
_mockDataCollectionSink.Verify(x => x.SendFileAsync(It.Is<FileTransferInformation>(y => y.Path == dumpFile)), Times.Once);
}

Expand All @@ -210,7 +210,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfGet
_mockFileHelper.Setup(x => x.Exists(It.Is<string>(y => y == "abc_hang.dmp"))).Returns(true);
_mockFileHelper.Setup(x => x.GetFullPath(It.Is<string>(y => y == "abc_hang.dmp"))).Returns("abc_hang.dmp");
_mockProcessDumpUtility.Setup(x => x.StartHangBasedProcessDump(It.IsAny<int>(), It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<Action<string>>()));
_mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, false)).Callback(() => hangBasedDumpcollected.Set()).Throws(new Exception("Some exception"));
_mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, It.IsAny<bool>())).Callback(() => hangBasedDumpcollected.Set()).Throws(new Exception("Some exception"));

_blameDataCollector.Initialize(
GetDumpConfigurationElement(false, false, true, 0),
Expand All @@ -221,7 +221,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfGet

hangBasedDumpcollected.Wait(1000);
_mockProcessDumpUtility.Verify(x => x.StartHangBasedProcessDump(It.IsAny<int>(), It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<Action<string>>()), Times.Once);
_mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, false), Times.Once);
_mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, It.IsAny<bool>()), Times.Once);
}

/// <summary>
Expand All @@ -244,7 +244,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfAtt
_mockFileHelper.Setup(x => x.Exists(It.Is<string>(y => y == "abc_hang.dmp"))).Returns(true);
_mockFileHelper.Setup(x => x.GetFullPath(It.Is<string>(y => y == "abc_hang.dmp"))).Returns("abc_hang.dmp");
_mockProcessDumpUtility.Setup(x => x.StartHangBasedProcessDump(It.IsAny<int>(), It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<Action<string>>()));
_mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, false)).Returns(new[] { dumpFile });
_mockProcessDumpUtility.Setup(x => x.GetDumpFiles(true, It.IsAny<bool>())).Returns(new[] { dumpFile });
_mockDataCollectionSink.Setup(x => x.SendFileAsync(It.IsAny<FileTransferInformation>())).Callback(() => hangBasedDumpcollected.Set()).Throws(new Exception("Some other exception"));

_blameDataCollector.Initialize(
Expand All @@ -256,7 +256,7 @@ public void InitializeWithDumpForHangShouldCaptureKillTestHostOnTimeoutEvenIfAtt

hangBasedDumpcollected.Wait(1000);
_mockProcessDumpUtility.Verify(x => x.StartHangBasedProcessDump(It.IsAny<int>(), It.IsAny<string>(), It.IsAny<bool>(), It.IsAny<string>(), It.IsAny<Action<string>>()), Times.Once);
_mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, false), Times.Once);
_mockProcessDumpUtility.Verify(x => x.GetDumpFiles(true, It.IsAny<bool>()), Times.Once);
_mockDataCollectionSink.Verify(x => x.SendFileAsync(It.Is<FileTransferInformation>(y => y.Path == dumpFile)), Times.Once);
}

Expand Down

0 comments on commit fc10885

Please sign in to comment.