Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix hangdump running into crashdump #4378

Merged
merged 12 commits into from Apr 24, 2023
Expand Up @@ -66,6 +66,7 @@ public class BlameCollector : DataCollector, ITestExecutionEnvironmentSpecifier
private bool _uploadDumpFiles;
private string? _tempDirectory;
private string? _monitorPostmortemDumpFolderPath;
private DateTime? _testSessionEndedTimestamp;

/// <summary>
/// Initializes a new instance of the <see cref="BlameCollector"/> class.
Expand Down Expand Up @@ -245,9 +246,10 @@ private void CollectDumpAndAbortTesthost()

if (_collectProcessDumpOnCrash)
{
// Detach procdump from the testhost process to prevent testhost process from crashing
// if/when we try to kill the existing proc dump process.
// And also prevent collecting dump on exit of the process.
// Detach the dumper from the testhost process to prevent crashing testhost process. When the dumper is procdump.exe
// it must be detached before we try to dump the process, and simply killing it would take down the testhost process.
//
// Detaching also prevents creating an extra dump at the exit of the testhost process.
_processDumpUtility.DetachFromTargetProcess(_testHostProcessId);
}

Expand All @@ -271,7 +273,9 @@ private void CollectDumpAndAbortTesthost()
{
try
{
var dumpFiles = _processDumpUtility.GetDumpFiles(true, /* if we killed it by hang dumper, we already have our dump, otherwise it might have crashed, and we want all dumps */ !hangDumpSuccess);
var dumpFiles = _processDumpUtility.GetDumpFiles(true,
/* if we killed it by hang dumper, we already have our dump, otherwise it might have crashed, and we want all dumps */ !hangDumpSuccess,
_testSessionEndedTimestamp);
foreach (var dumpFile in dumpFiles)
{
try
Expand Down Expand Up @@ -499,6 +503,7 @@ private void EventsTestCaseEnd(object? sender, TestCaseEndEventArgs e)
/// <param name="args">SessionEndEventArgs</param>
private void SessionEndedHandler(object? sender, SessionEndEventArgs args)
{
_testSessionEndedTimestamp = DateTime.Now;
Evangelink marked this conversation as resolved.
Show resolved Hide resolved
nohwnd marked this conversation as resolved.
Show resolved Hide resolved
TPDebug.Assert(_testSequence != null && _testObjectDictionary != null && _context != null && _dataCollectionSink != null && _logger != null, "Initialize must be called before calling this method");
ResetInactivityTimer();

Expand Down Expand Up @@ -530,7 +535,7 @@ private void SessionEndedHandler(object? sender, SessionEndEventArgs args)
{
try
{
var dumpFiles = _processDumpUtility.GetDumpFiles(warnOnNoDumpFiles: _collectDumpAlways, processCrashedWhenRunningTests);
var dumpFiles = _processDumpUtility.GetDumpFiles(warnOnNoDumpFiles: _collectDumpAlways, processCrashedWhenRunningTests, _testSessionEndedTimestamp);
foreach (var dumpFile in dumpFiles)
{
if (!dumpFile.IsNullOrEmpty())
Expand Down
Expand Up @@ -14,5 +14,5 @@ public interface ICrashDumper

void DetachFromTargetProcess(int processId);

IEnumerable<string> GetDumpFiles(bool processCrashed);
IEnumerable<string> GetDumpFiles(bool processCrashed, DateTime? testSessionEndTimestamp);
}
Expand Up @@ -16,7 +16,7 @@ public interface IProcessDumpUtility
/// <returns>
/// Path of dump file
/// </returns>
IEnumerable<string> GetDumpFiles(bool warnOnNoDumpFiles, bool processCrashed);
IEnumerable<string> GetDumpFiles(bool warnOnNoDumpFiles, bool processCrashed, DateTime? testSessionEndedTimestamp);

/// <summary>
/// Launch proc dump process
Expand Down
Expand Up @@ -31,7 +31,7 @@ public void DetachFromTargetProcess(int processId)
// here we might consider renaming the files to have timestamp
}

public IEnumerable<string> GetDumpFiles(bool processCrashed)
public IEnumerable<string> GetDumpFiles(bool _, DateTime? _2)
{
return _fileHelper.DirectoryExists(_outputDirectory)
? _fileHelper.GetFiles(_outputDirectory, "*_crashdump*.dmp", SearchOption.AllDirectories)
Expand Down
Expand Up @@ -4,10 +4,11 @@
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.IO;
using System.Linq;
using System.Threading;
using System.Threading.Tasks;

using Microsoft.VisualStudio.TestPlatform.CoreUtilities.Helpers;
using Microsoft.VisualStudio.TestPlatform.Execution;
Expand All @@ -32,13 +33,17 @@ public class ProcDumpDumper : ICrashDumper, IHangDumper
private readonly IFileHelper _fileHelper;
private readonly IEnvironment _environment;
private readonly IEnvironmentVariableHelper _environmentVariableHelper;
private string? _procDumpPath;
private Process? _procDumpProcess;
private string? _tempDirectory;
private string? _dumpFileName;
private bool _collectAlways;
private string? _outputDirectory;
private Process? _process;
private string? _outputFilePrefix;
private bool _isCrashDumpInProgress;
private readonly List<DateTime> _crashDumpTimestamps = new();
private readonly int _timeout = EnvironmentHelper.GetConnectionTimeout() * 1000;
private readonly ProcDumpExecutableHelper _procDumpExecutableHelper;

public ProcDumpDumper()
Expand All @@ -63,25 +68,49 @@ internal ProcDumpDumper(IProcessHelper processHelper, IFileHelper fileHelper, IE
_procDumpExecutableHelper = new ProcDumpExecutableHelper(processHelper, fileHelper, environment, environmentVariableHelper);
}

[SuppressMessage("Performance", "CA1822:Mark members as static", Justification = "Part of the public API")]
protected Action<object?, string?> OutputReceivedCallback => (process, data) =>
// useful for visibility when debugging this tool
// Console.ForegroundColor = ConsoleColor.Cyan;
// Console.WriteLine(data);
// Console.ForegroundColor = ConsoleColor.White;
// Log all standard output message of procdump in diag files.
// Otherwise they end up coming on console in pipleine.
{
EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process: {data ?? "<null>"}");

// This is what procdump writes to the output when it is creating a crash dump. When hangdump triggers while we are writing a crash dump
// we probably don't want to cancel, because that crashdump probably has the more interesting info.
// [16:06:59] Dump 1 initiated: <path>
Evangelink marked this conversation as resolved.
Show resolved Hide resolved
// [16:07:00] Dump 1 writing: Estimated dump file size is 11034 MB.
// [16:07:09] Dump 1 complete: 11034 MB written in 10.1 seconds
// We also want to know when we completed writing a dump (and not just set _isCrashDumpInProgress once), because dumpcount larger than 1
// can be provided externally and then the first dump would prevent hangdump forever from stopping the process, but the not every dump is crashing the process
// so we would run forever.
//
// Yes the two ifs below depend on the content being in english, and containg those words (which is the case for procdump from 2017 till 2023 at least),
// if we get different language it should not break us, we will just cancel more aggressively (unfortunately).
if (data != null && data.Contains("initiated"))
Evangelink marked this conversation as resolved.
Show resolved Hide resolved
{
EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process contains 'initiated', crashdump is being written. Don't cancel procdump right now.");
_isCrashDumpInProgress = true;
}

if (data != null && data.Contains("complete"))
{
EqtTrace.Info($"ProcDumpDumper.OutputReceivedCallback: Output received from procdump process contains 'complete' dump is finished, you can cancel procdump if you need.");
_isCrashDumpInProgress = false;
// Do not reset _crashDumpDetected back to false here. Any detected crash dumping should keep it true, so we don't throw away the dump.
_crashDumpTimestamps.Add(DateTime.Now);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same 2 remarks (UtcNow + IClock) here.

}
};

internal static Action<object?, string?> ErrorReceivedCallback => (process, data) =>
Evangelink marked this conversation as resolved.
Show resolved Hide resolved
nohwnd marked this conversation as resolved.
Show resolved Hide resolved
EqtTrace.Error($"ProcDumpDumper.ErrorReceivedCallback: Error received from procdump process: {data ?? "<null>"}");

/// <inheritdoc/>
public void WaitForDumpToFinish()
{
if (_processHelper == null)
if (_procDumpProcess == null)
{
EqtTrace.Info($"ProcDumpDumper.WaitForDumpToFinish: ProcDump was not previously attached, this might indicate error during setup, look for ProcDumpDumper.AttachToTargetProcess.");
return;
}

_processHelper?.WaitForProcessExit(_procDumpProcess);
_processHelper.WaitForProcessExit(_procDumpProcess);
}

/// <inheritdoc/>
Expand Down Expand Up @@ -118,12 +147,13 @@ public void AttachToTargetProcess(int processId, string outputDirectory, DumpTyp
isFullDump: dumpType == DumpTypeOption.Full);

EqtTrace.Info($"ProcDumpDumper.AttachToTargetProcess: Running ProcDump with arguments: '{procDumpArgs}'.");
_procDumpPath = procDumpPath;
_procDumpProcess = (Process)_processHelper.LaunchProcess(
procDumpPath,
procDumpArgs,
_tempDirectory,
null,
null,
ErrorReceivedCallback,
null,
OutputReceivedCallback);

Expand All @@ -133,32 +163,83 @@ public void AttachToTargetProcess(int processId, string outputDirectory, DumpTyp
/// <inheritdoc/>
public void DetachFromTargetProcess(int targetProcessId)
{
if (_procDumpProcess == null)
if (_procDumpProcess == null || _procDumpPath == null)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump was not previously attached, this might indicate error during setup, look for ProcDumpDumper.AttachToTargetProcess.");
return;
}

if (_procDumpProcess.HasExited)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump already exited, returning.");
return;
}

try
{
if (_isCrashDumpInProgress)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump is currently dumping process '{targetProcessId}', wait at most {_timeout} ms for it to finish so we get the crashdump.");
var procDumpExit = Task.Run(() => _procDumpProcess.WaitForExit(_timeout));
// Could do this better with completion source, but we have nothing better to do in this process anyway,
// than wait for the crashdump to finish.
while (_isCrashDumpInProgress && !procDumpExit.IsCompleted)
{
// The timeout is driven by VSTEST_CONNECTION_TIMEOUT which is specified in seconds so it cannot be less than 1000ms.
// (Technically it can be 0, but that will fail way before we ever reach here.)
Thread.Sleep(500);
EqtTrace.Verbose($"ProcDumpDumper.DetachFromTargetProcess: Waiting for procdump to finish dumping the process.");
}

if (procDumpExit.IsCompleted && procDumpExit.Result == false)
{
EqtTrace.Verbose($"ProcDumpDumper.DetachFromTargetProcess: Procdump did not exit after {_timeout} ms.");
}
}

if (_procDumpProcess.HasExited)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump already exited, returning.");
return;
}

EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump detaching from target process '{targetProcessId}'.");
// Alternative to sending this event is calling Procdump -cancel <targetProcessId> (the dumped process id, not the existing Procdump.exe process id).
// But not all versions of procdump have that parameter (definitely not the one we are getting from the Procdump 0.0.1 nuget package), and it works reliably.
// What was not reliable before was that we sent the message and immediately killed procdump, that caused testhost to crash occasionally, because procdump was not detached,
// and killing the process when it is not detached takes the observed process with it.
new Win32NamedEvent($"Procdump-{targetProcessId}").Set();
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: Cancel event was sent to Procdump.");

var sw = Stopwatch.StartNew();
var exited = _procDumpProcess.WaitForExit(_timeout);
if (exited)
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump cancelled after {sw.ElapsedMilliseconds} ms.");
}
else
{
EqtTrace.Info($"ProcDumpDumper.DetachFromTargetProcess: ProcDump cancellation timed out, after {sw.ElapsedMilliseconds} ms.");
}
}
finally
{
try
{
EqtTrace.Info("ProcDumpDumper.DetachFromTargetProcess: Attempting to kill proc dump process.");
_processHelper.TerminateProcess(_procDumpProcess);
if (!_procDumpProcess.HasExited)
{
EqtTrace.Info("ProcDumpDumper.DetachFromTargetProcess: Procdump process is still running after cancellation, force killing it. This will probably take down the process it is attached to as well.");
_processHelper.TerminateProcess(_procDumpProcess);
}
}
catch (Exception e)
{
EqtTrace.Warning($"ProcDumpDumper.DetachFromTargetProcess: Failed to kill proc dump process with exception {e}");
EqtTrace.Warning($"ProcDumpDumper.DetachFromTargetProcess: Failed to kill procdump process with exception {e}");
}
}
}

public IEnumerable<string> GetDumpFiles(bool processCrashed)
public IEnumerable<string> GetDumpFiles(bool processCrashed, DateTime? testSessionEndedTimestamp)
{
var allDumps = _fileHelper.DirectoryExists(_outputDirectory)
? _fileHelper.GetFiles(_outputDirectory, "*_crashdump*.dmp", SearchOption.AllDirectories)
Expand All @@ -174,7 +255,42 @@ public IEnumerable<string> GetDumpFiles(bool processCrashed)
return allDumps;
}

if (processCrashed)
// When we know there was a crash dump collected, either because we detected it from the procdump output, or because
// we got that info from exit code, don't try to remove the extra crash dump that we generate on process exit.

bool crashDumpDetected = false;
if (_crashDumpTimestamps.Count == 0)
{
// When there are 0 crashdumps detected, we know the detection was false.
EqtTrace.Verbose($"There were 0 {nameof(_crashDumpTimestamps)} crash dump was not detected.");
crashDumpDetected = false;
}
else if (_crashDumpTimestamps.Count > 1)
{
// When there are more than 1 crashdumps detected, we know it was true, and want to keep all dumps.
EqtTrace.Verbose($"There were more than 1 {nameof(_crashDumpTimestamps)} ({_crashDumpTimestamps.Count}), keeping all crashdumps.");
crashDumpDetected = true;
}
// From here downwards we know that we have 1 crash dump.
else if (testSessionEndedTimestamp == null)
{
EqtTrace.Verbose($"There was 1 {nameof(_crashDumpTimestamps)} and the test session did not end, keeping all crashdumps.");
// When test session did not end and we have 1 crash dump it was a crash for sure.
crashDumpDetected = true;
}
else
{
EqtTrace.Verbose($"There was 1 {nameof(_crashDumpTimestamps)} and the test session did end. Comparing timestamps to see if we should keep the crashdump.");
// When session ended, and we created a dump within 100ms around it, it was most likely an
// on process exit dump, and we don't need to keep that, unless the user overrides it (e.g. by specifying CollectAlways).
var crashDumpTimestamp = _crashDumpTimestamps.Last();
var timestampDiff = Math.Abs((crashDumpTimestamp - (DateTime)testSessionEndedTimestamp!).TotalMilliseconds);
var isCrashDump = timestampDiff > 100;
EqtTrace.Verbose($"{nameof(_crashDumpTimestamps)}: {crashDumpTimestamp.ToString("o", CultureInfo.InvariantCulture)}, {nameof(testSessionEndedTimestamp)}: {((DateTime)testSessionEndedTimestamp!).ToString("o", CultureInfo.InvariantCulture)}. The difference between the events is {timestampDiff} ms.");
crashDumpDetected = isCrashDump;
}

if (crashDumpDetected || processCrashed)
{
return allDumps;
}
Expand Down Expand Up @@ -241,7 +357,7 @@ public void Dump(int processId, string outputDirectory, DumpTypeOption dumpType)
procDumpArgs,
tempDirectory,
null,
null,
ErrorReceivedCallback,
null,
OutputReceivedCallback);

Expand Down
Expand Up @@ -46,7 +46,7 @@ public ProcessDumpUtility(IProcessHelper processHelper, IFileHelper fileHelper,
EqtTrace.Info($"ProcessDumpUtility.OutputReceivedCallback: Output received from procdump process: {data ?? "<null>"}");

/// <inheritdoc/>
public IEnumerable<string> GetDumpFiles(bool warnOnNoDumpFiles, bool processCrashed)
public IEnumerable<string> GetDumpFiles(bool warnOnNoDumpFiles, bool processCrashed, DateTime? testSessionEndedTimestamp)
{
if (!_wasHangDumped)
{
Expand All @@ -56,7 +56,7 @@ public IEnumerable<string> GetDumpFiles(bool warnOnNoDumpFiles, bool processCras
// If the process was hang dumped we killed it ourselves, so it crashed when executing tests,
// but we already have the hang dump, and should not also collect the exit dump that we got
// from killing the process by the hang dumper.
IEnumerable<string> crashDumps = _crashDumper?.GetDumpFiles(processCrashed) ?? new List<string>();
IEnumerable<string> crashDumps = _crashDumper?.GetDumpFiles(processCrashed, testSessionEndedTimestamp) ?? new List<string>();

IEnumerable<string> hangDumps = _fileHelper.DirectoryExists(_hangDumpDirectory)
? _fileHelper.GetFiles(_hangDumpDirectory, "*_hangdump*.dmp", SearchOption.TopDirectoryOnly)
Expand Down