diff --git a/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs b/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs index 7a1a56cd77..3fb9a0050f 100644 --- a/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs +++ b/src/Agent.Sdk/Util/ArgUtil/IArgUtilInstanced.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Text; namespace Microsoft.VisualStudio.Services.Agent.Util { diff --git a/src/Agent.Worker/DiagnosticLogManager.cs b/src/Agent.Worker/DiagnosticLogManager.cs index 055c237bca..feb88bdafe 100644 --- a/src/Agent.Worker/DiagnosticLogManager.cs +++ b/src/Agent.Worker/DiagnosticLogManager.cs @@ -39,6 +39,8 @@ Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, // support.zip public sealed class DiagnosticLogManager : AgentService, IDiagnosticLogManager { + private const int DefaultTimeoutIncreaseInMilliseconds = 10000; + public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, Pipelines.AgentJobRequestMessage message, DateTime jobStartTimeUtc) @@ -72,7 +74,7 @@ public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, executionContext.Debug("Creating diagnostic log environment file."); string environmentFile = Path.Combine(supportFilesFolder, "environment.txt"); - string content = await GetEnvironmentContent(agentId, agentName, message.Steps); + string content = await GetEnvironmentContent(executionContext, agentId, agentName, message.Steps); File.WriteAllText(environmentFile, content); // Create the capabilities file @@ -145,7 +147,7 @@ public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, executionContext.Debug("Dumping cloud-init logs."); string logsFilePath = $"{HostContext.GetDiagDirectory()}/cloudinit-{jobStartTimeUtc.ToString("yyyyMMdd-HHmmss")}-logs.tar.gz"; - string resultLogs = await DumpCloudInitLogs(logsFilePath); + string resultLogs = await DumpCloudInitLogs(executionContext, logsFilePath); executionContext.Debug(resultLogs); if (File.Exists(logsFilePath)) @@ -197,7 +199,7 @@ public async Task UploadDiagnosticLogsAsync(IExecutionContext executionContext, { try { - string packageVerificationResults = await GetPackageVerificationResult(debsums); + string packageVerificationResults = await GetPackageVerificationResult(executionContext, debsums); IEnumerable brokenPackagesInfo = packageVerificationResults .Split("\n") .Where((line) => !String.IsNullOrEmpty(line) && !line.EndsWith("OK")); @@ -336,7 +338,7 @@ private bool DumpAgentExtensionLogs(IExecutionContext executionContext, string s /// /// Path to collect cloud-init logs /// Returns the method execution logs - private async Task DumpCloudInitLogs(string logsFile) + private async Task DumpCloudInitLogs(IExecutionContext executionContext, string logsFile) { var builder = new StringBuilder(); string cloudInit = WhichUtil.Which("cloud-init", trace: Trace); @@ -349,28 +351,43 @@ private async Task DumpCloudInitLogs(string logsFile) try { - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => - { - builder.AppendLine(args.Data); - }; + builder.AppendLine(args.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + { + builder.AppendLine(args.Data); + }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => { - builder.AppendLine(args.Data); - }; + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: cloudInit, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, + (exception) => + { + if (exception is OperationCanceledException) + { + executionContext.Debug("DumpCloudInitLogs process failed by timeout. Retrying..."); + } - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: cloudInit, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); - } + return true; + }); } catch (Exception ex) { @@ -478,17 +495,17 @@ private List GetAgentDiagLogFiles(string diagFolder, DateTime jobStartTi return agentLogFiles; } - private async Task GetEnvironmentContent(int agentId, string agentName, IList steps) + private async Task GetEnvironmentContent(IExecutionContext executionContext, int agentId, string agentName, IList steps) { if (PlatformUtil.RunningOnWindows) { - return await GetEnvironmentContentWindows(agentId, agentName, steps); + return await GetEnvironmentContentWindows(executionContext, agentId, agentName, steps); } - return await GetEnvironmentContentNonWindows(agentId, agentName, steps); + return await GetEnvironmentContentNonWindows(executionContext, agentId, agentName, steps); } - + [SupportedOSPlatform("windows")] - private async Task GetEnvironmentContentWindows(int agentId, string agentName, IList steps) + private async Task GetEnvironmentContentWindows(IExecutionContext executionContext, int agentId, string agentName, IList steps) { var builder = new StringBuilder(); @@ -512,9 +529,9 @@ private async Task GetEnvironmentContentWindows(int agentId, string agen // $psversiontable builder.AppendLine("Powershell Version Info:"); - builder.AppendLine(await GetPsVersionInfo()); + builder.AppendLine(await GetPsVersionInfo(executionContext)); - builder.AppendLine(await GetLocalGroupMembership()); + builder.AppendLine(await GetLocalGroupMembership(executionContext)); return builder.ToString(); } @@ -550,7 +567,7 @@ private bool IsFirewallEnabled() } [SupportedOSPlatform("windows")] - private async Task GetPsVersionInfo() + private async Task GetPsVersionInfo(IExecutionContext executionContext) { var builder = new StringBuilder(); @@ -562,21 +579,38 @@ private async Task GetPsVersionInfo() { builder.AppendLine(args.Data); }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => { builder.AppendLine(args.Data); }; - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: powerShellExe, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => + { + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(10)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: powerShellExe, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, + (exception) => + { + if (exception is OperationCanceledException) + { + executionContext.Debug("GetPsVersionInfo process failed by timeout. Retrying..."); + } + + return true; + }); } return builder.ToString(); @@ -586,7 +620,7 @@ await processInvoker.ExecuteAsync( /// Gathers a list of local group memberships for the current user. /// [SupportedOSPlatform("windows")] - private async Task GetLocalGroupMembership() + private async Task GetLocalGroupMembership(IExecutionContext executionContext) { var builder = new StringBuilder(); @@ -598,28 +632,43 @@ private async Task GetLocalGroupMembership() try { - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs args) => - { - builder.AppendLine(args.Data); - }; + builder.AppendLine(args.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + { + builder.AppendLine(args.Data); + }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs args) => + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => + { + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(45)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: powerShellExe, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, + (exception) => { - builder.AppendLine(args.Data); - }; + if (exception is OperationCanceledException) + { + executionContext.Debug("GetLocalGroupMembership process failed by timeout. Retrying..."); + } - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: powerShellExe, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); - } + return true; + }); } catch (Exception ex) { @@ -629,7 +678,7 @@ await processInvoker.ExecuteAsync( return builder.ToString(); } - private async Task GetEnvironmentContentNonWindows(int agentId, string agentName, IList steps) + private async Task GetEnvironmentContentNonWindows(IExecutionContext executionContext, int agentId, string agentName, IList steps) { var builder = new StringBuilder(); @@ -638,7 +687,7 @@ private async Task GetEnvironmentContentNonWindows(int agentId, string a builder.AppendLine($"Agent Id: {agentId}"); builder.AppendLine($"Agent Name: {agentName}"); builder.AppendLine($"OS: {System.Runtime.InteropServices.RuntimeInformation.OSDescription}"); - builder.AppendLine($"User groups: {await GetUserGroupsOnNonWindows()}"); + builder.AppendLine($"User groups: {await GetUserGroupsOnNonWindows(executionContext)}"); builder.AppendLine("Steps:"); foreach (Pipelines.TaskStep task in steps.OfType()) @@ -653,34 +702,49 @@ private async Task GetEnvironmentContentNonWindows(int agentId, string a /// Get user groups on a non-windows platform using core utility "id". /// /// Returns the string with user groups - private async Task GetUserGroupsOnNonWindows() + private async Task GetUserGroupsOnNonWindows(IExecutionContext executionContext) { var idUtil = WhichUtil.Which("id"); var stringBuilder = new StringBuilder(); try { - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + { + stringBuilder.AppendLine(mes.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + stringBuilder.AppendLine(mes.Data); + }; + + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => { - stringBuilder.AppendLine(mes.Data); - }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(45)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: idUtil, + arguments: "-nG", + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, + (exception) => { - stringBuilder.AppendLine(mes.Data); - }; + if (exception is OperationCanceledException) + { + executionContext.Debug("GetUserGroupsOnNonWindows process failed by timeout. Retrying..."); + } - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: idUtil, - arguments: "-nG", - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken) - ); - } + return true; + }); } catch (Exception ex) { @@ -703,49 +767,80 @@ private async Task DumpCurrentJobEventLogs(IExecutionContext executionContext, s Get-WinEvent -ListLog * | where {{ $_.RecordCount -gt 0 }} ` | ForEach-Object {{ Get-WinEvent -ErrorAction SilentlyContinue -FilterHashtable @{{ LogName=$_.LogName; StartTime='{startDate}'; EndTime='{endDate}'; }} }} ` | Export-CSV {logFile}"; - using (var processInvoker = HostContext.CreateService()) - { - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: powerShellExe, - arguments: arguments, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken)); - } + using var processInvoker = HostContext.CreateService(); + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => + { + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(45)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: powerShellExe, + arguments: arguments, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, + (exception) => + { + if (exception is OperationCanceledException) + { + executionContext.Debug("DumpCurrentJobEventLogs process failed by timeout. Retrying..."); + } + + return true; + }); } /// /// Git package verification result using the "debsums" utility. /// /// String with the "debsums" output - private async Task GetPackageVerificationResult(string debsumsPath) + private async Task GetPackageVerificationResult(IExecutionContext executionContext, string debsumsPath) { var stringBuilder = new StringBuilder(); - using (var processInvoker = HostContext.CreateService()) + using var processInvoker = HostContext.CreateService(); + processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => { - processInvoker.OutputDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + stringBuilder.AppendLine(mes.Data); + }; + processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + { + stringBuilder.AppendLine(mes.Data); + }; + + var retryHelper = new RetryHelper(executionContext, maxRetries: 3); + await retryHelper.Retry( + async () => { - stringBuilder.AppendLine(mes.Data); - }; - processInvoker.ErrorDataReceived += (object sender, ProcessDataReceivedEventArgs mes) => + using var cts = new CancellationTokenSource(); + cts.CancelAfter(TimeSpan.FromSeconds(20)); + + return await processInvoker.ExecuteAsync( + workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), + fileName: debsumsPath, + arguments: string.Empty, + environment: null, + requireExitCodeZero: false, + outputEncoding: null, + killProcessOnCancel: false, + cancellationToken: cts.Token); + }, + (retryCounter) => RetryHelper.ExponentialDelay(retryCounter) + DefaultTimeoutIncreaseInMilliseconds, + (exception) => { - stringBuilder.AppendLine(mes.Data); - }; + if (exception is OperationCanceledException) + { + executionContext.Debug("GetPackageVerificationResult process failed by timeout. Retrying..."); + } - await processInvoker.ExecuteAsync( - workingDirectory: HostContext.GetDirectory(WellKnownDirectory.Bin), - fileName: debsumsPath, - arguments: string.Empty, - environment: null, - requireExitCodeZero: false, - outputEncoding: null, - killProcessOnCancel: false, - cancellationToken: default(CancellationToken) - ); - } + return true; + }); return stringBuilder.ToString(); } diff --git a/src/Agent.Worker/JobRunner.cs b/src/Agent.Worker/JobRunner.cs index c893d56fd3..5da2d4830a 100644 --- a/src/Agent.Worker/JobRunner.cs +++ b/src/Agent.Worker/JobRunner.cs @@ -111,12 +111,10 @@ public async Task RunAsync(Pipelines.AgentJobRequestMessage message, jobContext.Start(); jobContext.Section(StringUtil.Loc("StepStarting", message.JobDisplayName)); - //Start Resource Diagnostics if enabled in the job message - jobContext.Variables.TryGetValue("system.debug", out var systemDebug); - resourceDiagnosticManager = HostContext.GetService(); - - if (string.Equals(systemDebug, "true", StringComparison.OrdinalIgnoreCase)) + //Start Resource Diagnostics if enabled in the job message + if (jobContext.WriteDebug) { + resourceDiagnosticManager = HostContext.GetService(); resourceDiagnosticManager.Setup(jobContext); _ = resourceDiagnosticManager.RunDebugResourceMonitor(); } diff --git a/src/Agent.Worker/RetryHelper.cs b/src/Agent.Worker/RetryHelper.cs index 9e3aabe1b0..bfc2707098 100644 --- a/src/Agent.Worker/RetryHelper.cs +++ b/src/Agent.Worker/RetryHelper.cs @@ -1,4 +1,5 @@ using System; +using System.Reflection; using System.Threading.Tasks; using Microsoft.TeamFoundation.DistributedTask.WebApi; @@ -17,7 +18,6 @@ public static int ExponentialDelay(int retryNumber) return (int)(Math.Pow(retryNumber + 1, 2) * 1000); } - public RetryHelper(IExecutionContext executionContext, int maxRetries = 3) { Debug = (str) => executionContext.Debug(str); @@ -47,19 +47,18 @@ public async Task Retry(Func> action, Func timeDelayInte } catch (Exception ex) { - if (!shouldRetryOnException(ex) || ExhaustedRetryCount(retryCounter)) + if (!shouldRetryOnException(ex) || ExhaustedRetryCount(retryCounter, action.Method)) { throw; } - Warning($"Intermittent failure attempting to call the restapis {action.Method}. Retry attempt {retryCounter}. Exception: {ex.Message} "); + Warning($"Intermittent failure attempting to call the method {action.Method}. Retry attempt {retryCounter}. Exception: {ex.Message} "); var delay = timeDelayInterval(retryCounter); await Task.Delay(delay); } retryCounter++; } } while (true); - } /// @@ -87,7 +86,7 @@ public async Task RetryStep(Func action, Func timeDelayInterval) Debug($"Invoking Method: {action.Method}. Attempt count: {retryCounter}"); await action(); - if (ExecutionContext.Result != TaskResult.Failed || ExhaustedRetryCount(retryCounter)) + if (ExecutionContext.Result != TaskResult.Failed || ExhaustedRetryCount(retryCounter, action.Method)) { return; } @@ -100,7 +99,7 @@ public async Task RetryStep(Func action, Func timeDelayInterval) } catch (Exception ex) { - if (!ShouldRetryStepOnException(ex) || ExhaustedRetryCount(retryCounter)) + if (!ShouldRetryStepOnException(ex) || ExhaustedRetryCount(retryCounter, action.Method)) { throw; } @@ -115,11 +114,11 @@ public async Task RetryStep(Func action, Func timeDelayInterval) } while (true); } - private bool ExhaustedRetryCount(int retryCount) + private bool ExhaustedRetryCount(int retryCount, MethodInfo methodInfo) { if (retryCount >= MaxRetries) { - Debug($"Failure attempting to call the restapi and retry counter is exhausted"); + Debug($"Failure attempting to call the {methodInfo} and retry counter is exhausted"); return true; } return false;