
Run a large query using CloudWatch Logs

The following code examples show how to use CloudWatch Logs to query more than 10,000 records.
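
All of the examples below share the same strategy: run a CloudWatch Logs Insights query, and if it returns exactly the 10,000-result maximum, resume just past the last returned timestamp and split the remaining time range in half, recursing until every sub-range fits under the limit. The following is a minimal, self-contained Python sketch of that recursion, not taken from the examples themselves: run_query is a hypothetical stand-in for the real StartQuery/GetQueryResults round trip against a fake in-memory log store, so the splitting logic can be run and verified without any AWS calls.

# Minimal sketch of the recursive range-splitting strategy shared by the
# examples below. run_query is a hypothetical stand-in for the real
# StartQuery/GetQueryResults round trip: it returns at most `limit` results,
# sorted ascending, from the inclusive range [start, end].
LIMIT = 10_000
FAKE_LOGS = list(range(50_000))  # one "log" per millisecond timestamp


def run_query(start, end, limit):
    return [t for t in FAKE_LOGS if start <= t <= end][:limit]


def query_all(start, end):
    logs = run_query(start, end, LIMIT)
    if len(logs) < LIMIT:
        return logs  # The whole range fit in a single query.
    # The query was truncated: keep this batch, then cover the rest of the
    # range starting just past the last returned timestamp, split in half.
    next_start = logs[-1] + 1
    mid = next_start + (end - next_start) // 2
    return logs + query_all(next_start, mid) + query_all(mid + 1, end)


print(len(query_all(0, 49_999)))  # 50000 -- every log retrieved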

.NET
Amazon SDK for .NET (v4)
Note

There's more on GitHub. Find the complete example and learn how to set up and run in the Amazon code examples repository.

This is the main workflow that demonstrates the large query scenario.

using System.Diagnostics;
using System.Text.RegularExpressions;
using Amazon.CloudFormation;
using Amazon.CloudFormation.Model;
using Amazon.CloudWatchLogs;
using Amazon.CloudWatchLogs.Model;
using CloudWatchLogsActions;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Logging;

namespace CloudWatchLogsScenario;

public class LargeQueryWorkflow
{
    /*
    Before running this .NET code example, set up your development environment, including your credentials.

    This .NET code example performs the following tasks for the CloudWatch Logs Large Query workflow:
    1. Prepare the Application:
       - Prompt the user to deploy CloudFormation stack and generate sample logs.
       - Deploy the CloudFormation template for resource creation.
       - Generate 50,000 sample log entries using CloudWatch Logs API.
       - Wait 5 minutes for logs to be fully ingested.
    2. Execute Large Query:
       - Perform recursive queries to retrieve all logs using binary search.
       - Display progress for each query executed.
       - Show total execution time and logs found.
    3. Clean up:
       - Prompt the user to delete the CloudFormation stack and all resources.
       - Destroy the CloudFormation stack and wait until removed.
    */

    public static ILogger<LargeQueryWorkflow> _logger = null!;
    public static CloudWatchLogsWrapper _wrapper = null!;
    public static IAmazonCloudFormation _amazonCloudFormation = null!;

    private static string _logGroupName = "/workflows/cloudwatch-logs/large-query";
    private static string _logStreamName = "stream1";
    private static long _queryStartDate;
    private static long _queryEndDate;
    public static bool _interactive = true;
    public static string _stackName = "CloudWatchLargeQueryStack";
    private static string _stackResourcePath = "../../../../../../../scenarios/features/cloudwatch_logs_large_query/resources/stack.yaml";

    public static async Task Main(string[] args)
    {
        using var host = Host.CreateDefaultBuilder(args)
            .ConfigureLogging(logging =>
                logging.AddFilter("System", LogLevel.Debug)
                    .AddFilter("Microsoft", LogLevel.Information))
            .ConfigureServices((_, services) =>
                services.AddAWSService<IAmazonCloudWatchLogs>()
                    .AddAWSService<IAmazonCloudFormation>()
                    .AddTransient<CloudWatchLogsWrapper>()
            )
            .Build();

        if (_interactive)
        {
            _logger = LoggerFactory.Create(builder => { builder.AddConsole(); })
                .CreateLogger<LargeQueryWorkflow>();
            _wrapper = host.Services.GetRequiredService<CloudWatchLogsWrapper>();
            _amazonCloudFormation = host.Services.GetRequiredService<IAmazonCloudFormation>();
        }

        Console.WriteLine(new string('-', 80));
        Console.WriteLine("Welcome to the CloudWatch Logs Large Query Scenario.");
        Console.WriteLine(new string('-', 80));
        Console.WriteLine("This scenario demonstrates how to perform large-scale queries on");
        Console.WriteLine("CloudWatch Logs using recursive binary search to retrieve more than");
        Console.WriteLine("the 10,000 result limit.");
        Console.WriteLine();

        try
        {
            Console.WriteLine(new string('-', 80));
            var prepareSuccess = await PrepareApplication();
            Console.WriteLine(new string('-', 80));

            if (prepareSuccess)
            {
                Console.WriteLine(new string('-', 80));
                await ExecuteLargeQuery();
                Console.WriteLine(new string('-', 80));
            }

            Console.WriteLine(new string('-', 80));
            await Cleanup();
            Console.WriteLine(new string('-', 80));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "There was a problem with the scenario, initiating cleanup...");
            _interactive = false;
            await Cleanup();
        }

        Console.WriteLine("CloudWatch Logs Large Query scenario completed.");
    }

    /// <summary>
    /// Runs the scenario workflow. Used for testing.
    /// </summary>
    public static async Task RunScenario()
    {
        Console.WriteLine(new string('-', 80));
        Console.WriteLine("Welcome to the CloudWatch Logs Large Query Scenario.");
        Console.WriteLine(new string('-', 80));
        Console.WriteLine("This scenario demonstrates how to perform large-scale queries on");
        Console.WriteLine("CloudWatch Logs using recursive binary search to retrieve more than");
        Console.WriteLine("the 10,000 result limit.");
        Console.WriteLine();

        try
        {
            Console.WriteLine(new string('-', 80));
            var prepareSuccess = await PrepareApplication();
            Console.WriteLine(new string('-', 80));

            if (prepareSuccess)
            {
                Console.WriteLine(new string('-', 80));
                await ExecuteLargeQuery();
                Console.WriteLine(new string('-', 80));
            }

            Console.WriteLine(new string('-', 80));
            await Cleanup();
            Console.WriteLine(new string('-', 80));
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "There was a problem with the scenario, initiating cleanup...");
            _interactive = false;
            await Cleanup();
        }

        Console.WriteLine("CloudWatch Logs Large Query scenario completed.");
    }

    /// <summary>
    /// Prepares the application by creating the necessary resources.
    /// </summary>
    /// <returns>True if the application was prepared successfully.</returns>
    public static async Task<bool> PrepareApplication()
    {
        Console.WriteLine("Preparing the application...");
        Console.WriteLine();

        try
        {
            var deployStack = !_interactive || GetYesNoResponse(
                "Would you like to deploy the CloudFormation stack and generate sample logs? (y/n) ");

            if (deployStack)
            {
                if (_interactive)
                {
                    Console.Write(
                        $"Enter a path for the CloudFormation stack resource .yaml file (or press Enter for default '{_stackResourcePath}'): ");
                    string? inputPath = Console.ReadLine();
                    if (!string.IsNullOrWhiteSpace(inputPath))
                    {
                        _stackResourcePath = inputPath;
                    }
                }

                _stackName = PromptUserForStackName();
                var deploySuccess = await DeployCloudFormationStack(_stackName);

                if (deploySuccess)
                {
                    Console.WriteLine();
                    Console.WriteLine("Generating 50,000 sample log entries...");
                    var generateSuccess = await GenerateSampleLogs();

                    if (generateSuccess)
                    {
                        Console.WriteLine();
                        Console.WriteLine("Sample logs created. Waiting 5 minutes for logs to be fully ingested...");
                        await WaitWithCountdown(300);
                        Console.WriteLine("Application preparation complete.");
                        return true;
                    }
                }
            }
            else
            {
                _logGroupName = PromptUserForInput("Enter the log group name ", _logGroupName);
                _logStreamName = PromptUserForInput("Enter the log stream name ", _logStreamName);
                var startDateMs = PromptUserForLong("Enter the query start date (milliseconds since epoch): ");
                var endDateMs = PromptUserForLong("Enter the query end date (milliseconds since epoch): ");
                _queryStartDate = startDateMs / 1000;
                _queryEndDate = endDateMs / 1000;
                Console.WriteLine("Application preparation complete.");
                return true;
            }
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "An error occurred while preparing the application.");
        }

        Console.WriteLine("Application preparation failed.");
        return false;
    }

    /// <summary>
    /// Deploys the CloudFormation stack with the necessary resources.
    /// </summary>
    /// <param name="stackName">The name of the CloudFormation stack.</param>
    /// <returns>True if the stack was deployed successfully.</returns>
    private static async Task<bool> DeployCloudFormationStack(string stackName)
    {
        Console.WriteLine($"\nDeploying CloudFormation stack: {stackName}");

        try
        {
            var request = new CreateStackRequest
            {
                StackName = stackName,
                TemplateBody = await File.ReadAllTextAsync(_stackResourcePath)
            };

            var response = await _amazonCloudFormation.CreateStackAsync(request);
            if (response.HttpStatusCode == System.Net.HttpStatusCode.OK)
            {
                Console.WriteLine($"CloudFormation stack creation started: {stackName}");

                bool stackCreated = await WaitForStackCompletion(response.StackId);

                if (stackCreated)
                {
                    Console.WriteLine("CloudFormation stack created successfully.");
                    return true;
                }
                else
                {
                    _logger.LogError($"CloudFormation stack creation failed: {stackName}");
                    return false;
                }
            }
            else
            {
                _logger.LogError($"Failed to create CloudFormation stack: {stackName}");
                return false;
            }
        }
        catch (AlreadyExistsException)
        {
            _logger.LogWarning($"CloudFormation stack '{stackName}' already exists. Please provide a unique name.");
            var newStackName = PromptUserForStackName();
            return await DeployCloudFormationStack(newStackName);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, $"An error occurred while deploying the CloudFormation stack: {stackName}");
            return false;
        }
    }

    /// <summary>
    /// Waits for the CloudFormation stack to be in the CREATE_COMPLETE state.
    /// </summary>
    /// <param name="stackId">The ID of the CloudFormation stack.</param>
    /// <returns>True if the stack was created successfully.</returns>
    private static async Task<bool> WaitForStackCompletion(string stackId)
    {
        int retryCount = 0;
        const int maxRetries = 30;
        const int retryDelay = 10000;

        while (retryCount < maxRetries)
        {
            var describeStacksRequest = new DescribeStacksRequest
            {
                StackName = stackId
            };

            var describeStacksResponse = await _amazonCloudFormation.DescribeStacksAsync(describeStacksRequest);

            if (describeStacksResponse.Stacks.Count > 0)
            {
                if (describeStacksResponse.Stacks[0].StackStatus == StackStatus.CREATE_COMPLETE)
                {
                    return true;
                }
                if (describeStacksResponse.Stacks[0].StackStatus == StackStatus.CREATE_FAILED ||
                    describeStacksResponse.Stacks[0].StackStatus == StackStatus.ROLLBACK_COMPLETE)
                {
                    return false;
                }
            }

            Console.WriteLine("Waiting for CloudFormation stack creation to complete...");
            await Task.Delay(retryDelay);
            retryCount++;
        }

        _logger.LogError("Timed out waiting for CloudFormation stack creation to complete.");
        return false;
    }

    /// <summary>
    /// Generates sample logs directly using CloudWatch Logs API.
    /// Creates 50,000 log entries spanning 5 minutes.
    /// </summary>
    /// <returns>True if logs were generated successfully.</returns>
    private static async Task<bool> GenerateSampleLogs()
    {
        const int totalEntries = 50000;
        const int entriesPerBatch = 10000;
        const int fiveMinutesMs = 5 * 60 * 1000;

        try
        {
            // Calculate timestamps
            var startTimeMs = DateTimeOffset.UtcNow.ToUnixTimeMilliseconds();
            var timestampIncrement = fiveMinutesMs / totalEntries;

            Console.WriteLine($"Generating {totalEntries} log entries...");

            var entryCount = 0;
            var currentTimestamp = startTimeMs;
            var numBatches = totalEntries / entriesPerBatch;

            // Generate and upload logs in batches
            for (int batchNum = 0; batchNum < numBatches; batchNum++)
            {
                var logEvents = new List<InputLogEvent>();

                for (int i = 0; i < entriesPerBatch; i++)
                {
                    logEvents.Add(new InputLogEvent
                    {
                        Timestamp = DateTimeOffset.FromUnixTimeMilliseconds(currentTimestamp).UtcDateTime,
                        Message = $"Entry {entryCount}"
                    });
                    entryCount++;
                    currentTimestamp += timestampIncrement;
                }

                // Upload batch
                var success = await _wrapper.PutLogEventsAsync(_logGroupName, _logStreamName, logEvents);
                if (!success)
                {
                    _logger.LogError($"Failed to upload batch {batchNum + 1}/{numBatches}");
                    return false;
                }

                Console.WriteLine($"Uploaded batch {batchNum + 1}/{numBatches}");
            }

            // Set query date range (convert milliseconds to seconds for query API)
            _queryStartDate = startTimeMs / 1000;
            _queryEndDate = (currentTimestamp - timestampIncrement) / 1000;

            Console.WriteLine($"Query start date: {DateTimeOffset.FromUnixTimeSeconds(_queryStartDate):yyyy-MM-ddTHH:mm:ss.fffZ}");
            Console.WriteLine($"Query end date: {DateTimeOffset.FromUnixTimeSeconds(_queryEndDate):yyyy-MM-ddTHH:mm:ss.fffZ}");
            Console.WriteLine($"Successfully uploaded {totalEntries} log entries");
            return true;
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "An error occurred while generating sample logs.");
            return false;
        }
    }

    /// <summary>
    /// Executes the large query workflow.
    /// </summary>
    public static async Task ExecuteLargeQuery()
    {
        Console.WriteLine("Starting recursive query to retrieve all logs...");
        Console.WriteLine();

        var queryLimit = PromptUserForInteger("Enter the query limit (max 10000) ", 10000);
        if (queryLimit > 10000) queryLimit = 10000;

        var queryString = "fields @timestamp, @message | sort @timestamp asc";

        var stopwatch = Stopwatch.StartNew();
        var allResults = await PerformLargeQuery(_logGroupName, queryString, _queryStartDate, _queryEndDate, queryLimit);
        stopwatch.Stop();

        Console.WriteLine();
        Console.WriteLine($"Queries finished in {stopwatch.Elapsed.TotalSeconds:F3} seconds.");
        Console.WriteLine($"Total logs found: {allResults.Count}");

        // Check for duplicates
        Console.WriteLine();
        Console.WriteLine("Checking for duplicate logs...");
        var duplicates = FindDuplicateLogs(allResults);
        if (duplicates.Count > 0)
        {
            Console.WriteLine($"WARNING: Found {duplicates.Count} duplicate log entries!");
            Console.WriteLine("Duplicate entries (showing first 10):");
            foreach (var dup in duplicates.Take(10))
            {
                Console.WriteLine($" [{dup.Timestamp}] {dup.Message} (appears {dup.Count} times)");
            }

            var uniqueCount = allResults.Count - duplicates.Sum(d => d.Count - 1);
            Console.WriteLine($"Unique logs: {uniqueCount}");
        }
        else
        {
            Console.WriteLine("No duplicates found. All logs are unique.");
        }

        Console.WriteLine();
        var viewSample = !_interactive || GetYesNoResponse("Would you like to see a sample of the logs? (y/n) ");
        if (viewSample)
        {
            Console.WriteLine();
            Console.WriteLine($"Sample logs (first 10 of {allResults.Count}):");
            for (int i = 0; i < Math.Min(10, allResults.Count); i++)
            {
                var timestamp = allResults[i].Find(f => f.Field == "@timestamp")?.Value ?? "N/A";
                var message = allResults[i].Find(f => f.Field == "@message")?.Value ?? "N/A";
                Console.WriteLine($"[{timestamp}] {message}");
            }
        }
    }

    /// <summary>
    /// Performs a large query using recursive binary search.
    /// </summary>
    private static async Task<List<List<ResultField>>> PerformLargeQuery(
        string logGroupName, string queryString, long startTime, long endTime, int limit)
    {
        var queryId = await _wrapper.StartQueryAsync(logGroupName, queryString, startTime, endTime, limit);
        if (queryId == null)
        {
            return new List<List<ResultField>>();
        }

        var results = await PollQueryResults(queryId);
        if (results == null || results.Count == 0)
        {
            return new List<List<ResultField>>();
        }

        var startDate = DateTimeOffset.FromUnixTimeSeconds(startTime).ToString("yyyy-MM-ddTHH:mm:ss.fffZ");
        var endDate = DateTimeOffset.FromUnixTimeSeconds(endTime).ToString("yyyy-MM-ddTHH:mm:ss.fffZ");
        Console.WriteLine($"Query date range: {startDate} ({startTime}s) to {endDate} ({endTime}s). Found {results.Count} logs.");

        if (results.Count < limit)
        {
            Console.WriteLine($" -> Returning {results.Count} logs (less than limit of {limit})");
            return results;
        }

        Console.WriteLine($" -> Hit limit of {limit}. Need to split and recurse.");

        // Get the timestamp of the last log (sorted to find the actual last one)
        var lastLogTimestamp = GetLastLogTimestamp(results);
        if (lastLogTimestamp == null)
        {
            Console.WriteLine($" -> No timestamp found in results. Returning {results.Count} logs.");
            return results;
        }

        Console.WriteLine($" -> Last log timestamp: {lastLogTimestamp}");

        // Parse the timestamp and add 1 millisecond to avoid querying the same log again
        var lastLogDate = DateTimeOffset.Parse(lastLogTimestamp + " +0000");
        Console.WriteLine($" -> Last log as DateTimeOffset: {lastLogDate:yyyy-MM-ddTHH:mm:ss.fffZ} ({lastLogDate.ToUnixTimeSeconds()}s)");
        var offsetLastLogDate = lastLogDate.AddMilliseconds(1);
        Console.WriteLine($" -> Offset timestamp (last + 1ms): {offsetLastLogDate:yyyy-MM-ddTHH:mm:ss.fffZ} ({offsetLastLogDate.ToUnixTimeSeconds()}s)");

        // Convert to seconds, but round UP to the next second to avoid overlapping with logs in the same second
        // This ensures we don't re-query logs that share the same second as the last log
        var offsetLastLogTime = offsetLastLogDate.ToUnixTimeSeconds();
        if (offsetLastLogDate.Millisecond > 0)
        {
            offsetLastLogTime++; // Move to the next full second
            Console.WriteLine($" -> Adjusted to next full second: {offsetLastLogTime}s ({DateTimeOffset.FromUnixTimeSeconds(offsetLastLogTime):yyyy-MM-ddTHH:mm:ss.fffZ})");
        }

        Console.WriteLine($" -> Comparing: offsetLastLogTime={offsetLastLogTime}s vs endTime={endTime}s");
        Console.WriteLine($" -> End time as date: {DateTimeOffset.FromUnixTimeSeconds(endTime):yyyy-MM-ddTHH:mm:ss.fffZ}");

        // Check if there's any time range left to query
        if (offsetLastLogTime >= endTime)
        {
            Console.WriteLine($" -> No time range left to query. Offset time ({offsetLastLogTime}s) >= end time ({endTime}s)");
            return results;
        }

        // Split the remaining date range in half
        var (range1Start, range1End, range2Start, range2End) = SplitDateRange(offsetLastLogTime, endTime);

        var range1StartDate = DateTimeOffset.FromUnixTimeSeconds(range1Start).ToString("yyyy-MM-ddTHH:mm:ss.fffZ");
        var range1EndDate = DateTimeOffset.FromUnixTimeSeconds(range1End).ToString("yyyy-MM-ddTHH:mm:ss.fffZ");
        var range2StartDate = DateTimeOffset.FromUnixTimeSeconds(range2Start).ToString("yyyy-MM-ddTHH:mm:ss.fffZ");
        var range2EndDate = DateTimeOffset.FromUnixTimeSeconds(range2End).ToString("yyyy-MM-ddTHH:mm:ss.fffZ");

        Console.WriteLine($" -> Splitting remaining range:");
        Console.WriteLine($" Range 1: {range1StartDate} ({range1Start}s) to {range1EndDate} ({range1End}s)");
        Console.WriteLine($" Range 2: {range2StartDate} ({range2Start}s) to {range2EndDate} ({range2End}s)");

        // Query both halves recursively
        Console.WriteLine($" -> Querying range 1...");
        var results1 = await PerformLargeQuery(logGroupName, queryString, range1Start, range1End, limit);
        Console.WriteLine($" -> Range 1 returned {results1.Count} logs");

        Console.WriteLine($" -> Querying range 2...");
        var results2 = await PerformLargeQuery(logGroupName, queryString, range2Start, range2End, limit);
        Console.WriteLine($" -> Range 2 returned {results2.Count} logs");

        // Combine all results
        var allResults = new List<List<ResultField>>(results);
        allResults.AddRange(results1);
        allResults.AddRange(results2);
        Console.WriteLine($" -> Combined total: {allResults.Count} logs ({results.Count} + {results1.Count} + {results2.Count})");
        return allResults;
    }

    /// <summary>
    /// Gets the timestamp string of the most recent log from a list of logs.
    /// Sorts timestamps to find the actual last one.
    /// </summary>
    private static string? GetLastLogTimestamp(List<List<ResultField>> logs)
    {
        var timestamps = logs
            .Select(log => log.Find(f => f.Field == "@timestamp")?.Value)
            .Where(t => !string.IsNullOrEmpty(t))
            .OrderBy(t => t)
            .ToList();

        if (timestamps.Count == 0)
        {
            return null;
        }

        return timestamps[timestamps.Count - 1];
    }

    /// <summary>
    /// Splits a date range in half.
    /// Range 2 starts at midpoint + 1 second to avoid overlap.
    /// </summary>
    private static (long range1Start, long range1End, long range2Start, long range2End) SplitDateRange(long startTime, long endTime)
    {
        var midpoint = startTime + (endTime - startTime) / 2;
        // Range 2 starts at midpoint + 1 to avoid querying the same second twice
        return (startTime, midpoint, midpoint + 1, endTime);
    }

    /// <summary>
    /// Polls for query results until complete.
    /// </summary>
    private static async Task<List<List<ResultField>>?> PollQueryResults(string queryId)
    {
        int retryCount = 0;
        const int maxRetries = 60;
        const int retryDelay = 1000;

        while (retryCount < maxRetries)
        {
            var response = await _wrapper.GetQueryResultsAsync(queryId);
            if (response == null)
            {
                return null;
            }

            if (response.Status == QueryStatus.Complete)
            {
                return response.Results;
            }

            if (response.Status == QueryStatus.Failed ||
                response.Status == QueryStatus.Cancelled ||
                response.Status == QueryStatus.Timeout ||
                response.Status == QueryStatus.Unknown)
            {
                _logger.LogError($"Query failed with status: {response.Status}");
                return null;
            }

            await Task.Delay(retryDelay);
            retryCount++;
        }

        _logger.LogError("Timed out waiting for query results.");
        return null;
    }

    /// <summary>
    /// Cleans up the resources created during the scenario.
    /// </summary>
    public static async Task<bool> Cleanup()
    {
        var cleanup = !_interactive || GetYesNoResponse(
            "Do you want to delete the CloudFormation stack and all resources? (y/n) ");

        if (cleanup)
        {
            try
            {
                var stackDeleteSuccess = await DeleteCloudFormationStack(_stackName, false);
                return stackDeleteSuccess;
            }
            catch (Exception ex)
            {
                _logger.LogError(ex, "An error occurred while cleaning up the resources.");
                return false;
            }
        }

        Console.WriteLine($"Resources will remain. Stack name: {_stackName}, Log group: {_logGroupName}");
        _logger.LogInformation("CloudWatch Logs Large Query scenario is complete.");
        return true;
    }

    /// <summary>
    /// Deletes the CloudFormation stack and waits for confirmation.
    /// </summary>
    private static async Task<bool> DeleteCloudFormationStack(string stackName, bool forceDelete)
    {
        var request = new DeleteStackRequest
        {
            StackName = stackName,
        };

        if (forceDelete)
        {
            request.DeletionMode = DeletionMode.FORCE_DELETE_STACK;
        }

        await _amazonCloudFormation.DeleteStackAsync(request);
        Console.WriteLine($"CloudFormation stack '{stackName}' is being deleted. This may take a few minutes.");

        bool stackDeleted = await WaitForStackDeletion(stackName, forceDelete);

        if (stackDeleted)
        {
            Console.WriteLine($"CloudFormation stack '{stackName}' has been deleted.");
            return true;
        }
        else
        {
            _logger.LogError($"Failed to delete CloudFormation stack '{stackName}'.");
            return false;
        }
    }

    /// <summary>
    /// Waits for the stack to be deleted.
    /// </summary>
    private static async Task<bool> WaitForStackDeletion(string stackName, bool forceDelete)
    {
        int retryCount = 0;
        const int maxRetries = 30;
        const int retryDelay = 10000;

        while (retryCount < maxRetries)
        {
            var describeStacksRequest = new DescribeStacksRequest
            {
                StackName = stackName
            };

            try
            {
                var describeStacksResponse = await _amazonCloudFormation.DescribeStacksAsync(describeStacksRequest);

                if (describeStacksResponse.Stacks.Count == 0 ||
                    describeStacksResponse.Stacks[0].StackStatus == StackStatus.DELETE_COMPLETE)
                {
                    return true;
                }

                if (!forceDelete && describeStacksResponse.Stacks[0].StackStatus == StackStatus.DELETE_FAILED)
                {
                    return await DeleteCloudFormationStack(stackName, true);
                }
            }
            catch (AmazonCloudFormationException ex) when (ex.ErrorCode == "ValidationError")
            {
                return true;
            }

            Console.WriteLine($"Waiting for CloudFormation stack '{stackName}' to be deleted...");
            await Task.Delay(retryDelay);
            retryCount++;
        }

        _logger.LogError($"Timed out waiting for CloudFormation stack '{stackName}' to be deleted.");
        return false;
    }

    /// <summary>
    /// Waits with a countdown display.
    /// </summary>
    private static async Task WaitWithCountdown(int seconds)
    {
        for (int i = seconds; i > 0; i--)
        {
            Console.Write($"\rWaiting: {i} seconds remaining... ");
            await Task.Delay(1000);
        }

        Console.WriteLine("\rWait complete. ");
    }

    /// <summary>
    /// Helper method to get a yes or no response from the user.
    /// </summary>
    private static bool GetYesNoResponse(string question)
    {
        Console.WriteLine(question);
        var ynResponse = Console.ReadLine();
        var response = ynResponse != null && ynResponse.Equals("y", StringComparison.InvariantCultureIgnoreCase);
        return response;
    }

    /// <summary>
    /// Prompts the user for a stack name.
    /// </summary>
    private static string PromptUserForStackName()
    {
        if (_interactive)
        {
            Console.Write($"Enter a name for the CloudFormation stack (press Enter for default '{_stackName}'): ");
            string? input = Console.ReadLine();
            if (!string.IsNullOrWhiteSpace(input))
            {
                var regex = "[a-zA-Z][-a-zA-Z0-9]*";
                if (!Regex.IsMatch(input, regex))
                {
                    Console.WriteLine($"Invalid stack name. Using default: {_stackName}");
                    return _stackName;
                }

                return input;
            }
        }

        return _stackName;
    }

    /// <summary>
    /// Prompts the user for input with a default value.
    /// </summary>
    private static string PromptUserForInput(string prompt, string defaultValue)
    {
        if (_interactive)
        {
            Console.Write($"{prompt}(press Enter for default '{defaultValue}'): ");
            string? input = Console.ReadLine();
            return string.IsNullOrWhiteSpace(input) ? defaultValue : input;
        }

        return defaultValue;
    }

    /// <summary>
    /// Prompts the user for an integer value.
    /// </summary>
    private static int PromptUserForInteger(string prompt, int defaultValue)
    {
        if (_interactive)
        {
            Console.Write($"{prompt}(press Enter for default '{defaultValue}'): ");
            string? input = Console.ReadLine();
            if (string.IsNullOrWhiteSpace(input) || !int.TryParse(input, out var result))
            {
                return defaultValue;
            }

            return result;
        }

        return defaultValue;
    }

    /// <summary>
    /// Prompts the user for a long value.
    /// </summary>
    private static long PromptUserForLong(string prompt)
    {
        if (_interactive)
        {
            Console.Write(prompt);
            string? input = Console.ReadLine();
            if (long.TryParse(input, out var result))
            {
                return result;
            }
        }

        return 0;
    }

    /// <summary>
    /// Finds duplicate log entries based on timestamp and message.
    /// </summary>
    private static List<(string Timestamp, string Message, int Count)> FindDuplicateLogs(List<List<ResultField>> logs)
    {
        var logSignatures = new Dictionary<string, int>();

        foreach (var log in logs)
        {
            var timestamp = log.Find(f => f.Field == "@timestamp")?.Value ?? "";
            var message = log.Find(f => f.Field == "@message")?.Value ?? "";
            var signature = $"{timestamp}|{message}";

            if (logSignatures.ContainsKey(signature))
            {
                logSignatures[signature]++;
            }
            else
            {
                logSignatures[signature] = 1;
            }
        }

        return logSignatures
            .Where(kvp => kvp.Value > 1)
            .Select(kvp =>
            {
                var parts = kvp.Key.Split('|');
                return (Timestamp: parts[0], Message: parts[1], Count: kvp.Value);
            })
            .OrderByDescending(x => x.Count)
            .ToList();
    }
}
  • For API details, see StartQuery and GetQueryResults in the Amazon SDK for .NET API Reference.

JavaScript
SDK for JavaScript (v3)
Note

There's more on GitHub. Find the complete example and learn how to set up and run in the Amazon code examples repository.

This is the entry point.

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
import { CloudWatchLogsClient } from "@aws-sdk/client-cloudwatch-logs";
import { CloudWatchQuery } from "./cloud-watch-query.js";

console.log("Starting a recursive query...");

if (!process.env.QUERY_START_DATE || !process.env.QUERY_END_DATE) {
  throw new Error(
    "QUERY_START_DATE and QUERY_END_DATE environment variables are required.",
  );
}

const cloudWatchQuery = new CloudWatchQuery(new CloudWatchLogsClient({}), {
  logGroupNames: ["/workflows/cloudwatch-logs/large-query"],
  dateRange: [
    new Date(Number.parseInt(process.env.QUERY_START_DATE)),
    new Date(Number.parseInt(process.env.QUERY_END_DATE)),
  ],
});

await cloudWatchQuery.run();

console.log(
  `Queries finished in ${cloudWatchQuery.secondsElapsed} seconds.\nTotal logs found: ${cloudWatchQuery.results.length}`,
);

This is a class that splits queries into multiple steps if necessary.

// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
import {
  StartQueryCommand,
  GetQueryResultsCommand,
} from "@aws-sdk/client-cloudwatch-logs";
import { splitDateRange } from "@aws-doc-sdk-examples/lib/utils/util-date.js";
import { retry } from "@aws-doc-sdk-examples/lib/utils/util-timers.js";

class DateOutOfBoundsError extends Error {}

export class CloudWatchQuery {
  /**
   * Run a query for all CloudWatch Logs within a certain date range.
   * CloudWatch logs return a max of 10,000 results. This class
   * performs a binary search across all of the logs in the provided
   * date range if a query returns the maximum number of results.
   *
   * @param {import('@aws-sdk/client-cloudwatch-logs').CloudWatchLogsClient} client
   * @param {{ logGroupNames: string[], dateRange: [Date, Date], queryConfig: { limit: number } }} config
   */
  constructor(client, { logGroupNames, dateRange, queryConfig }) {
    this.client = client;
    /**
     * All log groups are queried.
     */
    this.logGroupNames = logGroupNames;
    /**
     * The inclusive date range that is queried.
     */
    this.dateRange = dateRange;
    /**
     * CloudWatch Logs never returns more than 10,000 logs.
     */
    this.limit = queryConfig?.limit ?? 10000;
    /**
     * @type {import("@aws-sdk/client-cloudwatch-logs").ResultField[][]}
     */
    this.results = [];
  }

  /**
   * Run the query.
   */
  async run() {
    this.secondsElapsed = 0;
    const start = new Date();
    this.results = await this._largeQuery(this.dateRange);
    const end = new Date();
    this.secondsElapsed = (end - start) / 1000;
    return this.results;
  }

  /**
   * Recursively query for logs.
   * @param {[Date, Date]} dateRange
   * @returns {Promise<import("@aws-sdk/client-cloudwatch-logs").ResultField[][]>}
   */
  async _largeQuery(dateRange) {
    const logs = await this._query(dateRange, this.limit);

    console.log(
      `Query date range: ${dateRange
        .map((d) => d.toISOString())
        .join(" to ")}. Found ${logs.length} logs.`,
    );

    if (logs.length < this.limit) {
      return logs;
    }

    const lastLogDate = this._getLastLogDate(logs);
    const offsetLastLogDate = new Date(lastLogDate);
    offsetLastLogDate.setMilliseconds(lastLogDate.getMilliseconds() + 1);
    const subDateRange = [offsetLastLogDate, dateRange[1]];
    const [r1, r2] = splitDateRange(subDateRange);
    const results = await Promise.all([
      this._largeQuery(r1),
      this._largeQuery(r2),
    ]);
    return [logs, ...results].flat();
  }

  /**
   * Find the most recent log in a list of logs.
   * @param {import("@aws-sdk/client-cloudwatch-logs").ResultField[][]} logs
   */
  _getLastLogDate(logs) {
    const timestamps = logs
      .map(
        (log) =>
          log.find((fieldMeta) => fieldMeta.field === "@timestamp")?.value,
      )
      .filter((t) => !!t)
      .map((t) => `${t}Z`)
      .sort();

    if (!timestamps.length) {
      throw new Error("No timestamp found in logs.");
    }

    return new Date(timestamps[timestamps.length - 1]);
  }

  /**
   * Simple wrapper for the GetQueryResultsCommand.
   * @param {string} queryId
   */
  _getQueryResults(queryId) {
    return this.client.send(new GetQueryResultsCommand({ queryId }));
  }

  /**
   * Starts a query and waits for it to complete.
   * @param {[Date, Date]} dateRange
   * @param {number} maxLogs
   */
  async _query(dateRange, maxLogs) {
    try {
      const { queryId } = await this._startQuery(dateRange, maxLogs);
      const { results } = await this._waitUntilQueryDone(queryId);
      return results ?? [];
    } catch (err) {
      /**
       * This error is thrown when StartQuery returns an error indicating
       * that the query's start or end date occur before the log group was
       * created.
       */
      if (err instanceof DateOutOfBoundsError) {
        return [];
      }
      throw err;
    }
  }

  /**
   * Wrapper for the StartQueryCommand. Uses a static query string
   * for consistency.
   * @param {[Date, Date]} dateRange
   * @param {number} maxLogs
   * @returns {Promise<{ queryId: string }>}
   */
  async _startQuery([startDate, endDate], maxLogs = 10000) {
    try {
      return await this.client.send(
        new StartQueryCommand({
          logGroupNames: this.logGroupNames,
          queryString: "fields @timestamp, @message | sort @timestamp asc",
          startTime: startDate.valueOf(),
          endTime: endDate.valueOf(),
          limit: maxLogs,
        }),
      );
    } catch (err) {
      /** @type {string} */
      const message = err.message;
      if (message.startsWith("Query's end date and time")) {
        // This error indicates that the query's start or end date occur
        // before the log group was created.
        throw new DateOutOfBoundsError(message);
      }
      throw err;
    }
  }

  /**
   * Call GetQueryResultsCommand until the query is done.
   * @param {string} queryId
   */
  _waitUntilQueryDone(queryId) {
    const getResults = async () => {
      const results = await this._getQueryResults(queryId);
      const queryDone = [
        "Complete",
        "Failed",
        "Cancelled",
        "Timeout",
        "Unknown",
      ].includes(results.status);

      return { queryDone, results };
    };

    return retry(
      { intervalInMs: 1000, maxRetries: 60, quiet: true },
      async () => {
        const { queryDone, results } = await getResults();
        if (!queryDone) {
          throw new Error("Query not done.");
        }

        return results;
      },
    );
  }
}
  • For API details, see StartQuery and GetQueryResults in the Amazon SDK for JavaScript API Reference.

Python
SDK for Python (Boto3)
Note

There's more on GitHub. Find the complete example and learn how to set up and run in the Amazon code examples repository.

This file invokes an example module for managing CloudWatch queries that exceed 10,000 results.

import logging
import os
import sys

import boto3
from botocore.config import Config

from cloudwatch_query import CloudWatchQuery
from date_utilities import DateUtilities

# Configure logging at the module level.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(filename)s:%(lineno)d - %(message)s",
)

DEFAULT_QUERY_LOG_GROUP = "/workflows/cloudwatch-logs/large-query"


class CloudWatchLogsQueryRunner:
    def __init__(self):
        """
        Initializes the CloudWatchLogsQueryRunner class by setting up date utilities
        and creating a CloudWatch Logs client with retry configuration.
        """
        self.date_utilities = DateUtilities()
        self.cloudwatch_logs_client = self.create_cloudwatch_logs_client()

    def create_cloudwatch_logs_client(self):
        """
        Creates and returns a CloudWatch Logs client with a specified retry configuration.

        :return: A CloudWatch Logs client instance.
        :rtype: boto3.client
        """
        try:
            return boto3.client("logs", config=Config(retries={"max_attempts": 10}))
        except Exception as e:
            logging.error(f"Failed to create CloudWatch Logs client: {e}")
            sys.exit(1)

    def fetch_environment_variables(self):
        """
        Fetches and validates required environment variables for query start and end dates.
        Fetches the environment variable for log group, returning the default value if it
        does not exist.

        :return: Tuple of query start date and end date as integers and the log group.
        :rtype: tuple
        :raises SystemExit: If required environment variables are missing or invalid.
        """
        try:
            query_start_date = int(os.environ["QUERY_START_DATE"])
            query_end_date = int(os.environ["QUERY_END_DATE"])
        except KeyError:
            logging.error(
                "Both QUERY_START_DATE and QUERY_END_DATE environment variables are required."
            )
            sys.exit(1)
        except ValueError as e:
            logging.error(f"Error parsing date environment variables: {e}")
            sys.exit(1)

        try:
            log_group = os.environ["QUERY_LOG_GROUP"]
        except KeyError:
            logging.warning("No QUERY_LOG_GROUP environment variable, using default value")
            log_group = DEFAULT_QUERY_LOG_GROUP

        return query_start_date, query_end_date, log_group

    def convert_dates_to_iso8601(self, start_date, end_date):
        """
        Converts UNIX timestamp dates to ISO 8601 format using DateUtilities.

        :param start_date: The start date in UNIX timestamp.
        :type start_date: int
        :param end_date: The end date in UNIX timestamp.
        :type end_date: int
        :return: Start and end dates in ISO 8601 format.
        :rtype: tuple
        """
        start_date_iso8601 = self.date_utilities.convert_unix_timestamp_to_iso8601(
            start_date
        )
        end_date_iso8601 = self.date_utilities.convert_unix_timestamp_to_iso8601(
            end_date
        )
        return start_date_iso8601, end_date_iso8601

    def execute_query(
        self,
        start_date_iso8601,
        end_date_iso8601,
        log_group="/workflows/cloudwatch-logs/large-query",
        query="fields @timestamp, @message | sort @timestamp asc",
    ):
        """
        Creates a CloudWatchQuery instance and executes the query with provided date range.

        :param start_date_iso8601: The start date in ISO 8601 format.
        :type start_date_iso8601: str
        :param end_date_iso8601: The end date in ISO 8601 format.
        :type end_date_iso8601: str
        :param log_group: Log group to search: "/workflows/cloudwatch-logs/large-query"
        :type log_group: str
        :param query: Query string to pass to the CloudWatchQuery instance
        :type query: str
        """
        cloudwatch_query = CloudWatchQuery(
            log_group=log_group,
            query_string=query,
        )
        cloudwatch_query.query_logs((start_date_iso8601, end_date_iso8601))
        logging.info("Query executed successfully.")
        logging.info(
            f"Queries completed in {cloudwatch_query.query_duration} seconds. "
            f"Total logs found: {len(cloudwatch_query.query_results)}"
        )


def main():
    """
    Main function to start a recursive CloudWatch logs query.
    Fetches required environment variables, converts dates, and executes the query.
    """
    logging.info("Starting a recursive CloudWatch logs query...")
    runner = CloudWatchLogsQueryRunner()
    query_start_date, query_end_date, log_group = runner.fetch_environment_variables()
    start_date_iso8601 = DateUtilities.convert_unix_timestamp_to_iso8601(
        query_start_date
    )
    end_date_iso8601 = DateUtilities.convert_unix_timestamp_to_iso8601(query_end_date)
    runner.execute_query(start_date_iso8601, end_date_iso8601, log_group=log_group)


if __name__ == "__main__":
    main()
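
The runner above requires the QUERY_START_DATE and QUERY_END_DATE environment variables (QUERY_LOG_GROUP is optional). A hypothetical local invocation might look like the following sketch; whether the timestamps are interpreted as seconds or milliseconds depends on the DateUtilities implementation, so the millisecond values here are an assumption.

# Hypothetical local run of the entry point above. The epoch unit
# (seconds vs. milliseconds) depends on date_utilities.py, so the
# millisecond values below are an assumption -- adjust to match your copy.
import os
import time

now_ms = int(time.time() * 1000)
os.environ["QUERY_START_DATE"] = str(now_ms - 60 * 60 * 1000)  # one hour ago
os.environ["QUERY_END_DATE"] = str(now_ms)
os.environ["QUERY_LOG_GROUP"] = "/workflows/cloudwatch-logs/large-query"

main()  # main() as defined in the entry-point file above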

This module handles CloudWatch queries that exceed 10,000 results.

import logging
import time
from datetime import datetime
import threading

import boto3

from date_utilities import DateUtilities

DEFAULT_QUERY = "fields @timestamp, @message | sort @timestamp asc"
DEFAULT_LOG_GROUP = "/workflows/cloudwatch-logs/large-query"


class DateOutOfBoundsError(Exception):
    """Exception raised when the date range for a query is out of bounds."""

    pass


class CloudWatchQuery:
    """
    A class to query AWS CloudWatch logs within a specified date range.

    :vartype date_range: tuple
    :ivar limit: Maximum number of log entries to return.
    :vartype limit: int
    :log_group str: Name of the log group to query
    :query_string str: query
    """

    def __init__(
        self,
        log_group: str = DEFAULT_LOG_GROUP,
        query_string: str = DEFAULT_QUERY,
    ) -> None:
        self.lock = threading.Lock()
        self.log_group = log_group
        self.query_string = query_string
        self.query_results = []
        self.query_duration = None
        self.datetime_format = "%Y-%m-%d %H:%M:%S.%f"
        self.date_utilities = DateUtilities()
        self.limit = 10000

    def query_logs(self, date_range):
        """
        Executes a CloudWatch logs query for a specified date range and
        calculates the execution time of the query.

        :return: A batch of logs retrieved from the CloudWatch logs query.
        :rtype: list
        """
        start_time = datetime.now()

        start_date, end_date = self.date_utilities.normalize_date_range_format(
            date_range, from_format="unix_timestamp", to_format="datetime"
        )

        logging.info(
            f"Original query:"
            f"\n START: {start_date}"
            f"\n END: {end_date}"
            f"\n LOG GROUP: {self.log_group}"
        )
        self.recursive_query((start_date, end_date))
        end_time = datetime.now()
        self.query_duration = (end_time - start_time).total_seconds()

    def recursive_query(self, date_range):
        """
        Processes logs within a given date range, fetching batches of logs
        recursively if necessary.

        :param date_range: The date range to fetch logs for, specified as a tuple
            (start_timestamp, end_timestamp).
        :type date_range: tuple
        :return: None if the recursive fetching is continued or stops when the
            final batch of logs is processed. Although it doesn't explicitly
            return the query results, this method accumulates all fetched logs
            in the `self.query_results` attribute.
        :rtype: None
        """
        batch_of_logs = self.perform_query(date_range)
        # Add the batch to the accumulated logs
        with self.lock:
            self.query_results.extend(batch_of_logs)
        if len(batch_of_logs) == self.limit:
            logging.info(f"Fetched {self.limit}, checking for more...")
            most_recent_log = self.find_most_recent_log(batch_of_logs)
            most_recent_log_timestamp = next(
                item["value"]
                for item in most_recent_log
                if item["field"] == "@timestamp"
            )
            new_range = (most_recent_log_timestamp, date_range[1])
            midpoint = self.date_utilities.find_middle_time(new_range)

            first_half_thread = threading.Thread(
                target=self.recursive_query,
                args=((most_recent_log_timestamp, midpoint),),
            )
            second_half_thread = threading.Thread(
                target=self.recursive_query, args=((midpoint, date_range[1]),)
            )

            first_half_thread.start()
            second_half_thread.start()

            first_half_thread.join()
            second_half_thread.join()

    def find_most_recent_log(self, logs):
        """
        Search a list of log items and return most recent log entry.

        :param logs: A list of logs to analyze.
        :return: log
        :type :return List containing log item details
        """
        most_recent_log = None
        most_recent_date = "1970-01-01 00:00:00.000"

        for log in logs:
            for item in log:
                if item["field"] == "@timestamp":
                    logging.debug(f"Compared: {item['value']} to {most_recent_date}")
                    if (
                        self.date_utilities.compare_dates(
                            item["value"], most_recent_date
                        )
                        == item["value"]
                    ):
                        logging.debug(f"New most recent: {item['value']}")
                        most_recent_date = item["value"]
                        most_recent_log = log
        logging.info(f"Most recent log date of batch: {most_recent_date}")
        return most_recent_log

    def perform_query(self, date_range):
        """
        Performs the actual CloudWatch log query.

        :param date_range: A tuple representing the start and end datetime for the query.
        :type date_range: tuple
        :return: A list containing the query results.
        :rtype: list
        """
        client = boto3.client("logs")
        try:
            try:
                start_time = round(
                    self.date_utilities.convert_iso8601_to_unix_timestamp(date_range[0])
                )
                end_time = round(
                    self.date_utilities.convert_iso8601_to_unix_timestamp(date_range[1])
                )
                response = client.start_query(
                    logGroupName=self.log_group,
                    startTime=start_time,
                    endTime=end_time,
                    queryString=self.query_string,
                    limit=self.limit,
                )
                query_id = response["queryId"]
            except client.exceptions.ResourceNotFoundException as e:
                raise DateOutOfBoundsError(f"Resource not found: {e}")
            while True:
                time.sleep(1)
                results = client.get_query_results(queryId=query_id)
                if results["status"] in [
                    "Complete",
                    "Failed",
                    "Cancelled",
                    "Timeout",
                    "Unknown",
                ]:
                    return results.get("results", [])
        except DateOutOfBoundsError:
            return []

    def _initiate_query(self, client, date_range, max_logs):
        """
        Initiates the CloudWatch logs query.

        :param date_range: A tuple representing the start and end datetime for the query.
        :type date_range: tuple
        :param max_logs: The maximum number of logs to retrieve.
        :type max_logs: int
        :return: The query ID as a string.
        :rtype: str
        """
        try:
            start_time = round(
                self.date_utilities.convert_iso8601_to_unix_timestamp(date_range[0])
            )
            end_time = round(
                self.date_utilities.convert_iso8601_to_unix_timestamp(date_range[1])
            )
            response = client.start_query(
                logGroupName=self.log_group,
                startTime=start_time,
                endTime=end_time,
                queryString=self.query_string,
                limit=max_logs,
            )
            return response["queryId"]
        except client.exceptions.ResourceNotFoundException as e:
            raise DateOutOfBoundsError(f"Resource not found: {e}")

    def _wait_for_query_results(self, client, query_id):
        """
        Waits for the query to complete and retrieves the results.

        :param query_id: The ID of the initiated query.
        :type query_id: str
        :return: A list containing the results of the query.
        :rtype: list
        """
        while True:
            time.sleep(1)
            results = client.get_query_results(queryId=query_id)
            if results["status"] in [
                "Complete",
                "Failed",
                "Cancelled",
                "Timeout",
                "Unknown",
            ]:
                return results.get("results", [])
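
Both Python files import DateUtilities from date_utilities.py, which this page does not show. The sketch below illustrates one plausible implementation of the methods the example calls, assuming timestamps use the "%Y-%m-%d %H:%M:%S.%f" format from the datetime_format attribute above and that Unix timestamps are in seconds; check date_utilities.py in the code examples repository for the authoritative version.

# A plausible sketch of the DateUtilities helper imported above; this is an
# assumption, not the repository implementation.
from datetime import datetime, timezone

FMT = "%Y-%m-%d %H:%M:%S.%f"


class DateUtilities:
    @staticmethod
    def convert_unix_timestamp_to_iso8601(timestamp):
        # Assumes the timestamp is in seconds since the epoch.
        return datetime.fromtimestamp(timestamp, tz=timezone.utc).strftime(FMT)

    @staticmethod
    def convert_iso8601_to_unix_timestamp(date_string):
        dt = datetime.strptime(date_string, FMT).replace(tzinfo=timezone.utc)
        return dt.timestamp()

    def normalize_date_range_format(self, date_range, from_format, to_format):
        # The sketch ignores from_format/to_format: already-formatted strings
        # pass through unchanged, raw epoch numbers are converted.
        start, end = date_range
        if isinstance(start, (int, float)):
            start = self.convert_unix_timestamp_to_iso8601(start)
        if isinstance(end, (int, float)):
            end = self.convert_unix_timestamp_to_iso8601(end)
        return start, end

    def compare_dates(self, first, second):
        # Zero-padded "%Y-%m-%d %H:%M:%S.%f" strings sort chronologically,
        # so the later date is simply the lexical maximum.
        return max(first, second)

    def find_middle_time(self, date_range):
        start = self.convert_iso8601_to_unix_timestamp(date_range[0])
        end = self.convert_iso8601_to_unix_timestamp(date_range[1])
        return self.convert_unix_timestamp_to_iso8601((start + end) / 2)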
  • For API details, see StartQuery and GetQueryResults in the Amazon SDK for Python (Boto3) API Reference.

For a complete list of Amazon SDK developer guides and code examples, see Using CloudWatch Logs with an Amazon SDK. This topic also includes information about getting started and details about previous SDK versions.