Skip to content

Commit

Permalink
NRL-693 add new alarm and sns topic for lambda error cloudwatch alarm
Browse files Browse the repository at this point in the history
  • Loading branch information
eesa456 committed Jul 10, 2024
1 parent 84c21ba commit 7ec4efc
Show file tree
Hide file tree
Showing 7 changed files with 268 additions and 0 deletions.
40 changes: 40 additions & 0 deletions terraform/infrastructure/cloudwatch.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Instantiates the local SNS module; its `sns_topic_arn` output is consumed by
# the alarm and subscription modules declared in this file.
module "aws_sns_topic" {
  source = "./modules/sns"

  prefix = local.prefix
  name   = "nrlf_lambda_errors_topic"
}

# CloudWatch alarm on the AWS/Lambda "Errors" metric. When the alarm enters the
# ALARM state it publishes to the SNS topic exposed by module.aws_sns_topic.
# No `dimensions` are passed, so the metric is evaluated without any
# per-function dimension filter.
module "aws_cloudwatch_metric_alarm" {
  source = "./modules/cloudwatch"
  name   = "nrlf_lambda_errors"
  prefix = local.prefix

  # Must be strictly greater than the threshold: with
  # GreaterThanOrEqualToThreshold and a threshold of 0, every datapoint
  # (including periods that report 0 errors) would breach and the alarm would
  # fire on healthy invocations.
  comparison_operator = "GreaterThanThreshold"
  threshold           = 0

  # Evaluate a single 60-second period at a time.
  evaluation_periods = 1
  period             = 60

  metric_name = "Errors"
  namespace   = "AWS/Lambda"
  statistic   = "Maximum"
  unit        = "Count"

  alarm_description = "This metric monitors the number of Lambda errors that have occurred"
  alarm_actions     = [module.aws_sns_topic.sns_topic_arn]
}

# Email subscription on the topic exposed by module.aws_sns_topic; the endpoint
# is an address under the slack.com domain.
module "aws_sns_topic_subscription" {
  source = "./modules/sns"

  name   = "slack_email_subscription"
  prefix = local.prefix

  protocol  = "email"
  endpoint  = "spine-cell-sigma-noti-aaaalor2u6funj7q3a4v7cpuba@nhsdigitalcorporate.org.slack.com"
  topic_arn = module.aws_sns_topic.sns_topic_arn
}

# Second email subscription on the topic exposed by module.aws_sns_topic.
# The module label must be unique within this configuration: Terraform rejects
# two module blocks with the same name, so this one carries a distinct suffix.
module "aws_sns_topic_subscription_me" {
  source = "./modules/sns"
  prefix = local.prefix
  name   = "me_email_subscription"

  topic_arn = module.aws_sns_topic.sns_topic_arn
  protocol  = "email"
  # NOTE(review): this endpoint value looks like a redacted placeholder rather
  # than a deliverable address - confirm the real address before applying.
  endpoint = "[email protected]"
}
57 changes: 57 additions & 0 deletions terraform/infrastructure/modules/cloudwatch/cloudwatch.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Generic CloudWatch metric alarm. Every argument is passed straight through
# from a module variable; the alarm is created only when
# var.create_metric_alarm is true.
resource "aws_cloudwatch_metric_alarm" "metric_alarm" {
  count = var.create_metric_alarm ? 1 : 0

  # Identity
  alarm_name        = "${var.prefix}--${var.name}"
  alarm_description = var.alarm_description
  tags              = var.tags

  # Actions taken on state transitions
  actions_enabled           = var.actions_enabled
  alarm_actions             = var.alarm_actions
  ok_actions                = var.ok_actions
  insufficient_data_actions = var.insufficient_data_actions

  # Evaluation settings
  comparison_operator                   = var.comparison_operator
  evaluation_periods                    = var.evaluation_periods
  threshold                             = var.threshold
  threshold_metric_id                   = var.threshold_metric_id
  unit                                  = var.unit
  datapoints_to_alarm                   = var.datapoints_to_alarm
  treat_missing_data                    = var.treat_missing_data
  evaluate_low_sample_count_percentiles = var.evaluate_low_sample_count_percentiles

  # Single-metric form (conflicts with metric_query)
  metric_name        = var.metric_name
  namespace          = var.namespace
  period             = var.period
  statistic          = var.statistic
  extended_statistic = var.extended_statistic
  dimensions         = var.dimensions

  # Metric-math form (conflicts with metric_name): one metric_query block is
  # generated per element of var.metric_query.
  dynamic "metric_query" {
    for_each = var.metric_query

    content {
      # "id" has no fallback, so it must be present on every element.
      id          = lookup(metric_query.value, "id")
      account_id  = lookup(metric_query.value, "account_id", null)
      label       = lookup(metric_query.value, "label", null)
      return_data = lookup(metric_query.value, "return_data", null)
      expression  = lookup(metric_query.value, "expression", null)
      period      = lookup(metric_query.value, "period", null)

      # Nested metric blocks are generated from the optional "metric" key of
      # the query element; an absent key yields no blocks.
      dynamic "metric" {
        for_each = lookup(metric_query.value, "metric", [])

        content {
          # The first four lookups have no fallback; unit and dimensions
          # fall back to null.
          metric_name = lookup(metric.value, "metric_name")
          namespace   = lookup(metric.value, "namespace")
          period      = lookup(metric.value, "period")
          stat        = lookup(metric.value, "stat")
          unit        = lookup(metric.value, "unit", null)
          dimensions  = lookup(metric.value, "dimensions", null)
        }
      }
    }
  }
}
4 changes: 4 additions & 0 deletions terraform/infrastructure/modules/cloudwatch/output.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# ARN of the alarm created by this module, or "" when no alarm was created.
output "cloudwatch_metric_alarm_arn" {
  description = "The ARN of the Cloudwatch metric alarm."
  # The alarm resource sets `count`, so its attributes must be read from a
  # specific instance. try() falls back to "" when create_metric_alarm is
  # false and instance [0] does not exist.
  value = try(aws_cloudwatch_metric_alarm.metric_alarm[0].arn, "")
}
133 changes: 133 additions & 0 deletions terraform/infrastructure/modules/cloudwatch/vars.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Toggle for the whole module: drives the `count` on the alarm resource.
variable "create_metric_alarm" {
  type        = bool
  default     = true
  description = "Whether to create the Cloudwatch metric alarm"
}

variable "alarm_description" {
  type        = string
  default     = null
  description = "The description for the alarm."
}

# Required: no default is provided.
variable "comparison_operator" {
  type        = string
  description = "The arithmetic operation to use when comparing the specified Statistic and Threshold. The specified Statistic value is used as the first operand. Either of the following is supported: GreaterThanOrEqualToThreshold, GreaterThanThreshold, LessThanThreshold, LessThanOrEqualToThreshold."
}

# Required: no default is provided.
variable "evaluation_periods" {
  type        = number
  description = "The number of periods over which data is compared to the specified threshold."
}

variable "threshold" {
  type        = number
  default     = null
  description = "The value against which the specified statistic is compared."
}

variable "threshold_metric_id" {
  type        = string
  default     = null
  description = "If this is an alarm based on an anomaly detection model, make this value match the ID of the ANOMALY_DETECTION_BAND function."
}

variable "unit" {
  type        = string
  default     = null
  description = "The unit for the alarm's associated metric."
}

variable "metric_name" {
  type        = string
  default     = null
  description = "The name for the alarm's associated metric. See docs for supported metrics."
}

variable "namespace" {
  type        = string
  default     = null
  description = "The namespace for the alarm's associated metric. See docs for the list of namespaces. See docs for supported metrics."
}

# Declared as a number because the value is a count of seconds; callers in
# this repository pass a numeric literal.
variable "period" {
  description = "The period in seconds over which the specified statistic is applied."
  type        = number
  default     = null
}

variable "statistic" {
  type        = string
  default     = null
  description = "The statistic to apply to the alarm's associated metric. Either of the following is supported: SampleCount, Average, Sum, Minimum, Maximum"
}

variable "actions_enabled" {
  type        = bool
  default     = true
  description = "Indicates whether or not actions should be executed during any changes to the alarm's state. Defaults to true."
}

variable "datapoints_to_alarm" {
  type        = number
  default     = null
  description = "The number of datapoints that must be breaching to trigger the alarm."
}

# Typed as `any`; the value is handed to the alarm resource unchanged.
variable "dimensions" {
  type        = any
  default     = null
  description = "The dimensions for the alarm's associated metric."
}

variable "alarm_actions" {
  type        = list(string)
  default     = null
  description = "The list of actions to execute when this alarm transitions into an ALARM state from any other state. Each action is specified as an Amazon Resource Name (ARN)."
}

variable "insufficient_data_actions" {
  type        = list(string)
  default     = null
  description = "The list of actions to execute when this alarm transitions into an INSUFFICIENT_DATA state from any other state. Each action is specified as an Amazon Resource Name (ARN)."
}

variable "ok_actions" {
  type        = list(string)
  default     = null
  description = "The list of actions to execute when this alarm transitions into an OK state from any other state. Each action is specified as an Amazon Resource Name (ARN)."
}

variable "extended_statistic" {
  type        = string
  default     = null
  description = "The percentile statistic for the metric associated with the alarm. Specify a value between p0.0 and p100."
}

variable "treat_missing_data" {
  type        = string
  default     = "missing"
  description = "Sets how this alarm is to handle missing data points. The following values are supported: missing, ignore, breaching and notBreaching."
}

variable "evaluate_low_sample_count_percentiles" {
  type        = string
  default     = null
  description = "Used only for alarms based on percentiles. If you specify ignore, the alarm state will not change during periods with too few data points to be statistically significant. If you specify evaluate or omit this parameter, the alarm will always be evaluated and possibly change state no matter how many data points are available. The following values are supported: ignore, and evaluate."
}

# Typed as `any`; each element is expanded into a metric_query block by the
# alarm resource. The empty-list default produces no metric_query blocks.
variable "metric_query" {
  type        = any
  default     = []
  description = "Enables you to create an alarm based on a metric math expression. You may specify at most 20."
}

variable "tags" {
  type        = map(string)
  default     = {}
  description = "A mapping of tags to assign to all resources"
}

# Both values are interpolated into the alarm name as "<prefix>--<name>", so
# they are declared as strings. Neither has a default; callers must set both.
variable "prefix" {
  description = "Prefix prepended to the alarm name."
  type        = string
}

variable "name" {
  description = "Name appended to the prefix to form the alarm name."
  type        = string
}
4 changes: 4 additions & 0 deletions terraform/infrastructure/modules/sns/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Exposes the ARN of the aws_sns_topic.lambda_errors resource to callers.
output "sns_topic_arn" {
  value       = aws_sns_topic.lambda_errors.arn
  description = "ARN"
}
9 changes: 9 additions & 0 deletions terraform/infrastructure/modules/sns/sns.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# SNS topic named "<prefix>--<name>" from the module inputs.
resource "aws_sns_topic" "lambda_errors" {
  name = "${var.prefix}--${var.name}"
}

# Subscribes var.endpoint to var.topic_arn using var.protocol.
resource "aws_sns_topic_subscription" "email_subscription" {
  # Create the subscription only when an endpoint was supplied. Callers that
  # use this module just for its topic leave endpoint at its "" default (and
  # topic_arn at null); without this guard they would plan a subscription
  # with no topic ARN and an empty endpoint.
  count = var.endpoint != null && var.endpoint != "" ? 1 : 0

  topic_arn = var.topic_arn
  protocol  = var.protocol
  endpoint  = var.endpoint
}
21 changes: 21 additions & 0 deletions terraform/infrastructure/modules/sns/vars.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Both values are interpolated into the topic name as "<prefix>--<name>", so
# they are declared as strings. Neither has a default; callers must set both.
variable "prefix" {
  description = "Prefix prepended to the SNS topic name."
  type        = string
}

variable "name" {
  description = "Name appended to the prefix to form the SNS topic name."
  type        = string
}

# Inputs for the subscription resource in this module. All three are optional.
variable "topic_arn" {
  type        = string
  default     = null
  description = "The ARN of the SNS topic."
}

variable "protocol" {
  type        = string
  default     = "email"
  description = "Used to define the protocol for the sns subscription."
}

variable "endpoint" {
  type        = string
  default     = ""
  description = "Used to define the endpoint for the sns subscription to which it sends the events."
}

0 comments on commit 7ec4efc

Please sign in to comment.