From efaae921aaf8225e87836dbfc7261698ec69b071 Mon Sep 17 00:00:00 2001 From: ariagraham-nhs Date: Fri, 1 May 2026 16:47:03 +0100 Subject: [PATCH 1/6] VED-1170: CloudWatch Alarm without actions --- .../fhir_api_perf_errors_slack_chatbot.tf | 24 +++++++++++++++++++ .../account/fhir_api_perf_errors_sns_topic.tf | 22 +++++++++++++++++ infrastructure/account/kms.tf | 6 +++++ 3 files changed, 52 insertions(+) create mode 100644 infrastructure/account/fhir_api_perf_errors_slack_chatbot.tf create mode 100644 infrastructure/account/fhir_api_perf_errors_sns_topic.tf diff --git a/infrastructure/account/fhir_api_perf_errors_slack_chatbot.tf b/infrastructure/account/fhir_api_perf_errors_slack_chatbot.tf new file mode 100644 index 0000000000..88fe488df1 --- /dev/null +++ b/infrastructure/account/fhir_api_perf_errors_slack_chatbot.tf @@ -0,0 +1,24 @@ +resource "aws_chatbot_slack_channel_configuration" "fhir_api_perf_alerts" { + configuration_name = "${var.environment}-fhir-api-perf-alerts-slack-channel-config" + iam_role_arn = aws_iam_role.fhir_api_perf_alerts_chatbot.arn + slack_channel_id = var.environment == "prod" ? "C0B11MJPQ6A" : "C0B1GKZ5S4R" + slack_team_id = "TJ00QR03U" + sns_topic_arns = [aws_sns_topic.fhir_api_perf_alerts.arn] +} + +resource "aws_iam_role" "fhir_api_perf_alerts_chatbot" { + name = "${var.environment}-fhir-api-perf-alerts-chatbot-channel-role" + assume_role_policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Action = "sts:AssumeRole" + Effect = "Allow" + Sid = "AssumeChatbotRole" + Principal = { + Service = "chatbot.amazonaws.com" + } + }, + ] + }) +} diff --git a/infrastructure/account/fhir_api_perf_errors_sns_topic.tf b/infrastructure/account/fhir_api_perf_errors_sns_topic.tf new file mode 100644 index 0000000000..00fcda4576 --- /dev/null +++ b/infrastructure/account/fhir_api_perf_errors_sns_topic.tf @@ -0,0 +1,22 @@ +resource "aws_sns_topic" "fhir_api_perf_alerts" { + name = "${var.environment}-fhir-api-perf-alerts" + kms_master_key_id = aws_kms_key.error_alerts_sns_encryption_key.arn +} + +resource "aws_sns_topic_policy" "fhir_api_perf_alerts_topic_policy" { + arn = aws_sns_topic.fhir_api_perf_alerts.arn + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Sid = "AllowCloudWatchToPublish", + Effect = "Allow", + Principal = { + Service = "cloudwatch.amazonaws.com" + }, + Action = "SNS:Publish", + Resource = aws_sns_topic.fhir_api_perf_alerts.arn + } + ] + }) +} diff --git a/infrastructure/account/kms.tf b/infrastructure/account/kms.tf index 21e5e2a78e..563c7bdc72 100644 --- a/infrastructure/account/kms.tf +++ b/infrastructure/account/kms.tf @@ -225,3 +225,9 @@ resource "aws_kms_alias" "fhir_api_errors_sns_encryption_key" { name = "alias/${var.environment}-fhir-api-errors-imms-sns-encryption" target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id } + +resource "aws_kms_alias" "fhir_api_perf_alerts_sns_encryption_key" { + name = "alias/${var.environment}-fhir-api-perf-alerts-imms-sns-encryption" + target_key_id = aws_kms_key.error_alerts_sns_encryption_key.key_id +} + From 70ac3c1b8f94bed160101249489af0028220cb5f Mon Sep 17 00:00:00 2001 From: ariagraham-nhs Date: Tue, 5 May 2026 11:59:43 +0100 Subject: [PATCH 2/6] Add action to existing alarm --- ...lack_chatbot.tf => fhir_api_perf_alerts_slack_chatbot.tf} | 0 ...errors_sns_topic.tf => fhir_api_perf_alerts_sns_topic.tf} | 0 infrastructure/instance/modules/lambda/lambda.tf | 5 +++++ 3 files changed, 5 insertions(+) rename infrastructure/account/{fhir_api_perf_errors_slack_chatbot.tf => fhir_api_perf_alerts_slack_chatbot.tf} (100%) rename infrastructure/account/{fhir_api_perf_errors_sns_topic.tf => fhir_api_perf_alerts_sns_topic.tf} (100%) diff --git a/infrastructure/account/fhir_api_perf_errors_slack_chatbot.tf b/infrastructure/account/fhir_api_perf_alerts_slack_chatbot.tf similarity index 100% rename from infrastructure/account/fhir_api_perf_errors_slack_chatbot.tf rename to infrastructure/account/fhir_api_perf_alerts_slack_chatbot.tf diff --git a/infrastructure/account/fhir_api_perf_errors_sns_topic.tf b/infrastructure/account/fhir_api_perf_alerts_sns_topic.tf similarity index 100% rename from infrastructure/account/fhir_api_perf_errors_sns_topic.tf rename to infrastructure/account/fhir_api_perf_alerts_sns_topic.tf diff --git a/infrastructure/instance/modules/lambda/lambda.tf b/infrastructure/instance/modules/lambda/lambda.tf index 9714614c04..87e38808d3 100644 --- a/infrastructure/instance/modules/lambda/lambda.tf +++ b/infrastructure/instance/modules/lambda/lambda.tf @@ -24,6 +24,10 @@ module "lambda_function_container_image" { image_config_command = ["${var.function_name}_handler.${var.function_name}_handler"] } +data "aws_sns_topic" "fhir_api_perf_alerts" { + name = "${var.environment}-fhir-api-perf-alerts" +} + resource "aws_cloudwatch_metric_alarm" "memory_alarm" { alarm_name = "${var.short_prefix}_${var.function_name} memory alarm" comparison_operator = "GreaterThanOrEqualToThreshold" @@ -34,6 +38,7 @@ resource "aws_cloudwatch_metric_alarm" "memory_alarm" { statistic = "Maximum" threshold = 256 alarm_description = "This metric monitors Lambda memory usage" + alarm_actions = [data.aws_sns_topic.fhir_api_perf_alerts.arn] insufficient_data_actions = [] } From b2bf711da84e5131b9e172befe6ad2dab0fd1ac2 Mon Sep 17 00:00:00 2001 From: ariagraham-nhs Date: Wed, 6 May 2026 11:32:20 +0100 Subject: [PATCH 3/6] Add action to DDoS alarm --- infrastructure/account/shield_protection.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/infrastructure/account/shield_protection.tf b/infrastructure/account/shield_protection.tf index 0809c97d08..a7a6770830 100644 --- a/infrastructure/account/shield_protection.tf +++ b/infrastructure/account/shield_protection.tf @@ -34,6 +34,10 @@ locals { } } +# Topic to publish alerts to when alarm is triggered +data "aws_sns_topic" "fhir_api_perf_alerts" { + name = "${var.environment}-fhir-api-perf-alerts" +} # Create Metric Alarms for each of those resources resource "aws_cloudwatch_metric_alarm" "ddos_protection_regional" { @@ -41,6 +45,7 @@ resource "aws_cloudwatch_metric_alarm" "ddos_protection_regional" { alarm_name = "imms-${var.environment}-shield_ddos_${each.key}" alarm_description = "Alarm when Shield detects DDoS on ${each.key}" + alarm_actions = [data.aws_sns_topic.fhir_api_perf_alerts.arn] namespace = "AWS/DDoSProtection" metric_name = "DDoSDetected" From 3164531b039cf3e355bbe72a8153c5757505b6f4 Mon Sep 17 00:00:00 2001 From: ariagraham-nhs Date: Thu, 7 May 2026 15:29:49 +0100 Subject: [PATCH 4/6] Update alarm to base trigger off of percentage of memory used --- .../instance/modules/lambda/lambda.tf | 25 +++++++++---------- .../instance/modules/lambda/variables.tf | 6 +++++ 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/infrastructure/instance/modules/lambda/lambda.tf b/infrastructure/instance/modules/lambda/lambda.tf index 87e38808d3..ed95054a1e 100644 --- a/infrastructure/instance/modules/lambda/lambda.tf +++ b/infrastructure/instance/modules/lambda/lambda.tf @@ -18,7 +18,7 @@ module "lambda_function_container_image" { # A JWT encode took 7 seconds at default memory size of 128 and 0.8 seconds at 1024. # 2048 gets it down to around 0.5 but since Lambda is charged at GB * ms then it costs more for minimal benefit. - memory_size = 1024 + memory_size = var.memory_size environment_variables = var.environment_variables image_config_command = ["${var.function_name}_handler.${var.function_name}_handler"] @@ -29,18 +29,17 @@ data "aws_sns_topic" "fhir_api_perf_alerts" { } resource "aws_cloudwatch_metric_alarm" "memory_alarm" { - alarm_name = "${var.short_prefix}_${var.function_name} memory alarm" - comparison_operator = "GreaterThanOrEqualToThreshold" - evaluation_periods = 1 - metric_name = aws_cloudwatch_log_metric_filter.max_memory_used_metric.metric_transformation[0].name - namespace = aws_cloudwatch_log_metric_filter.max_memory_used_metric.metric_transformation[0].namespace - period = 600 - statistic = "Maximum" - threshold = 256 - alarm_description = "This metric monitors Lambda memory usage" - alarm_actions = [data.aws_sns_topic.fhir_api_perf_alerts.arn] - insufficient_data_actions = [] - + alarm_name = "${var.short_prefix}_${var.function_name} memory alarm" + comparison_operator = "GreaterThanOrEqualToThreshold" + evaluation_periods = 1 + metric_name = aws_cloudwatch_log_metric_filter.max_memory_used_metric.metric_transformation[0].name + namespace = aws_cloudwatch_log_metric_filter.max_memory_used_metric.metric_transformation[0].namespace + period = 600 + statistic = "Maximum" + threshold = var.memory_size * 0.85 # Alarm threshold set at 85% of memory size + alarm_description = "This metric monitors Lambda memory usage" + alarm_actions = [data.aws_sns_topic.fhir_api_perf_alerts.arn] + treat_missing_data = "notBreaching" } resource "aws_cloudwatch_log_metric_filter" "max_memory_used_metric" { diff --git a/infrastructure/instance/modules/lambda/variables.tf b/infrastructure/instance/modules/lambda/variables.tf index 60369e6b4e..c92917d241 100644 --- a/infrastructure/instance/modules/lambda/variables.tf +++ b/infrastructure/instance/modules/lambda/variables.tf @@ -41,4 +41,10 @@ variable "vpc_subnet_ids" { variable "environment" { description = "The deployment environment (e.g., dev, int, internal-qa, prod)" type = string +} + +variable "memory_size" { + description = "The memory size allocated to the lambda" + type = number + default = 1024 } \ No newline at end of file From 977d3725fbf9a806c4f79dfc6e01acd03221b60c Mon Sep 17 00:00:00 2001 From: ariagraham-nhs Date: Fri, 8 May 2026 10:24:09 +0100 Subject: [PATCH 5/6] Add depends-on for SNS topic --- infrastructure/account/shield_protection.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/infrastructure/account/shield_protection.tf b/infrastructure/account/shield_protection.tf index a7a6770830..7966d7f6d9 100644 --- a/infrastructure/account/shield_protection.tf +++ b/infrastructure/account/shield_protection.tf @@ -36,6 +36,7 @@ locals { # Topic to publish alerts to when alarm is triggered data "aws_sns_topic" "fhir_api_perf_alerts" { + depends_on = [aws_sns_topic.fhir_api_perf_alerts] name = "${var.environment}-fhir-api-perf-alerts" } From 3d68c3d103291d0b8e26d96c2f794dbed18c331e Mon Sep 17 00:00:00 2001 From: ariagraham-nhs Date: Fri, 8 May 2026 14:58:56 +0100 Subject: [PATCH 6/6] Fix formatting in shield_protection.tf --- infrastructure/account/shield_protection.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/account/shield_protection.tf b/infrastructure/account/shield_protection.tf index 7966d7f6d9..8fdb411b59 100644 --- a/infrastructure/account/shield_protection.tf +++ b/infrastructure/account/shield_protection.tf @@ -36,8 +36,8 @@ locals { # Topic to publish alerts to when alarm is triggered data "aws_sns_topic" "fhir_api_perf_alerts" { + name = "${var.environment}-fhir-api-perf-alerts" depends_on = [aws_sns_topic.fhir_api_perf_alerts] - name = "${var.environment}-fhir-api-perf-alerts" } # Create Metric Alarms for each of those resources