@@ -38,6 +38,14 @@ Metadata:
38
38
- SubDomainName
39
39
- NumShards
40
40
- NumReplicas
41
+ - Label :  # parameter group that collects the five alarm-threshold parameters
42
+ default : ' Alerting Parameters'
43
+ Parameters :  # each entry names a parameter declared in the Parameters section
44
+ - CPUUtilizationThreshold
45
+ - DatabaseMemoryUsagePercentageThreshold
46
+ - SwapUsageThreshold
47
+ - EvictionsThreshold
48
+ - ReplicationLagThreshold
41
49
Parameters :
42
50
ParentVPCStack :
43
51
Description : ' Stack name of parent VPC stack based on vpc/vpc-*azs.yaml template.'
@@ -112,6 +120,33 @@ Parameters:
112
120
Default : 1
113
121
MinValue : 0
114
122
MaxValue : 5
123
+ CPUUtilizationThreshold :  # percent; referenced as Threshold by the CPU and engine-CPU alarms
124
+ Type : Number
125
+ MinValue : -1  # lowest accepted value; the Description names -1 as "disable"
126
+ MaxValue : 100
127
+ Default : 80
128
+ Description : ' The maximum percentage of CPU usage (set to -1 to disable).'
129
+ DatabaseMemoryUsagePercentageThreshold :  # percent; referenced as Threshold by the memory-usage alarms
130
+ Type : Number
131
+ MinValue : -1  # lowest accepted value; the Description names -1 as "disable"
132
+ MaxValue : 100
133
+ Default : 90
134
+ Description : ' The maximum percentage of memory usage (set to -1 to disable).'
135
+ SwapUsageThreshold :  # bytes; referenced as Threshold by the swap-usage alarms
136
+ Type : Number
137
+ MinValue : -1  # lowest accepted value; the Description names -1 as "disable"
138
+ Default : 67108864  # 64 MB expressed in bytes (64 * 1024 * 1024)
139
+ Description : ' The maximum bytes of swap usage (set to -1 to disable).'
140
+ EvictionsThreshold :  # eviction count; referenced as Threshold by the evictions alarms
141
+ Type : Number
142
+ MinValue : -1  # lowest accepted value; the Description names -1 as "disable"
143
+ Default : 1000
144
+ Description : ' The maximum number of evictions (set to -1 to disable).'
145
+ ReplicationLagThreshold :  # seconds; referenced as Threshold by the replication-lag alarms
146
+ Type : Number
147
+ MinValue : -1  # lowest accepted value; the Description names -1 as "disable"
148
+ Default : 30
149
+ Description : ' The maximum seconds of replication lag (set to -1 to disable).'
115
150
Mappings :
116
151
EngineVersionMap :
117
152
' 3.2.6 ' :
@@ -130,6 +165,11 @@ Conditions:
130
165
HasAuthToken : !Not [!Equals [!Ref AuthToken, '']]
131
166
HasSnapshotName : !Not [!Equals [!Ref SnapshotName, '']]
132
167
HasAutomaticFailoverEnabled : !Not [!Equals [!Ref NumReplicas, 0]]
168
+ HasCPUUtilizationThresholdAndAlertTopic : !And [!Not [!Equals [!Ref CPUUtilizationThreshold, '-1']], !Condition HasAlertTopic]  # thresholds are Numbers (never ''), and -1 is the documented "disabled" value
169
+ HasDatabaseMemoryUsagePercentageThresholdAndAlertTopic : !And [!Not [!Equals [!Ref DatabaseMemoryUsagePercentageThreshold, '-1']], !Condition HasAlertTopic]  # true only when the threshold is not -1 and an alert topic exists
170
+ HasSwapUsageThresholdAndAlertTopic : !And [!Not [!Equals [!Ref SwapUsageThreshold, '-1']], !Condition HasAlertTopic]  # same rule for the swap-usage alarms
171
+ HasEvictionsThresholdAndAlertTopic : !And [!Not [!Equals [!Ref EvictionsThreshold, '-1']], !Condition HasAlertTopic]  # same rule for the evictions alarms
172
+ HasReplicationLagThresholdAndAlertTopic : !And [!Not [!Equals [!Ref ReplicationLagThreshold, '-1']], !Condition HasAlertTopic]  # same rule for the replication-lag alarms
133
173
Resources :
134
174
RecordSet :
135
175
Condition : HasZone
@@ -209,136 +249,204 @@ Resources:
209
249
UpdatePolicy :
210
250
UseOnlineResharding : true
211
251
Node1CPUUtilizationTooHighAlarm :
212
- Condition : HasAlertTopic
252
+ Condition : HasCPUUtilizationThresholdAndAlertTopic
213
253
Type : ' AWS::CloudWatch::Alarm'
214
254
Properties :
215
- AlarmDescription : ' Average CPU utilization over last 10 minutes higher than 80 %'
255
+ AlarmDescription : !Sub 'Average CPU utilization over last 10 minutes higher than ${CPUUtilizationThreshold} %'
216
256
Namespace : ' AWS/ElastiCache'
217
257
MetricName : CPUUtilization
218
258
Statistic : Average
219
259
Period : 600
220
260
EvaluationPeriods : 1
221
261
ComparisonOperator : GreaterThanThreshold
222
- Threshold : 80
262
+ Threshold : !Ref CPUUtilizationThreshold
223
263
AlarmActions :
224
264
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
225
265
Dimensions :
226
266
- Name : CacheClusterId
227
267
Value : !Sub '${ReplicationGroup}-001'
228
268
Node2CPUUtilizationTooHighAlarm :
229
- Condition : HasAlertTopic
269
+ Condition : HasCPUUtilizationThresholdAndAlertTopic
230
270
Type : ' AWS::CloudWatch::Alarm'
231
271
Properties :
232
- AlarmDescription : ' Average CPU utilization over last 10 minutes higher than 80 %'
272
+ AlarmDescription : !Sub 'Average CPU utilization over last 10 minutes higher than ${CPUUtilizationThreshold} %'
233
273
Namespace : ' AWS/ElastiCache'
234
274
MetricName : CPUUtilization
235
275
Statistic : Average
236
276
Period : 600
237
277
EvaluationPeriods : 1
238
278
ComparisonOperator : GreaterThanThreshold
239
- Threshold : 80
279
+ Threshold : !Ref CPUUtilizationThreshold
280
+ AlarmActions :
281
+ - ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
282
+ Dimensions :
283
+ - Name : CacheClusterId
284
+ Value : !Sub '${ReplicationGroup}-002'
285
+ Node1EngineCPUUtilizationTooHighAlarm :  # alarm on EngineCPUUtilization of cache cluster <ReplicationGroup>-001
286
+ Type : ' AWS::CloudWatch::Alarm'
287
+ Condition : HasCPUUtilizationThresholdAndAlertTopic
288
+ Properties :
289
+ Namespace : ' AWS/ElastiCache'
290
+ MetricName : EngineCPUUtilization
291
+ Statistic : Average
292
+ Period : 600  # seconds, i.e. the "last 10 minutes" named in the description
293
+ EvaluationPeriods : 1
294
+ ComparisonOperator : GreaterThanThreshold
295
+ Threshold : !Ref CPUUtilizationThreshold  # shares the threshold parameter with the host-CPU alarms
296
+ AlarmDescription : !Sub 'Average engine CPU utilization over last 10 minutes higher than ${CPUUtilizationThreshold}%'
297
+ Dimensions :
298
+ - Name : CacheClusterId
299
+ Value : !Sub '${ReplicationGroup}-001'
300
+ AlarmActions :  # notify the topic exported by the parent alert stack
301
+ - ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
302
+ Node2EngineCPUUtilizationTooHighAlarm :  # alarm on EngineCPUUtilization of cache cluster <ReplicationGroup>-002
303
+ Type : ' AWS::CloudWatch::Alarm'
304
+ Condition : HasCPUUtilizationThresholdAndAlertTopic
305
+ Properties :
306
+ Namespace : ' AWS/ElastiCache'
307
+ MetricName : EngineCPUUtilization
308
+ Statistic : Average
309
+ Period : 600  # seconds, i.e. the "last 10 minutes" named in the description
310
+ EvaluationPeriods : 1
311
+ ComparisonOperator : GreaterThanThreshold
312
+ Threshold : !Ref CPUUtilizationThreshold  # shares the threshold parameter with the host-CPU alarms
313
+ AlarmDescription : !Sub 'Average engine CPU utilization over last 10 minutes higher than ${CPUUtilizationThreshold}%'
314
+ Dimensions :
315
+ - Name : CacheClusterId
316
+ Value : !Sub '${ReplicationGroup}-002'
317
+ AlarmActions :  # notify the topic exported by the parent alert stack
318
+ - ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
319
+ Node1DatabaseMemoryUsagePercentageTooHighAlarm :  # alarm on DatabaseMemoryUsagePercentage of cache cluster <ReplicationGroup>-001
320
+ Condition : HasDatabaseMemoryUsagePercentageThresholdAndAlertTopic
321
+ Type : ' AWS::CloudWatch::Alarm'
322
+ Properties :
323
+ AlarmDescription : !Sub 'Average memory usage over last 10 minutes higher than ${DatabaseMemoryUsagePercentageThreshold}%, performance may suffer'
324
+ Namespace : ' AWS/ElastiCache'
325
+ MetricName : DatabaseMemoryUsagePercentage
326
+ Statistic : Average
327
+ Period : 600  # seconds, i.e. the "last 10 minutes" named in the description
328
+ EvaluationPeriods : 1
329
+ ComparisonOperator : GreaterThanThreshold
330
+ Threshold : !Ref DatabaseMemoryUsagePercentageThreshold  # a percentage, hence the % in the description
331
+ AlarmActions :  # notify the topic exported by the parent alert stack
332
+ - ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
333
+ Dimensions :
334
+ - Name : CacheClusterId
335
+ Value : !Sub '${ReplicationGroup}-001'
336
+ Node2DatabaseMemoryUsagePercentageTooHighAlarm :  # alarm on DatabaseMemoryUsagePercentage of cache cluster <ReplicationGroup>-002
337
+ Condition : HasDatabaseMemoryUsagePercentageThresholdAndAlertTopic
338
+ Type : ' AWS::CloudWatch::Alarm'
339
+ Properties :
340
+ AlarmDescription : !Sub 'Average memory usage over last 10 minutes higher than ${DatabaseMemoryUsagePercentageThreshold}%, performance may suffer'
341
+ Namespace : ' AWS/ElastiCache'
342
+ MetricName : DatabaseMemoryUsagePercentage
343
+ Statistic : Average
344
+ Period : 600  # seconds, i.e. the "last 10 minutes" named in the description
345
+ EvaluationPeriods : 1
346
+ ComparisonOperator : GreaterThanThreshold
347
+ Threshold : !Ref DatabaseMemoryUsagePercentageThreshold  # a percentage, hence the % in the description
240
348
AlarmActions :  # notify the topic exported by the parent alert stack
241
349
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
242
350
Dimensions :
243
351
- Name : CacheClusterId
244
352
Value : !Sub '${ReplicationGroup}-002'
245
353
Node1SwapUsageTooHighAlarm :
246
- Condition : HasAlertTopic
354
+ Condition : HasSwapUsageThresholdAndAlertTopic
247
355
Type : ' AWS::CloudWatch::Alarm'
248
356
Properties :
249
- AlarmDescription : ' Average swap usage over last 10 minutes higher than 64 MB , performance may suffer'
357
+ AlarmDescription : !Sub 'Average swap usage over last 10 minutes higher than ${SwapUsageThreshold} bytes , performance may suffer'
250
358
Namespace : ' AWS/ElastiCache'
251
359
MetricName : SwapUsage
252
360
Statistic : Average
253
361
Period : 600
254
362
EvaluationPeriods : 1
255
363
ComparisonOperator : GreaterThanThreshold
256
- Threshold : 67108864 # 64 MB in Bytes
364
+ Threshold : !Ref SwapUsageThreshold
257
365
AlarmActions :
258
366
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
259
367
Dimensions :
260
368
- Name : CacheClusterId
261
369
Value : !Sub '${ReplicationGroup}-001'
262
370
Node2SwapUsageTooHighAlarm :
263
- Condition : HasAlertTopic
371
+ Condition : HasSwapUsageThresholdAndAlertTopic
264
372
Type : ' AWS::CloudWatch::Alarm'
265
373
Properties :
266
- AlarmDescription : ' Average swap usage over last 10 minutes higher than 64 MB , performance may suffer'
374
+ AlarmDescription : !Sub 'Average swap usage over last 10 minutes higher than ${SwapUsageThreshold} bytes , performance may suffer'
267
375
Namespace : ' AWS/ElastiCache'
268
376
MetricName : SwapUsage
269
377
Statistic : Average
270
378
Period : 600
271
379
EvaluationPeriods : 1
272
380
ComparisonOperator : GreaterThanThreshold
273
- Threshold : 67108864 # 64 MB in Bytes
381
+ Threshold : !Ref SwapUsageThreshold
274
382
AlarmActions :
275
383
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
276
384
Dimensions :
277
385
- Name : CacheClusterId
278
386
Value : !Sub '${ReplicationGroup}-002'
279
387
Node1EvictionsTooHighAlarm :
280
- Condition : HasAlertTopic
388
+ Condition : HasEvictionsThresholdAndAlertTopic
281
389
Type : ' AWS::CloudWatch::Alarm'
282
390
Properties :
283
- AlarmDescription : ' Average evictions over last 10 minutes higher than 1000, may enough memory for all keys '
391
+ AlarmDescription : !Sub 'Average evictions over last 10 minutes higher than ${EvictionsThreshold}, cache hit ratio may suffer '
284
392
Namespace : ' AWS/ElastiCache'
285
393
MetricName : Evictions
286
394
Statistic : Average
287
395
Period : 600
288
396
EvaluationPeriods : 1
289
397
ComparisonOperator : GreaterThanThreshold
290
- Threshold : 1000
398
+ Threshold : !Ref EvictionsThreshold
291
399
AlarmActions :
292
400
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
293
401
Dimensions :
294
402
- Name : CacheClusterId
295
403
Value : !Sub '${ReplicationGroup}-001'
296
404
Node2EvictionsTooHighAlarm :
297
- Condition : HasAlertTopic
405
+ Condition : HasEvictionsThresholdAndAlertTopic
298
406
Type : ' AWS::CloudWatch::Alarm'
299
407
Properties :
300
- AlarmDescription : ' Average evictions over last 10 minutes higher than 1000, may enough memory for all keys '
408
+ AlarmDescription : !Sub 'Average evictions over last 10 minutes higher than ${EvictionsThreshold}, cache hit ratio may suffer '
301
409
Namespace : ' AWS/ElastiCache'
302
410
MetricName : Evictions
303
411
Statistic : Average
304
412
Period : 600
305
413
EvaluationPeriods : 1
306
414
ComparisonOperator : GreaterThanThreshold
307
- Threshold : 1000
415
+ Threshold : !Ref EvictionsThreshold
308
416
AlarmActions :
309
417
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
310
418
Dimensions :
311
419
- Name : CacheClusterId
312
420
Value : !Sub '${ReplicationGroup}-002'
313
421
Node1ReplicationLagTooHighAlarm :
314
- Condition : HasAlertTopic
422
+ Condition : HasReplicationLagThresholdAndAlertTopic
315
423
Type : ' AWS::CloudWatch::Alarm'
316
424
Properties :
317
- AlarmDescription : ' Average replication lag over last 10 minutes higher than 30 seconds'
425
+ AlarmDescription : !Sub 'Average replication lag over last 10 minutes higher than ${ReplicationLagThreshold} seconds'
318
426
Namespace : ' AWS/ElastiCache'
319
427
MetricName : ReplicationLag
320
428
Statistic : Average
321
429
Period : 600
322
430
EvaluationPeriods : 1
323
431
ComparisonOperator : GreaterThanThreshold
324
- Threshold : 30
432
+ Threshold : !Ref ReplicationLagThreshold
325
433
AlarmActions :
326
434
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
327
435
Dimensions :
328
436
- Name : CacheClusterId
329
437
Value : !Sub '${ReplicationGroup}-001'
330
438
Node2ReplicationLagTooHighAlarm :
331
- Condition : HasAlertTopic
439
+ Condition : HasReplicationLagThresholdAndAlertTopic
332
440
Type : ' AWS::CloudWatch::Alarm'
333
441
Properties :
334
- AlarmDescription : ' Average replication lag over last 10 minutes higher than 30 seconds'
442
+ AlarmDescription : !Sub 'Average replication lag over last 10 minutes higher than ${ReplicationLagThreshold} seconds'
335
443
Namespace : ' AWS/ElastiCache'
336
444
MetricName : ReplicationLag
337
445
Statistic : Average
338
446
Period : 600
339
447
EvaluationPeriods : 1
340
448
ComparisonOperator : GreaterThanThreshold
341
- Threshold : 30
449
+ Threshold : !Ref ReplicationLagThreshold
342
450
AlarmActions :
343
451
- ' Fn::ImportValue ' : !Sub '${ParentAlertStack}-TopicARN'
344
452
Dimensions :
0 commit comments