@@ -218,14 +218,34 @@ public void execute(JobExecutionContext jobExecutionContext) throws JobExecution
218
218
219
219
for (GroupStats groupStats : m_offsetsTracker .copyOfCurrentStats ())
220
220
{
221
- Map <Integer , Long > latestOffsets = getLatestTopicOffsets (groupStats .getTopic ());
221
+ Map <Integer , Long > latestOffsets = null ;
222
+
223
+ try
224
+ {
225
+ latestOffsets = getLatestTopicOffsets (groupStats .getTopic ());
226
+ }
227
+ catch (Exception e )
228
+ {
229
+ logger .error ("Error reading kafka offsets for topic: " +groupStats .getTopic (), e );
230
+
231
+ //Report failure metric
232
+ ImmutableSortedMap <String , String > offsetTags = new ImmutableSortedMap .Builder <String , String >(Ordering .natural ())
233
+ .putAll (m_monitorConfig .getAdditionalTags ())
234
+ .put ("host" , m_clientId ).build ();
235
+
236
+ DataPointEvent failureEvent = new DataPointEvent (m_monitorConfig .getGatherFailureMetric (),
237
+ offsetTags ,
238
+ m_dataPointFactory .createDataPoint (now , 1 ));
239
+ postEvent (failureEvent );
240
+ }
222
241
223
242
ImmutableSortedMap <String , String > groupTags = new ImmutableSortedMap .Builder <String , String >(Ordering .natural ())
224
243
.putAll (m_monitorConfig .getAdditionalTags ())
225
244
.put ("group" , groupStats .getGroupName ())
226
245
.put ("proxy_group" , groupStats .getProxyGroup ())
227
246
.put ("topic" , groupStats .getTopic ()).build ();
228
247
248
+ //Report consumer rate
229
249
DataPointEvent event = new DataPointEvent (m_monitorConfig .getConsumerRateMetric (),
230
250
groupTags ,
231
251
m_dataPointFactory .createDataPoint (now , groupStats .getConsumeCount ()));
@@ -244,30 +264,34 @@ public void execute(JobExecutionContext jobExecutionContext) throws JobExecution
244
264
.put ("topic" , groupStats .getTopic ())
245
265
.put ("partition" , String .valueOf (offsetStat .getPartition ())).build ();
246
266
267
+ //Report offset age
247
268
DataPointEvent offsetAge = new DataPointEvent (m_monitorConfig .getOffsetAgeMetric (),
248
269
partitionTags ,
249
270
m_dataPointFactory .createDataPoint (now , (now - offsetStat .getCommitTime ())));
250
271
postEvent (offsetAge );
251
272
252
- Long latestOffset = latestOffsets .get (offsetStat .getPartition ());
273
+ Long latestOffset = latestOffsets != null ? latestOffsets .get (offsetStat .getPartition ()) : null ;
253
274
if (latestOffset != null ) //in case something goes bananas
254
275
{
255
276
long partitionLag = calculateDiff (latestOffset , offsetStat .getOffset ());
256
277
groupLag += partitionLag ;
257
278
279
+ //Report partition lag
258
280
DataPointEvent partitionLagEvent = new DataPointEvent (m_monitorConfig .getPartitionLagMetric (),
259
281
partitionTags ,
260
282
m_dataPointFactory .createDataPoint (now , partitionLag ));
261
283
postEvent (partitionLagEvent );
262
284
}
263
285
}
264
286
287
+ //Report group lag
265
288
DataPointEvent groupLagEvent = new DataPointEvent (m_monitorConfig .getGroupLagMetric (),
266
289
groupTags ,
267
290
m_dataPointFactory .createDataPoint (now , groupLag ));
268
291
postEvent (groupLagEvent );
269
292
270
293
294
+ //Report time to process lag
271
295
long secToProcess = 0 ;
272
296
if (groupStats .getCurrentRate () != 0.0 )
273
297
secToProcess = (long )((double )groupLag / groupStats .getCurrentRate ());
0 commit comments