3535import com .google .cloud .storage .BlobId ;
3636import com .google .cloud .storage .Bucket ;
3737import com .google .cloud .storage .StorageException ;
38+ import com .google .common .annotations .VisibleForTesting ;
3839import com .google .re2j .Matcher ;
3940import com .google .re2j .Pattern ;
4041import com .wepay .kafka .connect .bigquery .write .row .GcsToBqWriter ;
@@ -67,18 +68,24 @@ public class GcsToBqLoadRunnable implements Runnable {
6768 private static String SOURCE_URI_FORMAT = "gs://%s/%s" ;
6869 private final BigQuery bigQuery ;
6970
70- // these numbers are intended to try to make this task not excede Google Cloud Quotas.
71+ // these numbers are intended to try to make this task not exceed Google Cloud Quotas.
7172 // see: https://cloud.google.com/bigquery/quotas#load_jobs
7273 private final Bucket bucket ;
7374 private final Map <Job , List <BlobId >> activeJobs ;
75+ /**
76+ * The set of blob Ids that the system is currently processing or has queued to process.
77+ */
7478 private final Set <BlobId > claimedBlobIds ;
79+ /**
80+ * The set of blob Ids that the system can delete.
81+ */
7582 private final Set <BlobId > deletableBlobIds ;
7683
7784 /**
7885 * Create a {@link GcsToBqLoadRunnable} with the given bigquery, bucket, and ms wait interval.
7986 *
8087 * @param bigQuery the {@link BigQuery} instance.
81- * @param bucket the the GCS bucket to read from.
88+ * @param bucket the GCS bucket to read from.
8289 */
8390 public GcsToBqLoadRunnable (BigQuery bigQuery , Bucket bucket ) {
8491 this .bigQuery = bigQuery ;
@@ -88,6 +95,24 @@ public GcsToBqLoadRunnable(BigQuery bigQuery, Bucket bucket) {
8895 this .deletableBlobIds = new HashSet <>();
8996 }
9097
/**
 * Create a {@link GcsToBqLoadRunnable} with the given bigquery, bucket, and caller-supplied
 * job/blob tracking collections.
 *
 * <p>The supplied map and sets are stored by reference, not copied, so the caller (for example
 * a test) keeps a live view of the state this instance updates.
 *
 * @param bigQuery the {@link BigQuery} instance.
 * @param bucket the GCS bucket to read from.
 * @param activeJobs the map of job to the list of blobs it contains.
 * @param claimedBlobIds the set of Blob Ids being processed.
 * @param deletableBlobIds the set of Blob Ids that can be deleted.
 */
@VisibleForTesting
GcsToBqLoadRunnable(BigQuery bigQuery,
                    Bucket bucket,
                    Map<Job, List<BlobId>> activeJobs,
                    Set<BlobId> claimedBlobIds,
                    Set<BlobId> deletableBlobIds) {
  this.bigQuery = bigQuery;
  this.bucket = bucket;
  this.activeJobs = activeJobs;
  this.claimedBlobIds = claimedBlobIds;
  this.deletableBlobIds = deletableBlobIds;
}
115+
91116 /**
92117 * Given a blob, return the {@link TableId} this blob should be inserted into.
93118 *
@@ -217,7 +242,8 @@ private Job triggerBigQueryLoadJob(TableId table, List<Blob> blobs) {
217242 * any jobs that failed. We only log a message for failed jobs because those blobs will be
218243 * retried during the next run.
219244 */
220- private void checkJobs () {
245+ @ VisibleForTesting
246+ void checkJobs () {
221247 if (activeJobs .isEmpty ()) {
222248 // quick exit if nothing needs to be done.
223249 logger .debug ("No active jobs to check. Skipping check jobs." );
@@ -237,31 +263,50 @@ private void checkJobs() {
237263 try {
238264 if (job .isDone ()) {
239265 logger .trace ("Job is marked done: id={}, status={}" , job .getJobId (), job .getStatus ());
240- final List <BlobId > blobIdsToDelete = jobEntry .getValue ();
266+ if (job .getStatus ().getError () == null ) {
267+ processSuccessfulJob (job , jobEntry .getValue ());
268+ successCount ++;
269+ } else {
270+ processFailedJob (job , jobEntry .getValue ());
271+ failureCount ++;
272+ }
241273 jobIterator .remove ();
242274 logger .trace ("Job is removed from iterator: {}" , job .getJobId ());
243- successCount ++;
244- claimedBlobIds .removeAll (blobIdsToDelete );
245- logger .trace ("Completed blobs have been removed from claimed set: {}" , blobIdsToDelete );
246- deletableBlobIds .addAll (blobIdsToDelete );
247- logger .trace ("Completed blobs marked as deletable: {}" , blobIdsToDelete );
248275 }
249276 } catch (BigQueryException ex ) {
250277 // log a message.
251278 logger .warn ("GCS to BQ load job failed" , ex );
252- // remove job from active jobs (it's not active anymore)
253- List <BlobId > blobIds = activeJobs .get (job );
254- jobIterator .remove ();
255- // unclaim blobs
256- claimedBlobIds .removeAll (blobIds );
279+ processFailedJob (job , jobEntry .getValue ());
257280 failureCount ++;
281+ jobIterator .remove ();
282+ logger .trace ("Job is removed from iterator: {}" , job .getJobId ());
258283 } finally {
259284 logger .info ("GCS To BQ job tally: {} successful jobs, {} failed jobs." ,
260285 successCount , failureCount );
261286 }
262287 }
263288 }
264289
290+ private void processSuccessfulJob (final Job job , final List <BlobId > blobIdsToDelete ) {
291+ blobIdsToDelete .forEach (claimedBlobIds ::remove );
292+ logger .trace ("Completed blobs have been removed from claimed set: {}" , blobIdsToDelete );
293+ deletableBlobIds .addAll (blobIdsToDelete );
294+ logger .trace ("Completed blobs marked as deletable: {}" , blobIdsToDelete );
295+ }
296+
297+ private void processFailedJob (final Job job , final List <BlobId > blobsNotCompleted ) {
298+ logger .warn ("Job {} failed with {}" , job .getJobId (), job .getStatus ().getError ());
299+ if (job .getStatus ().getExecutionErrors ().isEmpty ()) {
300+ logger .warn ("No additional errors associated with job {}" , job .getJobId ());
301+ } else {
302+ logger .warn ("Additional errors associated with job {}: {}" , job .getJobId (), job .getStatus ().getExecutionErrors ());
303+ }
304+ logger .warn ("Blobs in job {}: {}" , job .getJobId (), blobsNotCompleted );
305+ // unclaim blobs
306+ blobsNotCompleted .forEach (claimedBlobIds ::remove );
307+ logger .trace ("Failed blobs reset as processable" );
308+ }
309+
265310 /**
266311 * Delete deletable blobs.
267312 */
@@ -298,7 +343,7 @@ private void deleteBlobs() {
298343 // Calculate number of successful deletes, remove the successful deletes from
299344 // the deletableBlobIds.
300345 successfulDeletes = numberOfBlobs - failedDeletes ;
301- deletableBlobIds . removeAll ( blobIdsToDelete );
346+ blobIdsToDelete . forEach ( deletableBlobIds :: remove );
302347
303348 logger .info ("Successfully deleted {} blobs; failed to delete {} blobs" ,
304349 successfulDeletes ,
0 commit comments