33
33
import org .apache .cassandra .concurrent .Stage ;
34
34
import org .apache .cassandra .db .Clustering ;
35
35
import org .apache .cassandra .db .ColumnFamilyStore ;
36
- import org .apache .cassandra .db .Columns ;
37
36
import org .apache .cassandra .db .ConsistencyLevel ;
38
37
import org .apache .cassandra .db .DecoratedKey ;
39
38
import org .apache .cassandra .db .DeletionTime ;
46
45
import org .apache .cassandra .db .filter .DataLimits ;
47
46
import org .apache .cassandra .db .filter .RowFilter ;
48
47
import org .apache .cassandra .db .partitions .PartitionIterator ;
49
- import org .apache .cassandra .db .partitions .PartitionIterators ;
50
48
import org .apache .cassandra .db .partitions .UnfilteredPartitionIterator ;
51
49
import org .apache .cassandra .db .partitions .UnfilteredPartitionIterators ;
52
50
import org .apache .cassandra .db .rows .EncodingStats ;
53
51
import org .apache .cassandra .db .rows .RangeTombstoneMarker ;
54
52
import org .apache .cassandra .db .rows .Row ;
53
+ import org .apache .cassandra .db .rows .RowIterator ;
55
54
import org .apache .cassandra .db .rows .Rows ;
56
55
import org .apache .cassandra .db .rows .Unfiltered ;
57
56
import org .apache .cassandra .db .rows .UnfilteredRowIterator ;
71
70
import org .apache .cassandra .service .ClientWarn ;
72
71
import org .apache .cassandra .service .StorageProxy ;
73
72
import org .apache .cassandra .service .reads .repair .NoopReadRepair ;
73
+ import org .apache .cassandra .service .reads .repair .PartitionIteratorMergeListener ;
74
74
import org .apache .cassandra .tracing .Tracing ;
75
75
import org .apache .cassandra .transport .Dispatcher ;
76
76
import org .apache .cassandra .utils .NoSpamLogger ;
@@ -120,6 +120,12 @@ public class ReplicaFilteringProtection<E extends Endpoints<E>>
120
120
*/
121
121
private final List <Queue <PartitionBuilder >> originalPartitions ;
122
122
123
+ /** Whether {@link #queryProtectedPartitions} consumes each partition entirely, rather than a limited number of rows at a time. */
124
+ private final boolean consumeEntirePartitions ;
125
+
126
+ /** Tracks the row iterator of the current partition when not consuming entire partitions in {@link #queryProtectedPartitions}. */
127
+ private RowIterator currentRowIterator = null ;
128
+
123
129
ReplicaFilteringProtection (ReadCoordinator coordinator ,
124
130
Keyspace keyspace ,
125
131
ReadCommand command ,
@@ -132,6 +138,7 @@ public class ReplicaFilteringProtection<E extends Endpoints<E>>
132
138
this .coordinator = coordinator ;
133
139
this .keyspace = keyspace ;
134
140
this .command = command ;
141
+ this .consumeEntirePartitions = command .limits ().isUnlimited () || !command .isLimitedToOnePartition () || command .rowFilter ().hasStaticExpression ();
135
142
this .consistency = consistency ;
136
143
this .requestTime = requestTime ;
137
144
this .sources = sources ;
@@ -195,14 +202,14 @@ public void close()
195
202
public UnfilteredRowIterators .MergeListener getRowMergeListener (DecoratedKey partitionKey , List <UnfilteredRowIterator > versions )
196
203
{
197
204
List <PartitionBuilder > builders = new ArrayList <>(sources .size ());
198
- RegularAndStaticColumns columns = columns (versions );
205
+ RegularAndStaticColumns columns = PartitionIteratorMergeListener . columns (versions );
199
206
EncodingStats stats = EncodingStats .merge (versions , NULL_TO_NO_STATS );
200
207
201
208
for (int i = 0 ; i < sources .size (); i ++)
202
209
builders .add (i , new PartitionBuilder (partitionKey , sources .get (i ), columns , stats ));
203
210
204
- boolean [] silentRowAt = new boolean [builders .size ()];
205
- boolean [] silentColumnAt = new boolean [builders .size ()];
211
+ final boolean [] silentRowAt = new boolean [builders .size ()];
212
+ final boolean [] silentColumnAt = new boolean [builders .size ()];
206
213
207
214
return new UnfilteredRowIterators .MergeListener ()
208
215
{
@@ -270,6 +277,11 @@ public void onMergedRangeTombstoneMarkers(RangeTombstoneMarker merged, RangeTomb
270
277
271
278
@ Override
272
279
public void close ()
280
+ {
281
+ }
282
+
283
+ @ Override
284
+ public void checkpoint ()
273
285
{
274
286
for (int i = 0 ; i < sources .size (); i ++)
275
287
originalPartitions .get (i ).add (builders .get (i ));
@@ -313,22 +325,6 @@ private void releaseCachedRows(int count)
313
325
currentRowsCached -= count ;
314
326
}
315
327
316
- private static RegularAndStaticColumns columns (List <UnfilteredRowIterator > versions )
317
- {
318
- Columns statics = Columns .NONE ;
319
- Columns regulars = Columns .NONE ;
320
- for (UnfilteredRowIterator iter : versions )
321
- {
322
- if (iter == null )
323
- continue ;
324
-
325
- RegularAndStaticColumns cols = iter .columns ();
326
- statics = statics .mergeTo (cols .statics );
327
- regulars = regulars .mergeTo (cols .regulars );
328
- }
329
- return new RegularAndStaticColumns (statics , regulars );
330
- }
331
-
332
328
/**
333
329
* Returns the protected results for the specified replica. These are generated fetching the extra rows and merging
334
330
* them with the cached original filtered results for that replica.
@@ -350,16 +346,66 @@ public TableMetadata metadata()
350
346
}
351
347
352
348
@ Override
353
- public void close () { }
349
+ public void close ()
350
+ {
351
+ if (currentRowIterator != null )
352
+ currentRowIterator .close ();
353
+ }
354
354
355
355
@ Override
356
356
public boolean hasNext ()
357
357
{
358
358
// If there are no cached partition builders for this source, advance the first phase iterator, which
359
- // will force the RFP merge listener to load at least the next protected partition.
359
+ // will force the RFP merge listener to load rows from the next protected partition.
360
360
if (partitions .isEmpty ())
361
361
{
362
- PartitionIterators .consumeNext (merged );
362
+ if (consumeEntirePartitions )
363
+ {
364
+ if (merged .hasNext ())
365
+ {
366
+ try (RowIterator partition = merged .next ())
367
+ {
368
+ while (partition .hasNext ())
369
+ partition .next ();
370
+
371
+ partition .checkpoint ();
372
+ }
373
+ }
374
+ }
375
+ else
376
+ {
377
+ if (currentRowIterator == null || !currentRowIterator .hasNext ())
378
+ {
379
+ // If there is an iterator, it's done, so just close it.
380
+ if (currentRowIterator != null )
381
+ {
382
+ currentRowIterator .close ();
383
+ currentRowIterator = null ;
384
+ }
385
+
386
+ // Take the next filtered partition from the merged partition iterator.
387
+ if (merged .hasNext ())
388
+ currentRowIterator = merged .next ();
389
+ }
390
+
391
+ if (currentRowIterator != null )
392
+ {
393
+ int i = 0 ;
394
+
395
+ // Consume up to LIMIT filtered rows from the current partition (fewer if it has fewer results).
396
+ // The underlying iterator is short-read protected, and limiting the number of rows we
397
+ // consume avoids needless SRP reads when there are many more than LIMIT results.
398
+ while (i < command .limits ().count () && currentRowIterator .hasNext ())
399
+ {
400
+ currentRowIterator .next ();
401
+ i ++;
402
+ }
403
+
404
+ // If we actually consumed a row, checkpoint to populate the builders.
405
+ if (i > 0 )
406
+ currentRowIterator .checkpoint ();
407
+ }
408
+ }
363
409
}
364
410
365
411
return !partitions .isEmpty ();
@@ -491,6 +537,8 @@ public Row staticRow()
491
537
public void close ()
492
538
{
493
539
releaseCachedRows (partitionRowsCached );
540
+ toFetch = null ;
541
+ // TODO: the counters might not be accurate for the static row at this point?
494
542
}
495
543
496
544
@ Override
0 commit comments