47
47
import io .prestosql .plugin .hive .statistics .HiveStatisticsProvider ;
48
48
import io .prestosql .plugin .hive .util .ConfigurationUtils ;
49
49
import io .prestosql .plugin .hive .util .Statistics ;
50
+ import io .prestosql .spi .PartialAndFinalAggregationType ;
50
51
import io .prestosql .spi .PrestoException ;
51
52
import io .prestosql .spi .block .Block ;
52
53
import io .prestosql .spi .connector .ColumnHandle ;
@@ -3127,36 +3128,44 @@ public List<ConnectorVacuumTableInfo> getTablesForVacuum()
3127
3128
}
3128
3129
3129
3130
@ Override
3130
- public boolean canPerformSortBasedAggregation (ConnectorSession session , ConnectorTableHandle tableHandle , List <String > groupKeyNames )
3131
+ public PartialAndFinalAggregationType validateAndGetSortAggregationType (ConnectorSession session , ConnectorTableHandle tableHandle , List <String > groupKeyNames )
3131
3132
{
3133
+ PartialAndFinalAggregationType partialAndFinalAggregationType = new PartialAndFinalAggregationType ();
3132
3134
ConnectorTableMetadata connectorTableMetadata = getTableMetadata (session , ((HiveTableHandle ) tableHandle ).getSchemaTableName ());
3133
3135
List <SortingColumn > sortingColumn = (List <SortingColumn >) connectorTableMetadata .getProperties ().get (HiveTableProperties .SORTED_BY_PROPERTY );
3134
- if ((sortingColumn == null ) || (sortingColumn .size () == 0 )) {
3135
- return false ;
3136
- }
3136
+ boolean isSortingColumnsNotPresent = (sortingColumn == null ) || (sortingColumn .size () == 0 );
3137
+
3137
3138
List <String > partitionedBy = new ArrayList <>();
3138
- List <String > partitionedBytemp = (List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .PARTITIONED_BY_PROPERTY );
3139
- if (null != partitionedBytemp ) {
3140
- partitionedBy .addAll (partitionedBytemp );
3139
+ List <String > partitionedByTemp = (List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .PARTITIONED_BY_PROPERTY );
3140
+ if ((partitionedByTemp != null ) && (partitionedByTemp .size () != 0 )) {
3141
+ partitionedBy .addAll (partitionedByTemp );
3142
+ if (isSortingColumnsNotPresent && (partitionedByTemp .size () != groupKeyNames .size ())) {
3143
+ return partialAndFinalAggregationType ;
3144
+ }
3145
+ }
3146
+ else if (isSortingColumnsNotPresent ) {
3147
+ return partialAndFinalAggregationType ;
3141
3148
}
3142
3149
int bucketCount = 0 ;
3143
- List <String > bucketedColumns = (List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKETED_BY_PROPERTY );
3144
- if (null != bucketedColumns ) {
3145
- bucketCount = (int ) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKET_COUNT_PROPERTY );
3150
+ List <String > bucketedColumns = new ArrayList <>();
3151
+ if (!isSortingColumnsNotPresent ) {
3152
+ bucketedColumns .addAll ((List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKETED_BY_PROPERTY ));
3153
+ if (null != bucketedColumns ) {
3154
+ bucketCount = (int ) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKET_COUNT_PROPERTY );
3155
+ }
3146
3156
}
3147
3157
3148
- List <String > sortedColumnNames = sortingColumn .stream ().map (column -> column .getColumnName ()).collect (Collectors .toList ());
3149
- List <String > partitionAndSortedColumnNames = new ArrayList <>();
3150
- // when there is partition by , sorted data is arranged 'partition by' followed by 'sorted by'
3151
- partitionAndSortedColumnNames .addAll (partitionedBy );
3152
- partitionAndSortedColumnNames .addAll (sortedColumnNames );
3158
+ List <String > sortedColumnNames = new ArrayList <>();
3159
+ if ((sortingColumn != null ) && (sortingColumn .size () != 0 )) {
3160
+ sortedColumnNames .addAll (sortingColumn .stream ().map (column -> column .getColumnName ()).collect (Collectors .toList ()));
3161
+ }
3153
3162
3154
3163
// grouping keys must include all partitioned-by columns, and any remaining keys must come from the sorted-by columns
3155
- if ((sortedColumnNames .size () + partitionedBy .size () < groupKeyNames .size ()) ||
3164
+ if ((partitionedBy .size () + sortedColumnNames .size () < groupKeyNames .size ()) ||
3156
3165
(partitionedBy .size () > groupKeyNames .size ())) {
3157
3166
// either there are fewer partitioned-by plus sorted-by columns than grouping keys, or more partitioned-by columns than grouping keys
3158
3167
log .debug ("number of sorted columns " + sortedColumnNames .size () + "are less join column size " + groupKeyNames .size ());
3159
- return false ;
3168
+ return partialAndFinalAggregationType ;
3160
3169
}
3161
3170
3162
3171
// bucketed-by columns and group-by columns should be the same.
@@ -3167,43 +3176,52 @@ public boolean canPerformSortBasedAggregation(ConnectorSession session, Connecto
3167
3176
(groupKeyNames .get (partitionedByCount ).equals (bucketedColumns .get (0 )))) || (bucketCount == 0 ));
3168
3177
3169
3178
if ((bucketCount == 1 ) && (bucketedColumns .size () > 1 )) {
3170
- boolean notMatching = false ;
3171
3179
int minSize = Math .min (groupKeyNames .size () - partitionedBy .size (), bucketedColumns .size ());
3172
3180
int partSize = partitionedBy .size ();
3173
3181
for (int keyIdx = 0 ; keyIdx < minSize ; keyIdx ++) {
3174
3182
if (!groupKeyNames .get (keyIdx + partSize ).equals (bucketedColumns .get (keyIdx ))) {
3175
- notMatching = true ;
3176
- break ;
3183
+ return partialAndFinalAggregationType ;
3177
3184
}
3178
3185
}
3179
- if (!notMatching ) {
3180
- singleOrZeroBucketedColumn = true ;
3186
+ singleOrZeroBucketedColumn = true ;
3187
+ }
3188
+
3189
+ for (int numOfComparedKeys = 0 ; numOfComparedKeys < partitionedBy .size (); numOfComparedKeys ++) {
3190
+ if ((!groupKeyNames .get (numOfComparedKeys ).equals (partitionedBy .get (numOfComparedKeys )))) {
3191
+ return partialAndFinalAggregationType ;
3181
3192
}
3182
3193
}
3194
+
3195
+ if (groupKeyNames .size () == partitionedBy .size ()) {
3196
+ partialAndFinalAggregationType .setPartialAsSortAndFinalAsHashAggregation (true );
3197
+ return partialAndFinalAggregationType ;
3198
+ }
3199
+
3183
3200
if (singleOrZeroBucketedColumn || (groupKeyNames .size () == (bucketedColumns .size () + partitionedBy .size ()))) {
3184
- for ( int numOfComparedKeys = 0 ; numOfComparedKeys < groupKeyNames .size (); numOfComparedKeys ++) {
3185
- boolean bucketedColumnsResult = numOfComparedKeys < partitionedBy .size () ? false : ! singleOrZeroBucketedColumn &&
3186
- (!groupKeyNames .get (numOfComparedKeys ).equals (bucketedColumns .get (numOfComparedKeys - partitionedBy . size () )));
3187
- if ((!groupKeyNames .get (numOfComparedKeys ).equals (partitionAndSortedColumnNames .get (numOfComparedKeys ))) ||
3201
+ int numOfCmpKeysAfterPartitionedBy = partitionedBy .size ();
3202
+ for ( int numOfComparedKeys = 0 ; numOfComparedKeys < groupKeyNames .size () - partitionedBy . size (); numOfComparedKeys ++, numOfCmpKeysAfterPartitionedBy ++) {
3203
+ boolean bucketedColumnsResult = ! singleOrZeroBucketedColumn && (!groupKeyNames .get (numOfComparedKeys ).equals (bucketedColumns .get (numOfComparedKeys )));
3204
+ if ((!groupKeyNames .get (numOfCmpKeysAfterPartitionedBy ).equals (sortedColumnNames .get (numOfComparedKeys ))) ||
3188
3205
(!singleOrZeroBucketedColumn && bucketedColumnsResult )) {
3189
3206
if (log .isDebugEnabled ()) {
3190
3207
final String [] dbgGroupKeyNames = {new String ("" )};
3191
3208
groupKeyNames .stream ().forEach (k -> dbgGroupKeyNames [0 ] = dbgGroupKeyNames [0 ].concat (k + " , " ));
3192
3209
final String [] dbgSortedColumnNames = {new String ("" )};
3193
- partitionAndSortedColumnNames .stream ().forEach (k -> dbgSortedColumnNames [0 ] = dbgSortedColumnNames [0 ].concat (k + " , " ));
3210
+ sortedColumnNames .stream ().forEach (k -> dbgSortedColumnNames [0 ] = dbgSortedColumnNames [0 ].concat (k + " , " ));
3194
3211
if ((null != bucketedColumns ) && (bucketedColumns .size () > 0 )) {
3195
3212
final String [] dbgbucketedColumns = {new String ("" )};
3196
3213
bucketedColumns .stream ().forEach (k -> dbgbucketedColumns [0 ] = dbgbucketedColumns [0 ].concat (k + " , " ));
3197
3214
log .debug ("Not matching sortedColumnNames: " + dbgSortedColumnNames + " group columns name: " + dbgGroupKeyNames + " bucketedColumns :" + dbgbucketedColumns );
3198
3215
}
3199
3216
log .debug ("Not matching sortedColumnNames: " + dbgSortedColumnNames + " group columns name: " + dbgGroupKeyNames );
3200
3217
}
3201
- return false ;
3218
+ return partialAndFinalAggregationType ;
3202
3219
}
3203
3220
}
3204
- return true ;
3221
+ partialAndFinalAggregationType .setSortAggregation (true );
3222
+ return partialAndFinalAggregationType ;
3205
3223
}
3206
- return false ;
3224
+ return partialAndFinalAggregationType ;
3207
3225
}
3208
3226
3209
3227
@ Override
0 commit comments