47
47
import io .prestosql .plugin .hive .statistics .HiveStatisticsProvider ;
48
48
import io .prestosql .plugin .hive .util .ConfigurationUtils ;
49
49
import io .prestosql .plugin .hive .util .Statistics ;
50
+ import io .prestosql .spi .PartialAndFinalAggregationType ;
50
51
import io .prestosql .spi .PrestoException ;
51
52
import io .prestosql .spi .block .Block ;
52
53
import io .prestosql .spi .connector .ColumnHandle ;
@@ -3129,36 +3130,44 @@ public List<ConnectorVacuumTableInfo> getTablesForVacuum()
3129
3130
}
3130
3131
3131
3132
@ Override
3132
- public boolean canPerformSortBasedAggregation (ConnectorSession session , ConnectorTableHandle tableHandle , List <String > groupKeyNames )
3133
+ public PartialAndFinalAggregationType validateAndGetSortAggregationType (ConnectorSession session , ConnectorTableHandle tableHandle , List <String > groupKeyNames )
3133
3134
{
3135
+ PartialAndFinalAggregationType partialAndFinalAggregationType = new PartialAndFinalAggregationType ();
3134
3136
ConnectorTableMetadata connectorTableMetadata = getTableMetadata (session , ((HiveTableHandle ) tableHandle ).getSchemaTableName ());
3135
3137
List <SortingColumn > sortingColumn = (List <SortingColumn >) connectorTableMetadata .getProperties ().get (HiveTableProperties .SORTED_BY_PROPERTY );
3136
- if ((sortingColumn == null ) || (sortingColumn .size () == 0 )) {
3137
- return false ;
3138
- }
3138
+ boolean isSortingColumnsNotPresent = (sortingColumn == null ) || (sortingColumn .size () == 0 );
3139
+
3139
3140
List <String > partitionedBy = new ArrayList <>();
3140
- List <String > partitionedBytemp = (List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .PARTITIONED_BY_PROPERTY );
3141
- if (null != partitionedBytemp ) {
3142
- partitionedBy .addAll (partitionedBytemp );
3141
+ List <String > partitionedByTemp = (List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .PARTITIONED_BY_PROPERTY );
3142
+ if ((partitionedByTemp != null ) && (partitionedByTemp .size () != 0 )) {
3143
+ partitionedBy .addAll (partitionedByTemp );
3144
+ if (isSortingColumnsNotPresent && (partitionedByTemp .size () != groupKeyNames .size ())) {
3145
+ return partialAndFinalAggregationType ;
3146
+ }
3147
+ }
3148
+ else if (isSortingColumnsNotPresent ) {
3149
+ return partialAndFinalAggregationType ;
3143
3150
}
3144
3151
int bucketCount = 0 ;
3145
- List <String > bucketedColumns = (List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKETED_BY_PROPERTY );
3146
- if (null != bucketedColumns ) {
3147
- bucketCount = (int ) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKET_COUNT_PROPERTY );
3152
+ List <String > bucketedColumns = new ArrayList <>();
3153
+ if (!isSortingColumnsNotPresent ) {
3154
+ bucketedColumns .addAll ((List <String >) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKETED_BY_PROPERTY ));
3155
+ if (null != bucketedColumns ) {
3156
+ bucketCount = (int ) connectorTableMetadata .getProperties ().get (HiveTableProperties .BUCKET_COUNT_PROPERTY );
3157
+ }
3148
3158
}
3149
3159
3150
- List <String > sortedColumnNames = sortingColumn .stream ().map (column -> column .getColumnName ()).collect (Collectors .toList ());
3151
- List <String > partitionAndSortedColumnNames = new ArrayList <>();
3152
- // when there is partition by , sorted data is arranged 'partition by' followed by 'sorted by'
3153
- partitionAndSortedColumnNames .addAll (partitionedBy );
3154
- partitionAndSortedColumnNames .addAll (sortedColumnNames );
3160
+ List <String > sortedColumnNames = new ArrayList <>();
3161
+ if ((sortingColumn != null ) && (sortingColumn .size () != 0 )) {
3162
+ sortedColumnNames .addAll (sortingColumn .stream ().map (column -> column .getColumnName ()).collect (Collectors .toList ()));
3163
+ }
3155
3164
3156
3165
//grouping keys should be a subset of the sorted-by columns and should match all partition-by columns
3157
- if ((sortedColumnNames .size () + partitionedBy .size () < groupKeyNames .size ()) ||
3166
+ if ((partitionedBy .size () + sortedColumnNames .size () < groupKeyNames .size ()) ||
3158
3167
(partitionedBy .size () > groupKeyNames .size ())) {
3159
3168
//partition-by plus sorted-by columns are fewer than the grouping keys, or partition-by columns outnumber the grouping keys
3160
3169
log .debug ("number of sorted columns " + sortedColumnNames .size () + "are less join column size " + groupKeyNames .size ());
3161
- return false ;
3170
+ return partialAndFinalAggregationType ;
3162
3171
}
3163
3172
3164
3173
// bucketed-by columns and group-by columns should be the same.
@@ -3169,43 +3178,52 @@ public boolean canPerformSortBasedAggregation(ConnectorSession session, Connecto
3169
3178
(groupKeyNames .get (partitionedByCount ).equals (bucketedColumns .get (0 )))) || (bucketCount == 0 ));
3170
3179
3171
3180
if ((bucketCount == 1 ) && (bucketedColumns .size () > 1 )) {
3172
- boolean notMatching = false ;
3173
3181
int minSize = Math .min (groupKeyNames .size () - partitionedBy .size (), bucketedColumns .size ());
3174
3182
int partSize = partitionedBy .size ();
3175
3183
for (int keyIdx = 0 ; keyIdx < minSize ; keyIdx ++) {
3176
3184
if (!groupKeyNames .get (keyIdx + partSize ).equals (bucketedColumns .get (keyIdx ))) {
3177
- notMatching = true ;
3178
- break ;
3185
+ return partialAndFinalAggregationType ;
3179
3186
}
3180
3187
}
3181
- if (!notMatching ) {
3182
- singleOrZeroBucketedColumn = true ;
3188
+ singleOrZeroBucketedColumn = true ;
3189
+ }
3190
+
3191
+ for (int numOfComparedKeys = 0 ; numOfComparedKeys < partitionedBy .size (); numOfComparedKeys ++) {
3192
+ if ((!groupKeyNames .get (numOfComparedKeys ).equals (partitionedBy .get (numOfComparedKeys )))) {
3193
+ return partialAndFinalAggregationType ;
3183
3194
}
3184
3195
}
3196
+
3197
+ if (groupKeyNames .size () == partitionedBy .size ()) {
3198
+ partialAndFinalAggregationType .setPartialAsSortAndFinalAsHashAggregation (true );
3199
+ return partialAndFinalAggregationType ;
3200
+ }
3201
+
3185
3202
if (singleOrZeroBucketedColumn || (groupKeyNames .size () == (bucketedColumns .size () + partitionedBy .size ()))) {
3186
- for ( int numOfComparedKeys = 0 ; numOfComparedKeys < groupKeyNames .size (); numOfComparedKeys ++) {
3187
- boolean bucketedColumnsResult = numOfComparedKeys < partitionedBy .size () ? false : ! singleOrZeroBucketedColumn &&
3188
- (!groupKeyNames .get (numOfComparedKeys ).equals (bucketedColumns .get (numOfComparedKeys - partitionedBy . size () )));
3189
- if ((!groupKeyNames .get (numOfComparedKeys ).equals (partitionAndSortedColumnNames .get (numOfComparedKeys ))) ||
3203
+ int numOfCmpKeysAfterPartitionedBy = partitionedBy .size ();
3204
+ for ( int numOfComparedKeys = 0 ; numOfComparedKeys < groupKeyNames .size () - partitionedBy . size (); numOfComparedKeys ++, numOfCmpKeysAfterPartitionedBy ++) {
3205
+ boolean bucketedColumnsResult = ! singleOrZeroBucketedColumn && (!groupKeyNames .get (numOfComparedKeys ).equals (bucketedColumns .get (numOfComparedKeys )));
3206
+ if ((!groupKeyNames .get (numOfCmpKeysAfterPartitionedBy ).equals (sortedColumnNames .get (numOfComparedKeys ))) ||
3190
3207
(!singleOrZeroBucketedColumn && bucketedColumnsResult )) {
3191
3208
if (log .isDebugEnabled ()) {
3192
3209
final String [] dbgGroupKeyNames = {new String ("" )};
3193
3210
groupKeyNames .stream ().forEach (k -> dbgGroupKeyNames [0 ] = dbgGroupKeyNames [0 ].concat (k + " , " ));
3194
3211
final String [] dbgSortedColumnNames = {new String ("" )};
3195
- partitionAndSortedColumnNames .stream ().forEach (k -> dbgSortedColumnNames [0 ] = dbgSortedColumnNames [0 ].concat (k + " , " ));
3212
+ sortedColumnNames .stream ().forEach (k -> dbgSortedColumnNames [0 ] = dbgSortedColumnNames [0 ].concat (k + " , " ));
3196
3213
if ((null != bucketedColumns ) && (bucketedColumns .size () > 0 )) {
3197
3214
final String [] dbgbucketedColumns = {new String ("" )};
3198
3215
bucketedColumns .stream ().forEach (k -> dbgbucketedColumns [0 ] = dbgbucketedColumns [0 ].concat (k + " , " ));
3199
3216
log .debug ("Not matching sortedColumnNames: " + dbgSortedColumnNames + " group columns name: " + dbgGroupKeyNames + " bucketedColumns :" + dbgbucketedColumns );
3200
3217
}
3201
3218
log .debug ("Not matching sortedColumnNames: " + dbgSortedColumnNames + " group columns name: " + dbgGroupKeyNames );
3202
3219
}
3203
- return false ;
3220
+ return partialAndFinalAggregationType ;
3204
3221
}
3205
3222
}
3206
- return true ;
3223
+ partialAndFinalAggregationType .setSortAggregation (true );
3224
+ return partialAndFinalAggregationType ;
3207
3225
}
3208
- return false ;
3226
+ return partialAndFinalAggregationType ;
3209
3227
}
3210
3228
3211
3229
@ Override
0 commit comments