import pyspark.sql.functions as F

# Salted aggregation: add a random "salt" column so a skewed grouping key is
# spread across up to ~101 sub-keys (F.round of 100 * rand() yields 0..100),
# letting Spark parallelize the shuffle for hot (dim1, dim2) values.
#
# NOTE(review): because rows are grouped by (dim1, dim2, salt) and the salt is
# then dropped, the result contains *partial* sums — multiple rows per
# (dim1, dim2). The usual pattern follows this with a second
# .groupby(['dim1', 'dim2']).agg(F.sum(...)) to combine the partials — confirm
# whether that second stage happens downstream.
df = (
    df.withColumn("salt", F.round(100 * F.rand()))
    .groupby(["dim1", "dim2", "salt"])
    .agg({"dim_to_sum": "sum"})
    .drop("salt")
)