Skip to content

Commit a75b4ac

Browse files
alihassanijr (Ali Hassani) and Ali Hassani authored
Fix Stream-K reduce bug in epilogue with broadcast (#1224)
Co-authored-by: Ali Hassani <[email protected]>
1 parent e9e30c2 commit a75b4ac

File tree

1 file changed: +4 -4 lines changed

include/cutlass/epilogue/threadblock/epilogue_with_broadcast.h

+4 -4
Original file line numberDiff line numberDiff line change
@@ -946,13 +946,13 @@ class EpilogueWithBroadcast<
     //

     if (OutputOp::kStoreZ) {
+      destination_iterator += reduce_fragment_idx;
       destination_iterator.store(frag_Z);
-      ++destination_iterator;
     }

     if (OutputOp::kStoreT) {
+      tensor_iterator += reduce_fragment_idx;
       tensor_iterator.store(frag_T);
-      ++tensor_iterator;
     }
   }
 };
@@ -1698,13 +1698,13 @@ class EpilogueWithBroadcast<
     //

     if (OutputOp::kStoreZ) {
+      destination_iterator += reduce_fragment_idx;
       destination_iterator.store(frag_Z);
-      ++destination_iterator;
     }

     if (OutputOp::kStoreT) {
+      tensor_iterator += reduce_fragment_idx;
       tensor_iterator.store(frag_T);
-      ++tensor_iterator;
     }
   }
 };

0 commit comments

Comments
 (0)