Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Flang][MLIR][OpenMP] - Add support for firstprivate when translating omp.target ops from MLIR to LLVMIR #131213

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 5 additions & 4 deletions flang/lib/Lower/OpenMP/OpenMP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1694,13 +1694,14 @@ static void genTargetClauses(
cp.processNowait(clauseOps);
cp.processThreadLimit(stmtCtx, clauseOps);

cp.processTODO<clause::Allocate, clause::Defaultmap, clause::Firstprivate,
clause::InReduction, clause::UsesAllocators>(
loc, llvm::omp::Directive::OMPD_target);
cp.processTODO<clause::Allocate, clause::Defaultmap, clause::InReduction,
clause::UsesAllocators>(loc,
llvm::omp::Directive::OMPD_target);

// `target private(..)` is only supported in delayed privatization mode.
if (!enableDelayedPrivatizationStaging)
cp.processTODO<clause::Private>(loc, llvm::omp::Directive::OMPD_target);
cp.processTODO<clause::Firstprivate, clause::Private>(
loc, llvm::omp::Directive::OMPD_target);
}

static void genTargetDataClauses(
Expand Down
68 changes: 62 additions & 6 deletions flang/lib/Optimizer/OpenMP/MapsForPrivatizedSymbols.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,14 @@
#include <type_traits>

#define DEBUG_TYPE "omp-maps-for-privatized-symbols"

#define PDBGS() (llvm::dbgs() << "[" << DEBUG_TYPE << "]: ")
namespace flangomp {
#define GEN_PASS_DEF_MAPSFORPRIVATIZEDSYMBOLSPASS
#include "flang/Optimizer/OpenMP/Passes.h.inc"
} // namespace flangomp

using namespace mlir;

namespace {
class MapsForPrivatizedSymbolsPass
: public flangomp::impl::MapsForPrivatizedSymbolsPassBase<
Expand All @@ -60,14 +62,14 @@ class MapsForPrivatizedSymbolsPass
// We want the first result of the hlfir.declare op because our goal
// is to map the descriptor (fir.box or fir.boxchar) and the first
// result for hlfir.declare is the descriptor if a the symbol being
// decalred needs a descriptor.
// declared needs a descriptor.
// Some types are boxed immediately before privatization. These have other
// operations in between the privatization and the declaration. It is safe
// to use var directly here because they will be boxed anyway.
if (auto declOp = llvm::dyn_cast_if_present<hlfir::DeclareOp>(definingOp))
varPtr = declOp.getBase();

// If we do not have a reference to descritor, but the descriptor itself
// If we do not have a reference to a descriptor but the descriptor itself,
// then we need to store that on the stack so that we can map the
// address of the descriptor.
if (mlir::isa<fir::BaseBoxType>(varPtr.getType()) ||
Expand All @@ -81,6 +83,15 @@ class MapsForPrivatizedSymbolsPass
builder.create<fir::StoreOp>(loc, varPtr, alloca);
varPtr = alloca;
}
assert(mlir::isa<omp::PointerLikeType>(varPtr.getType()) &&
"Dealing with a varPtr that is not a PointerLikeType");

// Figure out the bounds because knowing the bounds will help the subsequent
// MapInfoFinalizationPass map the underlying data of the descriptor.
llvm::SmallVector<mlir::Value> boundsOps;
if (needsBoundsOps(varPtr))
genBoundsOps(builder, varPtr, boundsOps);

return builder.create<omp::MapInfoOp>(
loc, varPtr.getType(), varPtr,
TypeAttr::get(llvm::cast<omp::PointerLikeType>(varPtr.getType())
Expand All @@ -92,7 +103,7 @@ class MapsForPrivatizedSymbolsPass
/*varPtrPtr=*/Value{},
/*members=*/SmallVector<Value>{},
/*member_index=*/mlir::ArrayAttr{},
/*bounds=*/ValueRange{},
/*bounds=*/boundsOps.empty() ? SmallVector<Value>{} : boundsOps,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is unnecessary. An empty boundsOps is already an empty SmallVector<Value>.

/*mapperId=*/mlir::FlatSymbolRefAttr(), /*name=*/StringAttr(),
builder.getBoolAttr(false));
}
Expand Down Expand Up @@ -143,8 +154,8 @@ class MapsForPrivatizedSymbolsPass
omp::MapInfoOp mapInfoOp = createMapInfo(loc, privVar, builder);
mapInfoOps.push_back(mapInfoOp);

LLVM_DEBUG(llvm::dbgs() << "MapsForPrivatizedSymbolsPass created ->\n");
LLVM_DEBUG(mapInfoOp.dump());
LLVM_DEBUG(PDBGS() << "MapsForPrivatizedSymbolsPass created ->\n"
<< mapInfoOp << "\n");
}
if (!mapInfoOps.empty()) {
mapInfoOpsForTarget.insert({targetOp.getOperation(), mapInfoOps});
Expand All @@ -158,5 +169,50 @@ class MapsForPrivatizedSymbolsPass
}
}
}
// As the name suggests, this function examines var to determine if
// it has dynamic size. If true, this pass'll have to extract these
// bounds from descriptor of var and add the bounds to the resultant
// MapInfoOp.
bool needsBoundsOps(mlir::Value var) {
assert(mlir::isa<omp::PointerLikeType>(var.getType()) &&
"needsBoundsOps can deal only with pointer types");
mlir::Type t = fir::unwrapRefType(var.getType());
// t could be a box, so look inside the box
auto innerType = fir::dyn_cast_ptrOrBoxEleTy(t);
if (innerType)
return fir::hasDynamicSize(innerType);
return fir::hasDynamicSize(t);
}
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
Comment on lines +185 to +186
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ultra-nit

Suggested change
}
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,
}
void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value var,

llvm::SmallVector<mlir::Value> &boundsOps) {
if (!fir::isBoxAddress(var.getType()))
return;

unsigned int rank = 0;
rank = fir::getBoxRank(fir::unwrapRefType(var.getType()));
mlir::Location loc = var.getLoc();
mlir::Type idxTy = builder.getIndexType();
mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
mlir::Type boundTy = builder.getType<omp::MapBoundsType>();
mlir::Value box = builder.create<fir::LoadOp>(loc, var);
for (unsigned int i = 0; i < rank; ++i) {
mlir::Value dimNo = builder.createIntegerConstant(loc, idxTy, i);
auto dimInfo =
builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dimNo);
auto normalizedLB = builder.create<mlir::arith::ConstantOp>(
loc, idxTy, builder.getIntegerAttr(idxTy, 0));
Comment on lines +202 to +203
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this is constant across iterations so you could move it outside of the loop and generate fewer constants like you did with one.

mlir::Value lb = dimInfo.getLowerBound();
mlir::Value extent = dimInfo.getExtent();
mlir::Value byteStride = dimInfo.getByteStride();
mlir::Value ub = builder.create<mlir::arith::SubIOp>(loc, extent, one);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you subtract 1 here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the extent is the number of elements. So, the upper_bound is (extent -1)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Doesn't that depend upon what the lower bound is?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the late reply. You are right, I should add the lower_bound to it.
ub = lower_bound + extent - 1


mlir::Value boundsOp = builder.create<omp::MapBoundsOp>(
loc, boundTy, /*lower_bound=*/normalizedLB,
/*upper_bound=*/ub, /*extent=*/extent, /*stride=*/byteStride,
/*stride_in_bytes = */ true, /*start_idx=*/lb);
LLVM_DEBUG(PDBGS() << "Created BoundsOp " << boundsOp << "\n");
boundsOps.push_back(boundsOp);
}
}
};
} // namespace
8 changes: 7 additions & 1 deletion flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,14 @@ void createOpenMPFIRPassPipeline(mlir::PassManager &pm,
pm.addPass(flangomp::createDoConcurrentConversionPass(
opts.doConcurrentMappingKind == DoConcurrentMappingKind::DCMK_Device));

pm.addPass(flangomp::createMapInfoFinalizationPass());
// The MapsForPrivatizedSymbols pass needs to run before
// MapInfoFinalizationPass because the former creates new
// MapInfoOp instances, typically for descriptors.
// MapInfoFinalizationPass adds MapInfoOp instances for the descriptors
// underlying data which is necessary to access the data on the offload
// target device.
pm.addPass(flangomp::createMapsForPrivatizedSymbolsPass());
pm.addPass(flangomp::createMapInfoFinalizationPass());
pm.addPass(flangomp::createMarkDeclareTargetPass());
pm.addPass(flangomp::createGenericLoopConversionPass());
if (opts.isTargetDevice)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ end subroutine target_allocatable
! CHECK: %[[VAR_ALLOC:.*]] = fir.alloca [[DESC_TYPE]]
! CHECK-SAME: {bindc_name = "alloc_var", {{.*}}}
! CHECK: %[[VAR_DECL:.*]]:2 = hlfir.declare %[[VAR_ALLOC]]
! CHECK: %[[BASE_ADDR:.*]] = fir.box_offset %[[VAR_DECL]]#0 base_addr : (!fir.ref<!fir.box<!fir.heap<i32>>>) -> [[MEMBER_TYPE:.*]]
! CHECK: %[[MEMBER:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], i32) map_clauses(to) capture(ByRef) var_ptr_ptr(%[[BASE_ADDR]] : [[MEMBER_TYPE:.*]]) -> {{.*}}
! CHECK: %[[MAP_VAR:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], [[DESC_TYPE]]) map_clauses(to) capture(ByRef) members(%[[MEMBER]] : [0] : !fir.llvm_ptr<!fir.ref<i32>>) -> !fir.ref<!fir.box<!fir.heap<i32>>>

! CHECK: %[[MAP_VAR:.*]] = omp.map.info var_ptr(%[[VAR_DECL]]#0 : [[TYPE]], [[DESC_TYPE]])
! CHECK-SAME: map_clauses(to) capture(ByRef) -> [[TYPE]]
! CHECK: omp.target map_entries(%[[MAP_VAR]] -> %arg0 : [[TYPE]]) private(
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} : [[TYPE]]) {
! CHECK: omp.target map_entries(%[[MAP_VAR]] -> %arg0, %[[MEMBER]] -> %arg1 : [[TYPE]], [[MEMBER_TYPE]]) private(
! CHECK-SAME: @[[VAR_PRIVATIZER_SYM]] %[[VAR_DECL]]#0 -> %{{.*}} [map_idx=0] : [[TYPE]]) {
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,19 @@ end subroutine target_allocatable
! CHECK: %[[REAL_ARR_DECL:.*]]:2 = hlfir.declare %[[REAL_ARR_ALLOC]]({{.*}})
! CHECK: fir.store %[[REAL_ARR_DECL]]#0 to %[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
! CHECK: %[[MAPPED_MI0:.*]] = omp.map.info var_ptr(%[[MAPPED_DECL]]#1 : !fir.ref<i32>, i32) {{.*}}
! CHECK: %[[ALLOC_VAR_MAP:.*]] = omp.map.info var_ptr(%[[ALLOC_VAR_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.box<!fir.heap<i32>>)
! CHECK: %[[REAL_ARR_DESC_MAP:.*]] = omp.map.info var_ptr(%[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>)
! CHECK: %[[ALLOC_VAR_MEMBER:.*]] = omp.map.info var_ptr(%[[ALLOC_VAR_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, i32)
! CHECK: %[[ALLOC_VAR_MAP:.*]] = omp.map.info var_ptr(%[[ALLOC_VAR_DECL]]#0 : !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.box<!fir.heap<i32>>) {{.*}} members(%[[ALLOC_VAR_MEMBER]] :
! CHECK: %[[REAL_ARR_MEMBER:.*]] = omp.map.info var_ptr(%[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32)
! CHECK: %[[REAL_ARR_DESC_MAP:.*]] = omp.map.info var_ptr(%[[REAL_ARR_DESC_ALLOCA]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) {{.*}} members(%[[REAL_ARR_MEMBER]] :
! CHECK: fir.store %[[CHAR_VAR_DECL]]#0 to %[[CHAR_VAR_DESC_ALLOCA]] : !fir.ref<!fir.boxchar<1>>
! CHECK: %[[CHAR_VAR_DESC_MAP:.*]] = omp.map.info var_ptr(%[[CHAR_VAR_DESC_ALLOCA]] : !fir.ref<!fir.boxchar<1>>, !fir.boxchar<1>)
! CHECK: omp.target
! CHECK-SAME: map_entries(
! CHECK-SAME: %[[MAPPED_MI0]] -> %[[MAPPED_ARG0:[^,]+]],
! CHECK-SAME: %[[ALLOC_VAR_MAP]] -> %[[MAPPED_ARG1:[^,]+]]
! CHECK-SAME: %[[REAL_ARR_DESC_MAP]] -> %[[MAPPED_ARG2:[^,]+]]
! CHECK-SAME: %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]] :
! CHECK-SAME: !fir.ref<i32>, !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>)
! CHECK-SAME: %[[CHAR_VAR_DESC_MAP]] -> %[[MAPPED_ARG3:.[^,]+]]
! CHECK-SAME: !fir.ref<i32>, !fir.ref<!fir.box<!fir.heap<i32>>>, !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.ref<!fir.boxchar<1>>, !fir.llvm_ptr<!fir.ref<i32>>, !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
! CHECK-SAME: private(
! CHECK-SAME: @[[ALLOC_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[ALLOC_ARG:[^,]+]] [map_idx=1],
! CHECK-SAME: @[[REAL_PRIVATIZER_SYM]] %{{[^[:space:]]+}}#0 -> %[[REAL_ARG:[^,]+]],
Expand Down
2 changes: 1 addition & 1 deletion flang/test/Lower/OpenMP/Todo/firstprivate-target.f90
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

integer :: i
! CHECK: not yet implemented: Unhandled clause FIRSTPRIVATE in TARGET construct
!$omp target firstprivate(i)
!$omp target firstprivate(i) nowait
!$omp end target

end program
15 changes: 8 additions & 7 deletions mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>

There are no restrictions on the body except for:
- The `dealloc` regions has a single argument.
- The `init & `copy` regions have 2 arguments.
- The `init` & `copy` regions have 2 arguments.
- All three regions are terminated by `omp.yield` ops.
The above restrictions and other obvious restrictions (e.g. verifying the
type of yielded values) are verified by the custom op verifier. The actual
Expand All @@ -90,15 +90,15 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
Instances of this op would then be used by ops that model directives that
accept data-sharing attribute clauses.

The $sym_name attribute provides a symbol by which the privatizer op can be
The `sym_name` attribute provides a symbol by which the privatizer op can be
referenced by other dialect ops.

The $type attribute is the type of the value being privatized. This type
The `type` attribute is the type of the value being privatized. This type
will be implicitly allocated in MLIR->LLVMIR conversion and passed as the
second argument to the init region. Therefore the type of arguments to
the regions should be a type which represents a pointer to $type.
the regions should be a type which represents a pointer to `type`.

The $data_sharing_type attribute specifies whether privatizer corresponds
The `data_sharing_type` attribute specifies whether privatizer corresponds
to a `private` or a `firstprivate` clause.
}];

Expand Down Expand Up @@ -161,9 +161,10 @@ def PrivateClauseOp : OpenMP_Op<"private", [IsolatedFromAbove, RecipeInterface]>
/// needsMap returns true if the value being privatized should additionally
/// be mapped to the target region using a MapInfoOp. This is most common
/// when an allocatable is privatized. In such cases, the descriptor is used
/// in privatization and needs to be mapped on to the device.
/// in privatization and needs to be mapped on to the device. The use of
/// firstprivate also creates the need to map the host variable to the device.
bool needsMap() {
return initReadsFromMold();
return readsFromMold();
}

/// Get the type for arguments to nested regions. This should
Expand Down
38 changes: 18 additions & 20 deletions mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -208,19 +208,9 @@ static LogicalResult checkImplementationStatus(Operation &op) {
};
auto checkPrivate = [&todo](auto op, LogicalResult &result) {
if constexpr (std::is_same_v<std::decay_t<decltype(op)>, omp::TargetOp>) {
// Privatization clauses are supported, except on some situations, so we
// need to check here whether any of these unsupported cases are being
// translated.
if (std::optional<ArrayAttr> privateSyms = op.getPrivateSyms()) {
for (Attribute privatizerNameAttr : *privateSyms) {
omp::PrivateClauseOp privatizer = findPrivatizer(
op.getOperation(), cast<SymbolRefAttr>(privatizerNameAttr));

if (privatizer.getDataSharingType() ==
omp::DataSharingClauseType::FirstPrivate)
result = todo("firstprivate");
}
}
// Privatization is supported only for included target tasks.
if (!op.getPrivateVars().empty() && op.getNowait())
result = todo("privatization for deferred target tasks");
} else {
if (!op.getPrivateVars().empty() || op.getPrivateSyms())
result = todo("privatization");
Expand Down Expand Up @@ -1465,6 +1455,7 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
assert(allocaTerminator->getNumSuccessors() == 1 &&
"This is an unconditional branch created by splitBB");

llvm::DataLayout dataLayout = builder.GetInsertBlock()->getDataLayout();
llvm::BasicBlock *afterAllocas = allocaTerminator->getSuccessor(0);

unsigned int allocaAS =
Expand All @@ -1491,12 +1482,12 @@ allocatePrivateVars(llvm::IRBuilderBase &builder,
return afterAllocas;
}

static LogicalResult
copyFirstPrivateVars(llvm::IRBuilderBase &builder,
LLVM::ModuleTranslation &moduleTranslation,
SmallVectorImpl<mlir::Value> &mlirPrivateVars,
ArrayRef<llvm::Value *> llvmPrivateVars,
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls) {
static LogicalResult copyFirstPrivateVars(
llvm::IRBuilderBase &builder, LLVM::ModuleTranslation &moduleTranslation,
SmallVectorImpl<mlir::Value> &mlirPrivateVars,
ArrayRef<llvm::Value *> llvmPrivateVars,
SmallVectorImpl<omp::PrivateClauseOp> &privateDecls,
llvm::DenseMap<Value, Value> *mappedPrivateVars = nullptr) {
// Apply copy region for firstprivate.
bool needsFirstprivate =
llvm::any_of(privateDecls, [](omp::PrivateClauseOp &privOp) {
Expand All @@ -1520,7 +1511,8 @@ copyFirstPrivateVars(llvm::IRBuilderBase &builder,
Region &copyRegion = decl.getCopyRegion();

// map copyRegion rhs arg
llvm::Value *nonPrivateVar = moduleTranslation.lookupValue(mlirVar);
llvm::Value *nonPrivateVar = findAssociatedValue(
mlirVar, builder, moduleTranslation, mappedPrivateVars);
assert(nonPrivateVar);
moduleTranslation.mapValue(decl.getCopyMoldArg(), nonPrivateVar);

Expand Down Expand Up @@ -4860,6 +4852,12 @@ convertOmpTarget(Operation &opInst, llvm::IRBuilderBase &builder,
.failed())
return llvm::make_error<PreviouslyReportedError>();

if (failed(copyFirstPrivateVars(
builder, moduleTranslation, privateVarsInfo.mlirVars,
privateVarsInfo.llvmVars, privateVarsInfo.privatizers,
&mappedPrivateVars)))
return llvm::make_error<PreviouslyReportedError>();

SmallVector<Region *> privateCleanupRegions;
llvm::transform(privateVarsInfo.privatizers,
std::back_inserter(privateCleanupRegions),
Expand Down
Loading