[Openmp-commits] [clang] [openmp] [OpenMP 6.0] Codegen for Reduction over private variables with reduction clause (PR #134709)
Alexey Bataev via Openmp-commits
openmp-commits at lists.llvm.org
Mon May 12 07:38:42 PDT 2025
================
@@ -4898,6 +4898,266 @@ void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
}
}
+void CGOpenMPRuntime::emitPrivateReduction(
+ CodeGenFunction &CGF, SourceLocation Loc, const Expr *Privates,
+ const Expr *LHSExprs, const Expr *RHSExprs, const Expr *ReductionOps) {
+
+ // Create a shared global variable (__shared_reduction_var) to accumulate the
+ // final result.
+ //
+ // Call __kmpc_barrier to synchronize threads before initialization.
+ //
+ // The master thread (thread_id == 0) initializes __shared_reduction_var
+ // with the identity value or initializer.
+ //
+ // Call __kmpc_barrier to synchronize before combining.
+ // For each i:
+ // - Thread enters critical section.
+ // - Reads its private value from LHSExprs[i].
+ // - Updates __shared_reduction_var[i] = RedOp_i(__shared_reduction_var[i],
+ // LHSExprs[i]).
+ // - Exits critical section.
+ //
+ // Call __kmpc_barrier after combining.
+ //
+ // Each thread copies __shared_reduction_var[i] back to LHSExprs[i].
+ //
+ // Final __kmpc_barrier to synchronize after broadcasting
+ QualType PrivateType = Privates->getType();
+ llvm::Type *LLVMType = CGF.ConvertTypeForMem(PrivateType);
+
+ llvm::Constant *InitVal = nullptr;
+ const OMPDeclareReductionDecl *UDR = getReductionInit(ReductionOps);
+ // Determine the initial value for the shared reduction variable
+ if (!UDR) {
+ InitVal = llvm::Constant::getNullValue(LLVMType);
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+ const Expr *InitExpr = VD->getInit();
+ if (InitExpr && !PrivateType->isAggregateType() &&
+ !PrivateType->isAnyComplexType()) {
+ Expr::EvalResult Result;
+ if (InitExpr->EvaluateAsRValue(Result, CGF.getContext())) {
+ APValue &InitValue = Result.Val;
+ if (InitValue.isInt())
+ InitVal = llvm::ConstantInt::get(LLVMType, InitValue.getInt());
+ }
+ }
+ }
+ }
+ } else {
+ InitVal = llvm::Constant::getNullValue(LLVMType);
+ }
+ std::string ReductionVarNameStr;
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates->IgnoreParenCasts())) {
+ ReductionVarNameStr = DRE->getDecl()->getNameAsString();
+ } else {
+ ReductionVarNameStr = "unnamed_priv_var";
+ }
+
+ // Create an internal shared variable
+ std::string SharedName =
+ CGM.getOpenMPRuntime().getName({"internal_pivate_", ReductionVarNameStr});
+ llvm::GlobalVariable *SharedVar = new llvm::GlobalVariable(
+ CGM.getModule(), LLVMType, false, llvm::GlobalValue::InternalLinkage,
+ InitVal, ".omp.reduction." + SharedName, nullptr,
+ llvm::GlobalVariable::NotThreadLocal);
+
+ SharedVar->setAlignment(
+ llvm::MaybeAlign(CGF.getContext().getTypeAlign(PrivateType) / 8));
+
+ Address SharedResult(SharedVar, SharedVar->getValueType(),
+ CGF.getContext().getTypeAlignInChars(PrivateType));
+
+ llvm::Value *ThreadId = getThreadID(CGF, Loc);
+ llvm::Value *BarrierLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
+ llvm::Value *BarrierArgs[] = {BarrierLoc, ThreadId};
+
+ llvm::BasicBlock *InitBB = CGF.createBasicBlock("init");
+ llvm::BasicBlock *InitEndBB = CGF.createBasicBlock("init.end");
+
+ llvm::Value *IsWorker = CGF.Builder.CreateICmpEQ(
+ ThreadId, llvm::ConstantInt::get(ThreadId->getType(), 0));
+ CGF.Builder.CreateCondBr(IsWorker, InitBB, InitEndBB);
+
+ CGF.EmitBlock(InitBB);
+
+ auto EmitSharedInit = [&]() {
+ if (UDR) { // Check if it's a User-Defined Reduction
+ if (const Expr *UDRInitExpr = UDR->getInitializer()) {
+ std::pair<llvm::Function *, llvm::Function *> FnPair =
+ getUserDefinedReduction(UDR);
+ llvm::Function *InitializerFn = FnPair.second;
+ if (InitializerFn) {
+ if (const auto *CE =
+ dyn_cast<CallExpr>(UDRInitExpr->IgnoreParenImpCasts())) {
+ const auto *OutDRE = cast<DeclRefExpr>(
+ cast<UnaryOperator>(CE->getArg(0)->IgnoreParenImpCasts())
+ ->getSubExpr());
+ const VarDecl *OutVD = cast<VarDecl>(OutDRE->getDecl());
+
+ CodeGenFunction::OMPPrivateScope LocalScope(CGF);
+ LocalScope.addPrivate(OutVD, SharedResult);
+
+ (void)LocalScope.Privatize();
+ if (const auto *OVE = dyn_cast<OpaqueValueExpr>(
+ CE->getCallee()->IgnoreParenImpCasts())) {
+ CodeGenFunction::OpaqueValueMapping OpaqueMap(
+ CGF, OVE, RValue::get(InitializerFn));
+ CGF.EmitIgnoredExpr(CE);
+ } else {
+ CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
+ PrivateType.getQualifiers(), true);
+ }
+ } else {
+ CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
+ PrivateType.getQualifiers(), true);
+ }
+ } else {
+ CGF.EmitAnyExprToMem(UDRInitExpr, SharedResult,
+ PrivateType.getQualifiers(), true);
+ }
+ } else {
+ // EmitNullInitialization handles default construction for C++ classes
+ // and zeroing for scalars, which is a reasonable default.
+ CGF.EmitNullInitialization(SharedResult, PrivateType);
+ }
+ return; // UDR initialization handled
+ }
+ if (const auto *DRE = dyn_cast<DeclRefExpr>(Privates)) {
+ if (const auto *VD = dyn_cast<VarDecl>(DRE->getDecl())) {
+ const Expr *InitExpr = VD->getInit();
+ if (InitExpr && (PrivateType->isAggregateType() ||
+ PrivateType->isAnyComplexType())) {
----------------
alexey-bataev wrote:
All types should be supported!
https://github.com/llvm/llvm-project/pull/134709
More information about the Openmp-commits mailing list