[llvm-branch-commits] [llvm] [OpenMPIRBuilder] Add support for distribute-parallel-for/do constructs (PR #127818)
Sergio Afonso via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Feb 21 02:20:19 PST 2025
https://github.com/skatrak updated https://github.com/llvm/llvm-project/pull/127818
>From ba9ea8c2cbe7848ca36c92e4c3ee464bcf0e6c39 Mon Sep 17 00:00:00 2001
From: Sergio Afonso <safonsof at amd.com>
Date: Tue, 18 Feb 2025 12:04:53 +0000
Subject: [PATCH] [OpenMPIRBuilder] Add support for distribute-parallel-for/do
constructs
This patch adds codegen for `kmpc_dist_for_static_init` runtime calls, used to
support worksharing a single loop across teams and threads. This can be used to
implement `distribute parallel for/do` support.
---
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 34 ++++++++++++++++++++---
1 file changed, 30 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 9e380bf2d3dbe..7788897fc0795 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -4130,6 +4130,23 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
}
+// Returns an LLVM function to call for initializing loop bounds using OpenMP
+// static scheduling for composite `distribute parallel for` depending on
+// `type`. Only i32 and i64 are supported by the runtime. Always interpret
+// integers as unsigned similarly to CanonicalLoopInfo.
+static FunctionCallee
+getKmpcDistForStaticInitForType(Type *Ty, Module &M,
+ OpenMPIRBuilder &OMPBuilder) {
+ unsigned Bitwidth = Ty->getIntegerBitWidth();
+ if (Bitwidth == 32)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
+ if (Bitwidth == 64)
+ return OMPBuilder.getOrCreateRuntimeFunction(
+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
+ llvm_unreachable("unknown OpenMP loop iterator bitwidth");
+}
+
// Returns an LLVM function to call for initializing loop bounds using OpenMP
// static scheduling depending on `type`. Only i32 and i64 are supported by the
// runtime. Always interpret integers as unsigned similarly to
@@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
// Declare useful OpenMP runtime functions.
Value *IV = CLI->getIndVar();
Type *IVTy = IV->getType();
- FunctionCallee StaticInit = getKmpcForStaticInitForType(IVTy, M, *this);
+ FunctionCallee StaticInit =
+ LoopType == WorksharingLoopType::DistributeForStaticLoop
+ ? getKmpcDistForStaticInitForType(IVTy, M, *this)
+ : getKmpcForStaticInitForType(IVTy, M, *this);
FunctionCallee StaticFini =
getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
@@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
// Call the "init" function and update the trip count of the loop with the
// value it produced.
- Builder.CreateCall(StaticInit,
- {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
- PUpperBound, PStride, One, Zero});
+ SmallVector<Value *, 10> Args(
+ {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
+ if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
+ Value *PDistUpperBound =
+ Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound");
+ Args.push_back(PDistUpperBound);
+ }
+ Args.append({PStride, One, Zero});
+ Builder.CreateCall(StaticInit, Args);
Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
More information about the llvm-branch-commits
mailing list