[clang] [OpenMP] Pass min/max thread and team count to the OMPIRBuilder (PR #70247)
Joseph Huber via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 25 14:47:19 PDT 2023
================
@@ -4227,8 +4227,106 @@ static const omp::GV &getGridValue(Function *Kernel) {
llvm_unreachable("No grid value available for this architecture!");
}
+/// Find the "nvvm.annotations" entry that attaches property \p Name to
+/// \p Kernel.
+///
+/// Only operands with exactly three elements are considered; the first must
+/// refer to \p Kernel and the second must be an MDString equal to \p Name.
+///
+/// \returns the matching node, or nullptr if there is none.
+static MDNode *getNVPTXMDNode(Function &Kernel, StringRef Name) {
+  // This is a pure query, so look the named metadata up instead of inserting
+  // it: a read must not leave an empty "nvvm.annotations" node behind.
+  NamedMDNode *MD = Kernel.getParent()->getNamedMetadata("nvvm.annotations");
+  if (!MD)
+    return nullptr;
+
+  for (MDNode *Op : MD->operands()) {
+    if (Op->getNumOperands() != 3)
+      continue;
+    // Operands may be null (e.g. when the annotated function was removed), so
+    // use the null-tolerant cast and simply skip such entries.
+    auto *KernelOp = dyn_cast_or_null<ConstantAsMetadata>(Op->getOperand(0));
+    if (!KernelOp || KernelOp->getValue() != &Kernel)
+      continue;
+    auto *Prop = dyn_cast_or_null<MDString>(Op->getOperand(1));
+    if (!Prop || Prop->getString() != Name)
+      continue;
+    return Op;
+  }
+  return nullptr;
+}
+
+/// Record \p Value for property \p Name of \p Kernel in "nvvm.annotations".
+///
+/// If the kernel already has an entry for \p Name, its value operand is
+/// replaced by the combination of the old value and \p Value: the smaller of
+/// the two when \p Min is true, the larger otherwise. The integer type of the
+/// existing operand is preserved. If there is no entry yet, a new
+/// {kernel, name, i32 value} node is appended.
+static void updateNVPTXMetadata(Function &Kernel, StringRef Name, int32_t Value,
+                                bool Min) {
+  if (MDNode *ExistingOp = getNVPTXMDNode(Kernel, Name)) {
+    // The value operand is dereferenced unconditionally below, so use cast<>
+    // rather than dyn_cast<>: a malformed annotation then trips an assertion
+    // instead of silently dereferencing a null pointer.
+    auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
+    int32_t OldLimit = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
+    int32_t NewLimit =
+        Min ? std::min(OldLimit, Value) : std::max(OldLimit, Value);
+    ExistingOp->replaceOperandWith(
+        2, ConstantAsMetadata::get(
+               ConstantInt::get(OldVal->getValue()->getType(), NewLimit)));
+    return;
+  }
+
+  // No entry for this property yet: append one to nvvm.annotations.
+  LLVMContext &Ctx = Kernel.getContext();
+  Metadata *MDVals[] = {
+      ConstantAsMetadata::get(&Kernel), MDString::get(Ctx, Name),
+      ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), Value))};
+  Module &M = *Kernel.getParent();
+  NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+  MD->addOperand(MDNode::get(Ctx, MDVals));
+}
+
+/// Read the thread bounds currently recorded on \p Kernel.
+///
+/// The upper bound starts from the "omp_target_thread_limit" function
+/// attribute; a value of zero is treated as "no limit recorded" and does not
+/// clamp the target-specific bound.
+///  - For AMDGPU kernels the bounds are parsed from the
+///    "amdgpu-flat-work-group-size" string attribute ("LB,UB"). Parts that are
+///    missing or fail to parse fall back to 0 (lower) or the thread limit
+///    (upper).
+///  - Otherwise the "maxntidx" entry in "nvvm.annotations", if present,
+///    supplies the upper bound and the lower bound is 0.
+///
+/// \returns the pair {lower bound, upper bound}.
+std::pair<int32_t, int32_t>
+OpenMPIRBuilder::readThreadBoundsForKernel(Function &Kernel) {
+  int32_t ThreadLimit =
+      Kernel.getFnAttributeAsParsedInteger("omp_target_thread_limit");
+
+  if (Kernel.getCallingConv() == CallingConv::AMDGPU_KERNEL) {
+    const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size");
+    if (!Attr.isValid() || !Attr.isStringAttribute())
+      return {0, ThreadLimit};
+    auto [LBStr, UBStr] = Attr.getValueAsString().split(',');
+    // Initialized defensively; to_integer only writes on success.
+    int32_t LB = 0;
+    int32_t UB = 0;
+    if (!llvm::to_integer(UBStr, UB, 10))
+      return {0, ThreadLimit};
+    UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
+    if (!llvm::to_integer(LBStr, LB, 10))
+      return {0, UB};
+    return {LB, UB};
+  }
+
+  if (MDNode *ExistingOp = getNVPTXMDNode(Kernel, "maxntidx")) {
+    // The operand is dereferenced unconditionally, so use cast<> (asserts on
+    // malformed metadata) instead of an unchecked dyn_cast<>.
+    auto *OldVal = cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
+    int32_t UB = cast<ConstantInt>(OldVal->getValue())->getZExtValue();
+    return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
+  }
+  return {0, ThreadLimit};
+}
+
+/// Record the thread bounds [\p LB, \p UB] on \p Kernel.
+///
+/// The upper bound is always stored in the "omp_target_thread_limit" function
+/// attribute. In addition, AMDGPU kernels get an
+/// "amdgpu-flat-work-group-size" attribute of the form "LB,UB"; all other
+/// kernels get their "maxntidx" annotation updated with \p UB.
+void OpenMPIRBuilder::writeThreadBoundsForKernel(Function &Kernel, int32_t LB,
+                                                 int32_t UB) {
+  Kernel.addFnAttr("omp_target_thread_limit", std::to_string(UB));
+
+  // Anything that is not an AMDGPU kernel takes the NVPTX annotation path.
+  if (Kernel.getCallingConv() != CallingConv::AMDGPU_KERNEL) {
+    updateNVPTXMetadata(Kernel, "maxntidx", UB, /*Min=*/true);
+    return;
+  }
+
+  const std::string WorkGroupSize = llvm::utostr(LB) + "," + llvm::utostr(UB);
+  Kernel.addFnAttr("amdgpu-flat-work-group-size", WorkGroupSize);
+}
+
+std::pair<int32_t, int32_t>
+OpenMPIRBuilder::readTeamBoundsForKernel(Function &Kernel) {
+ // TODO add A backend annotations
----------------
jhuber6 wrote:
Grammar
https://github.com/llvm/llvm-project/pull/70247
More information about the cfe-commits
mailing list