[PATCH] D158383: [OpenMP] Add NVIDIA annotations for static grid thread limit
Johannes Doerfert via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 20 20:29:03 PDT 2023
jdoerfert created this revision.
jdoerfert added reviewers: ye-luo, jhuber6, tianshilei1992.
Herald added subscribers: mattd, asavonic, guansong, bollu, hiraditya, yaxunl.
Herald added a project: All.
jdoerfert requested review of this revision.
Herald added subscribers: llvm-commits, jplehr, sstefan1.
Herald added a project: LLVM.
We already add AMD GPU annotations, the NVIDIA ones are just a little
more convoluted to add/update but otherwise the same.
We see again that the interplay of ompx_attribute and deduced value
needs to be improved, see the TODO.
https://reviews.llvm.org/D158383
Files:
clang/test/OpenMP/thread_limit_nvptx.c
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
Index: llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
===================================================================
--- llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -33,7 +33,9 @@
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/TargetRegistry.h"
@@ -4156,7 +4158,38 @@
OutlinedFn->addFnAttr("amdgpu-flat-work-group-size",
llvm::utostr(1) + "," + llvm::utostr(NumThreads));
} else {
- // TODO: Modify or create "maxntidx" module metadata.
+ // Update the "maxntidx" metadata for NVIDIA, or add it.
+ NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
+ MDNode *ExistingOp = nullptr;
+ for (auto *Op : MD->operands()) {
+ if (Op->getNumOperands() != 3)
+ continue;
+ auto *Kernel = dyn_cast<ConstantAsMetadata>(Op->getOperand(0));
+ if (!Kernel || Kernel->getValue() != OutlinedFn)
+ continue;
+ auto *Prop = dyn_cast<MDString>(Op->getOperand(1));
+ if (!Prop || Prop->getString() != "maxntidx")
+ continue;
+ ExistingOp = Op;
+ break;
+ }
+ if (ExistingOp) {
+ auto *OldVal = dyn_cast<ConstantAsMetadata>(ExistingOp->getOperand(2));
+ int32_t OldLimit =
+ cast<ConstantInt>(OldVal->getValue())->getZExtValue();
+ ExistingOp->replaceOperandWith(
+ 2, ConstantAsMetadata::get(
+ ConstantInt::get(OldVal->getValue()->getType(),
+ std::min(OldLimit, NumThreads))));
+ } else {
+ LLVMContext &Ctx = M.getContext();
+ Metadata *MDVals[] = {ConstantAsMetadata::get(OutlinedFn),
+ MDString::get(Ctx, "maxntidx"),
+ ConstantAsMetadata::get(ConstantInt::get(
+ Type::getInt32Ty(Ctx), NumThreads))};
+ // Append metadata to nvvm.annotations
+ MD->addOperand(MDNode::get(Ctx, MDVals));
+ }
}
OutlinedFn->addFnAttr("omp_target_thread_limit",
std::to_string(NumThreads));
Index: clang/test/OpenMP/thread_limit_nvptx.c
===================================================================
--- /dev/null
+++ clang/test/OpenMP/thread_limit_nvptx.c
@@ -0,0 +1,33 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _
+// Test target codegen - host bc file has to be created first.
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc
+// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+#ifndef HEADER
+#define HEADER
+
+void foo(int N) {
+// CHECK: l11, !"maxntidx", i32 128}
+#pragma omp target teams distribute parallel for simd
+ for (int i = 0; i < N; ++i)
+ ;
+// CHECK: l15, !"maxntidx", i32 4}
+#pragma omp target teams distribute parallel for simd thread_limit(4)
+ for (int i = 0; i < N; ++i)
+ ;
+// TODO: We should not emit two maxntidx annotations.
+// CHECK: l21, !"maxntidx", i32 128}
+// CHECK: l21, !"maxntidx", i32 42}
+#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42))))
+ for (int i = 0; i < N; ++i)
+ ;
+// TODO: We should not emit two maxntidx annotations.
+// CHECK: l27, !"maxntidx", i32 22}
+// CHECK: l27, !"maxntidx", i32 42}
+#pragma omp target teams distribute parallel for simd ompx_attribute(__attribute__((launch_bounds(42, 42)))) num_threads(22)
+ for (int i = 0; i < N; ++i)
+ ;
+}
+
+#endif
+
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D158383.551886.patch
Type: text/x-patch
Size: 4187 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230821/61d6560a/attachment.bin>
More information about the llvm-commits
mailing list