[clang] [OpenMP] Pass min/max thread and team count to the OMPIRBuilder (PR #70247)
via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 25 13:00:08 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
Author: Johannes Doerfert (jdoerfert)
<details>
<summary>Changes</summary>
We now provide the information about the min/max thread and team count from to the OMPIRBuilder, no matter what the source was. That means we unify `thread_limit`, `num_teams`, `num_threads` handling with the target specific attriutes (`__launch_bounds__` and `amdgpu_flat_work_group_size`). This is in preparation to pass the values to the runtime, and to allow the middle-end (OpenMP-opt) to tighten the values if it seems appropriate. There is no "real" change after this commit.
---
Patch is 339.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/70247.diff
12 Files Affected:
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.cpp (+54-27)
- (modified) clang/lib/CodeGen/CGOpenMPRuntime.h (+2-1)
- (modified) clang/lib/CodeGen/CodeGenModule.h (+11-3)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+8-2)
- (modified) clang/lib/CodeGen/Targets/NVPTX.cpp (+33-13)
- (modified) clang/test/OpenMP/ompx_attributes_codegen.cpp (+22-12)
- (modified) clang/test/OpenMP/target_parallel_codegen.cpp (+48-48)
- (modified) clang/test/OpenMP/target_parallel_for_codegen.cpp (+192-192)
- (modified) clang/test/OpenMP/target_parallel_for_simd_codegen.cpp (+796-796)
- (modified) clang/test/OpenMP/thread_limit_nvptx.c (+4-4)
- (modified) llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h (+36-12)
- (modified) llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp (+117-55)
``````````diff
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
index aae1a0ea250eea2..9b7ff5f66f2f50d 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp
@@ -6021,15 +6021,46 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
};
// Get NumTeams and ThreadLimit attributes
- int32_t DefaultValTeams = -1;
- uint32_t DefaultValThreads = UINT32_MAX;
- getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
- getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads,
+ int32_t DefaultValMinTeams = 1;
+ int32_t DefaultValMaxTeams = -1;
+ uint32_t DefaultValMinThreads = 1;
+ uint32_t DefaultValMaxThreads = UINT32_MAX;
+
+ getNumTeamsExprForTargetDirective(CGF, D, DefaultValMinTeams,
+ DefaultValMaxTeams);
+ getNumThreadsExprForTargetDirective(CGF, D, DefaultValMaxThreads,
/*UpperBoundOnly=*/true);
- OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
- DefaultValTeams, DefaultValThreads,
- IsOffloadEntry, OutlinedFn, OutlinedFnID);
+ for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
+ for (auto *A : C->getAttrs()) {
+ int32_t MinThreadsVal = 1, MaxThreadsVal = 0;
+ int32_t MinBlocksVal = 1, MaxBlocksVal = -1;
+ if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
+ CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &MaxThreadsVal,
+ &MinBlocksVal, &MaxBlocksVal);
+ else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
+ CGM.handleAMDGPUFlatWorkGroupSizeAttr(
+ nullptr, Attr, /*ReqdWGS=*/nullptr, &MinThreadsVal, &MaxThreadsVal);
+ else
+ continue;
+
+ DefaultValMinThreads =
+ std::max(DefaultValMinThreads, uint32_t(MinThreadsVal));
+ DefaultValMaxThreads =
+ DefaultValMaxThreads
+ ? std::min(DefaultValMaxThreads, uint32_t(MaxThreadsVal))
+ : MaxThreadsVal;
+ DefaultValMinTeams = DefaultValMinTeams
+ ? std::max(DefaultValMinTeams, MinBlocksVal)
+ : MinBlocksVal;
+ DefaultValMaxTeams = std::min(DefaultValMaxTeams, MaxBlocksVal);
+ }
+ }
+
+ OMPBuilder.emitTargetRegionFunction(
+ EntryInfo, GenerateOutlinedFunction, DefaultValMinTeams,
+ DefaultValMaxTeams, DefaultValMinThreads, DefaultValMaxThreads,
+ IsOffloadEntry, OutlinedFn, OutlinedFnID);
if (!OutlinedFn)
return;
@@ -6038,14 +6069,8 @@ void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
for (auto *A : C->getAttrs()) {
- if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
- CGM.handleCUDALaunchBoundsAttr(OutlinedFn, Attr);
- else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
- CGM.handleAMDGPUFlatWorkGroupSizeAttr(OutlinedFn, Attr);
- else if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
+ if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
- else
- llvm_unreachable("Unexpected attribute kind");
}
}
}
@@ -6103,8 +6128,8 @@ const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
}
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
- CodeGenFunction &CGF, const OMPExecutableDirective &D,
- int32_t &DefaultVal) {
+ CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
+ int32_t &MaxTeamsVal) {
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
@@ -6125,22 +6150,22 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
if (auto Constant =
NumTeams->getIntegerConstantExpr(CGF.getContext()))
- DefaultVal = Constant->getExtValue();
+ MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
return NumTeams;
}
- DefaultVal = 0;
+ MinTeamsVal = MaxTeamsVal = 0;
return nullptr;
}
if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
- DefaultVal = 1;
+ MinTeamsVal = MaxTeamsVal = 1;
return nullptr;
}
- DefaultVal = 1;
+ MinTeamsVal = MaxTeamsVal = 1;
return nullptr;
}
// A value of -1 is used to check if we need to emit no teams region
- DefaultVal = -1;
+ MinTeamsVal = MaxTeamsVal = -1;
return nullptr;
}
case OMPD_target_teams_loop:
@@ -6154,10 +6179,10 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
- DefaultVal = Constant->getExtValue();
+ MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
return NumTeams;
}
- DefaultVal = 0;
+ MinTeamsVal = MaxTeamsVal = 0;
return nullptr;
}
case OMPD_target_parallel:
@@ -6165,7 +6190,7 @@ const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
case OMPD_target_parallel_for_simd:
case OMPD_target_parallel_loop:
case OMPD_target_simd:
- DefaultVal = 1;
+ MinTeamsVal = MaxTeamsVal = 1;
return nullptr;
case OMPD_parallel:
case OMPD_for:
@@ -6240,8 +6265,9 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
"Clauses associated with the teams directive expected to be emitted "
"only for the host!");
CGBuilderTy &Bld = CGF.Builder;
- int32_t DefaultNT = -1;
- const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
+ int32_t MinNT = -1, MaxNT = -1;
+ const Expr *NumTeams =
+ getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
if (NumTeams != nullptr) {
OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
@@ -6271,7 +6297,8 @@ llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
}
}
- return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
+ assert(MinNT == MaxNT && "Num threads ranges require handling here.");
+ return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
/// Check for a num threads constant value (stored in \p DefaultVal), or
diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.h b/clang/lib/CodeGen/CGOpenMPRuntime.h
index 74b528d6cd7f8cc..d2f922da3320924 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntime.h
+++ b/clang/lib/CodeGen/CGOpenMPRuntime.h
@@ -637,7 +637,8 @@ class CGOpenMPRuntime {
/// Otherwise, return nullptr.
const Expr *getNumTeamsExprForTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &D,
- int32_t &DefaultVal);
+ int32_t &MinTeamsVal,
+ int32_t &MaxTeamsVal);
llvm::Value *emitNumTeamsForTargetDirective(CodeGenFunction &CGF,
const OMPExecutableDirective &D);
diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h
index 073b471c6e3cc11..793861f23b15f95 100644
--- a/clang/lib/CodeGen/CodeGenModule.h
+++ b/clang/lib/CodeGen/CodeGenModule.h
@@ -1543,15 +1543,23 @@ class CodeGenModule : public CodeGenTypeCache {
void moveLazyEmissionStates(CodeGenModule *NewBuilder);
/// Emit the IR encoding to attach the CUDA launch bounds attribute to \p F.
+ /// If \p MaxThreadsVal is not nullptr, the max threads value is stored in it,
+ /// if a valid one was found.
void handleCUDALaunchBoundsAttr(llvm::Function *F,
- const CUDALaunchBoundsAttr *A);
+ const CUDALaunchBoundsAttr *A,
+ int32_t *MaxThreadsVal = nullptr,
+ int32_t *MinBlocksVal = nullptr,
+ int32_t *MaxClusterRankVal = nullptr);
/// Emit the IR encoding to attach the AMD GPU flat-work-group-size attribute
/// to \p F. Alternatively, the work group size can be taken from a \p
- /// ReqdWGS.
+ /// ReqdWGS. If \p MinThreadsVal is not nullptr, the min threads value is
+ /// stored in it, if a valid one was found. If \p MaxThreadsVal is not
+ /// nullptr, the max threads value is stored in it, if a valid one was found.
void handleAMDGPUFlatWorkGroupSizeAttr(
llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *A,
- const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr);
+ const ReqdWorkGroupSizeAttr *ReqdWGS = nullptr,
+ int32_t *MinThreadsVal = nullptr, int32_t *MaxThreadsVal = nullptr);
/// Emit the IR encoding to attach the AMD GPU waves-per-eu attribute to \p F.
void handleAMDGPUWavesPerEUAttr(llvm::Function *F,
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index f6a614b3e4d54dd..0411846cf9b02bd 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -594,7 +594,8 @@ llvm::Value *AMDGPUTargetCodeGenInfo::createEnqueuedBlockKernel(
void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
llvm::Function *F, const AMDGPUFlatWorkGroupSizeAttr *FlatWGS,
- const ReqdWorkGroupSizeAttr *ReqdWGS) {
+ const ReqdWorkGroupSizeAttr *ReqdWGS, int32_t *MinThreadsVal,
+ int32_t *MaxThreadsVal) {
unsigned Min = 0;
unsigned Max = 0;
if (FlatWGS) {
@@ -607,8 +608,13 @@ void CodeGenModule::handleAMDGPUFlatWorkGroupSizeAttr(
if (Min != 0) {
assert(Min <= Max && "Min must be less than or equal Max");
+ if (MinThreadsVal)
+ *MinThreadsVal = Min;
+ if (MaxThreadsVal)
+ *MaxThreadsVal = Max;
std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
- F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+ if (F)
+ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
} else
assert(Max == 0 && "Max must be zero");
}
diff --git a/clang/lib/CodeGen/Targets/NVPTX.cpp b/clang/lib/CodeGen/Targets/NVPTX.cpp
index 64d019a10514d60..9057cc2178e19de 100644
--- a/clang/lib/CodeGen/Targets/NVPTX.cpp
+++ b/clang/lib/CodeGen/Targets/NVPTX.cpp
@@ -9,6 +9,7 @@
#include "ABIInfoImpl.h"
#include "TargetInfo.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
+#include <cstdint>
using namespace clang;
using namespace clang::CodeGen;
@@ -287,14 +288,23 @@ bool NVPTXTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
}
}
-void CodeGenModule::handleCUDALaunchBoundsAttr(
- llvm::Function *F, const CUDALaunchBoundsAttr *Attr) {
+void CodeGenModule::handleCUDALaunchBoundsAttr(llvm::Function *F,
+ const CUDALaunchBoundsAttr *Attr,
+ int32_t *MaxThreadsVal,
+ int32_t *MinBlocksVal,
+ int32_t *MaxClusterRankVal) {
// Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
llvm::APSInt MaxThreads(32);
MaxThreads = Attr->getMaxThreads()->EvaluateKnownConstInt(getContext());
- if (MaxThreads > 0)
- NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
- MaxThreads.getExtValue());
+ if (MaxThreads > 0) {
+ if (MaxThreadsVal)
+ *MaxThreadsVal = MaxThreads.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxntidx",
+ MaxThreads.getExtValue());
+ }
+ }
// min and max blocks is an optional argument for CUDALaunchBoundsAttr. If it
// was not specified in __launch_bounds__ or if the user specified a 0 value,
@@ -302,18 +312,28 @@ void CodeGenModule::handleCUDALaunchBoundsAttr(
if (Attr->getMinBlocks()) {
llvm::APSInt MinBlocks(32);
MinBlocks = Attr->getMinBlocks()->EvaluateKnownConstInt(getContext());
- if (MinBlocks > 0)
- // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
- NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
- MinBlocks.getExtValue());
+ if (MinBlocks > 0) {
+ if (MinBlocksVal)
+ *MinBlocksVal = MinBlocks.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "minctasm",
+ MinBlocks.getExtValue());
+ }
+ }
}
if (Attr->getMaxBlocks()) {
llvm::APSInt MaxBlocks(32);
MaxBlocks = Attr->getMaxBlocks()->EvaluateKnownConstInt(getContext());
- if (MaxBlocks > 0)
- // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
- NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
- MaxBlocks.getExtValue());
+ if (MaxBlocks > 0) {
+ if (MaxClusterRankVal)
+ *MaxClusterRankVal = MaxBlocks.getExtValue();
+ if (F) {
+ // Create !{<func-ref>, metadata !"maxclusterrank", i32 <val>} node
+ NVPTXTargetCodeGenInfo::addNVVMMetadata(F, "maxclusterrank",
+ MaxBlocks.getExtValue());
+ }
+ }
}
}
diff --git a/clang/test/OpenMP/ompx_attributes_codegen.cpp b/clang/test/OpenMP/ompx_attributes_codegen.cpp
index 21e9805cbe8293b..bcf524b464aef5f 100644
--- a/clang/test/OpenMP/ompx_attributes_codegen.cpp
+++ b/clang/test/OpenMP/ompx_attributes_codegen.cpp
@@ -1,16 +1,17 @@
// REQUIRES: amdgpu-registered-target
// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
-// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
-// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD
+// RUN: %clang_cc1 -target-cpu gfx900 -fopenmp -x c++ -std=c++11 -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD
+// RUN: %clang_cc1 -fopenmp -x c++ -std=c++11 -triple nvptx64 -fopenmp-targets=nvptx64 -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=NVIDIA
// expected-no-diagnostics
// Check that the target attributes are set on the generated kernel
void func() {
- // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l15() #0
- // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l17()
- // CHECK: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l19() #4
+ // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l16() #0
+ // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l18()
+ // AMD: amdgpu_kernel void @__omp_offloading[[HASH:.*]]_l20() #4
#pragma omp target ompx_attribute([[clang::amdgpu_flat_work_group_size(10, 20)]])
{}
@@ -21,11 +22,20 @@ void func() {
{}
}
-// CHECK: attributes #0
-// CHECK-SAME: "amdgpu-flat-work-group-size"="10,20"
-// CHECK: attributes #4
-// CHECK-SAME: "amdgpu-flat-work-group-size"="3,17"
-// CHECK-SAME: "amdgpu-waves-per-eu"="3,7"
+// AMD: attributes #0
+// AMD-SAME: "amdgpu-flat-work-group-size"="10,20"
+// AMD-SAME: "omp_target_thread_limit"="20"
+// AMD: "omp_target_thread_limit"="45"
+// AMD: attributes #4
+// AMD-SAME: "amdgpu-flat-work-group-size"="3,17"
+// AMD-SAME: "amdgpu-waves-per-eu"="3,7"
+// AMD-SAME: "omp_target_thread_limit"="17"
-// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"maxntidx", i32 45}
-// CHECK: !{ptr @__omp_offloading[[HASH]]_l17, !"minctasm", i32 90}
+// It is unclear if we should use the AMD annotations for other targets, we do for now.
+// NVIDIA: "omp_target_thread_limit"="20"
+// NVIDIA: "omp_target_thread_limit"="45"
+// NVIDIA: "omp_target_thread_limit"="17"
+// NVIDIA: !{ptr @__omp_offloading[[HASH1:.*]]_l16, !"maxntidx", i32 20}
+// NVIDIA: !{ptr @__omp_offloading[[HASH2:.*]]_l18, !"minctasm", i32 90}
+// NVIDIA: !{ptr @__omp_offloading[[HASH2]]_l18, !"maxntidx", i32 45}
+// NVIDIA: !{ptr @__omp_offloading[[HASH3:.*]]_l20, !"maxntidx", i32 17}
diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp
index df8a2c878760c1d..c8af38e32e638e6 100644
--- a/clang/test/OpenMP/target_parallel_codegen.cpp
+++ b/clang/test/OpenMP/target_parallel_codegen.cpp
@@ -603,42 +603,42 @@ int bar(int n){
// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T:%.*]], ptr [[TMP4]], i32 0, i32 2
// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], ptr [[TMP4]], i32 0, i32 0
// CHECK1-NEXT: [[TMP7:%.*]] = load ptr, ptr [[TMP6]], align 8
-// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]])
-// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]])
-// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]])
-// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]])
-// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21
-// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !21
-// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21
-// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21
-// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !21
-// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !21
-// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !21
-// CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !21
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]])
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]])
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]])
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]])
+// CHECK1-NEXT: store i32 [[TMP2]], ptr [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !35
+// CHECK1-NEXT: store ptr [[TMP5]], ptr [[DOTPART_ID__ADDR_I]], align 8, !noalias !35
+// CHECK1-NEXT: store ptr null, ptr [[DOTPRIVATES__ADDR_I]], align 8, !noalias !35
+// CHECK1-NEXT: store ptr null, ptr [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !35
+// CHECK1-NEXT: store ptr [[TMP3]], ptr [[DOTTASK_T__ADDR_I]], align 8, !noalias !35
+// CHECK1-NEXT: store ptr [[TMP7]], ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !35
+// CHECK1-NEXT: [[TMP8:%.*]] = load ptr, ptr [[__CONTEXT_ADDR_I]], align 8, !noalias !35
+// CHECK1-NEXT: store i32 2, ptr [[KERNEL_ARGS_I]], align 4, !noalias !35
// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 1
-// CHECK1-NEXT: store i32 0, ptr [[TMP9]], align 4, !noalias !21
+// CHECK1-NEXT: store i32 0, ptr [[TMP9]], align 4, !noalias !35
// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [[STRUCT___TGT_KERNEL_ARGUMENTS]], ptr [[KERNEL_ARGS_I]], i32 0, i32 2
-// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8, !noalias !21
+// CHECK1-NEXT: store ptr null, ptr [[TMP10]], align 8, !noalias !35
// ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/70247
More information about the cfe-commits
mailing list