[llvm] [AMDGPU] Change isSingleLaneExecution to account for WWM enabling lanes even if there's only one workitem (PR #188316)
Domenic Nutile via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 1 09:58:52 PDT 2026
https://github.com/saxlungs updated https://github.com/llvm/llvm-project/pull/188316
>From f8240f01c6c18673542f691ac92909981d3df22f Mon Sep 17 00:00:00 2001
From: Domenic Nutile <domenic.nutile at gmail.com>
Date: Tue, 24 Mar 2026 15:05:21 -0400
Subject: [PATCH 1/6] [AMDGPU] Change isSingleLaneExecution to account for WWM
enabling lanes even if there's only one workitem
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 14 +++
...mdgpu-uniform-intrinsic-wwm-single-lane.ll | 91 +++++++++++++++++++
2 files changed, 105 insertions(+)
create mode 100644 llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 1cd693f46500d..61454f0e7214d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/DiagnosticInfo.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MDBuilder.h"
@@ -263,6 +264,19 @@ unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
}
bool AMDGPUSubtarget::isSingleLaneExecution(const Function &Func) const {
+ // If the function calls the WWM intrinsic, just return false as
+ // all threads will be active at some point
+ for (const_inst_iterator Inst = inst_begin(Func), InstEnd = inst_end(Func);
+ Inst != InstEnd; ++Inst) {
+ const Instruction *I = &(*Inst);
+ if (auto *callInst = dyn_cast<CallInst>(I)) {
+ Function *calledFunc = callInst->getCalledFunction();
+ if (calledFunc && calledFunc->isIntrinsic() &&
+ calledFunc->getIntrinsicID() == Intrinsic::amdgcn_strict_wwm)
+ return false;
+ }
+ }
+
for (int I = 0; I < 3; ++I) {
if (getMaxWorkitemID(Func, I) > 0)
return false;
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
new file mode 100644
index 0000000000000..31254d9a9d5b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
@@ -0,0 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -mtriple=amdgcn--amdpal -amdgpu-uniform-intrinsic-combine -mattr=+wavefrontsize64 -S < %s | FileCheck %s -check-prefix=CHECK
+
+; If uniformity analysis sees that the workgroup size is 1, it would say this function is trivially uniform.
+; However, the function's use of wwm means that all lanes will be active even if the workgroup size is 1, so
+; it should not be considered uniform, and we should not see the permlane64 optimized out.
+define amdgpu_kernel void @kernel(i32 inreg noundef %input) #0 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel(
+; CHECK-SAME: i32 inreg noundef [[INPUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[_ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc()
+; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[INPUT]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4)
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i64 16
+; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 2
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[TMP6]], i32 [[TMP9]], i32 0)
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP10]], i32 0)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP11]], i32 177, i32 15, i32 15, i1 true)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float
+; CHECK-NEXT: [[TMP15:%.*]] = fadd float [[TMP12]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP15]] to i32
+; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP16]], i32 78, i32 15, i32 15, i1 true)
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
+; CHECK-NEXT: [[TMP19:%.*]] = fadd float [[TMP15]], [[TMP18]]
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast float [[TMP19]] to i32
+; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP20]], i32 321, i32 15, i32 15, i1 true)
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float
+; CHECK-NEXT: [[TMP23:%.*]] = fadd float [[TMP19]], [[TMP22]]
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast float [[TMP23]] to i32
+; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP24]], i32 320, i32 15, i32 15, i1 true)
+; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float
+; CHECK-NEXT: [[TMP27:%.*]] = fadd float [[TMP23]], [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = bitcast float [[TMP27]] to i32
+; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 poison, i32 [[TMP28]], i32 -1, i32 -1, i1 true, i1 false)
+; CHECK-NEXT: [[TMP30:%.*]] = bitcast i32 [[TMP29]] to float
+; CHECK-NEXT: [[TMP31:%.*]] = fadd float [[TMP27]], [[TMP30]]
+; CHECK-NEXT: [[TMP36:%.*]] = call float @llvm.amdgcn.permlane64.f32(float [[TMP31]])
+; CHECK-NEXT: [[TMP32:%.*]] = fadd float [[TMP36]], [[TMP31]]
+; CHECK-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP32]])
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast float [[TMP33]] to i32
+; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[TMP34]])
+; CHECK-NEXT: ret void
+;
+.entry:
+ %0 = call i64 @llvm.amdgcn.s.getpc()
+ %1 = and i64 %0, -4294967296
+ %2 = zext i32 %input to i64
+ %3 = or disjoint i64 %1, %2
+ %4 = inttoptr i64 %3 to ptr addrspace(4)
+ %5 = getelementptr i8, ptr addrspace(4) %4, i64 16
+ %6 = load <4 x i32>, ptr addrspace(4) %5, align 16
+ %7 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+ %8 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %7)
+ %9 = shl i32 %8, 2
+ %10 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %6, i32 %9, i32 0)
+ %11 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %10, i32 0)
+ %12 = bitcast i32 %11 to float
+ %13 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %11, i32 177, i32 15, i32 15, i1 true)
+ %14 = bitcast i32 %13 to float
+ %15 = fadd float %12, %14
+ %16 = bitcast float %15 to i32
+ %17 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %16, i32 78, i32 15, i32 15, i1 true)
+ %18 = bitcast i32 %17 to float
+ %19 = fadd float %15, %18
+ %20 = bitcast float %19 to i32
+ %21 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %20, i32 321, i32 15, i32 15, i1 true)
+ %22 = bitcast i32 %21 to float
+ %23 = fadd float %19, %22
+ %24 = bitcast float %23 to i32
+ %25 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %24, i32 320, i32 15, i32 15, i1 true)
+ %26 = bitcast i32 %25 to float
+ %27 = fadd float %23, %26
+ %28 = bitcast float %27 to i32
+ %29 = call i32 @llvm.amdgcn.permlanex16.i32(i32 poison, i32 %28, i32 -1, i32 -1, i1 true, i1 false)
+ %30 = bitcast i32 %29 to float
+ %31 = fadd float %27, %30
+ %32 = call float @llvm.amdgcn.permlane64.f32(float %31)
+ %33 = fadd float %32, %31
+ %34 = call float @llvm.amdgcn.readfirstlane.f32(float %33)
+ %35 = bitcast float %34 to i32
+ %36 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %35)
+ ret void
+}
+
+attributes #0 = { alwaysinline nounwind memory(readwrite) "amdgpu-flat-work-group-size"="1,1" }
>From fdf473e321b89ad2a2c034997bcc8c1a1217891e Mon Sep 17 00:00:00 2001
From: Domenic Nutile <domenic.nutile at gmail.com>
Date: Wed, 25 Mar 2026 13:57:10 -0400
Subject: [PATCH 2/6] Refactor to use Attributor to detect WWM
---
llvm/docs/AMDGPUUsage.rst | 3 +++
llvm/lib/Target/AMDGPU/AMDGPUAttributes.def | 1 +
llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp | 5 ++++-
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 18 +++++-------------
4 files changed, 13 insertions(+), 14 deletions(-)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 1ede5ca2d4cf6..dd6c51c12d399 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -2053,6 +2053,9 @@ The AMDGPU backend supports the following LLVM IR attributes.
"amdgpu-no-dispatch-id" The same as amdgpu-no-workitem-id-x, except for the
llvm.amdgcn.dispatch.id intrinsic.
+ "amdgpu-no-wwm" The same as amdgpu-no-workitem-id-x, except for the
+ llvm.amdgcn.strict.wwm intrinsic.
+
"amdgpu-no-queue-ptr" Similar to amdgpu-no-workitem-id-x, except for the
llvm.amdgcn.queue.ptr intrinsic. Note that unlike the other ABI hint
attributes, the queue pointer may be required in situations where the
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
index 4c9715e4a1737..a8714c55dfc63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributes.def
@@ -34,5 +34,6 @@ AMDGPU_ATTRIBUTE(FLAT_SCRATCH_INIT, "amdgpu-no-flat-scratch-init")
AMDGPU_ATTRIBUTE(CLUSTER_ID_X, "amdgpu-no-cluster-id-x")
AMDGPU_ATTRIBUTE(CLUSTER_ID_Y, "amdgpu-no-cluster-id-y")
AMDGPU_ATTRIBUTE(CLUSTER_ID_Z, "amdgpu-no-cluster-id-z")
+AMDGPU_ATTRIBUTE(WHOLE_WAVE_MODE, "amdgpu-no-wwm")
#undef AMDGPU_ATTRIBUTE
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
index 7178abe8b7552..2a0b2faa6a996 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAttributor.cpp
@@ -108,6 +108,9 @@ intrinsicToAttrMask(Intrinsic::ID ID, bool &NonKernelOnly, bool &NeedsImplicit,
// offsets.
return CodeObjectVersion >= AMDGPU::AMDHSA_COV5 ? IMPLICIT_ARG_PTR
: QUEUE_PTR;
+ case Intrinsic::amdgcn_wwm:
+ case Intrinsic::amdgcn_strict_wwm:
+ return WHOLE_WAVE_MODE;
case Intrinsic::trap:
case Intrinsic::debugtrap:
case Intrinsic::ubsantrap:
@@ -1694,4 +1697,4 @@ PreservedAnalyses llvm::AMDGPUAttributorCGSCCPass::run(LazyCallGraph::SCC &C,
TM, Options, ThinOrFullLTOPhase::None)
? PreservedAnalyses::none()
: PreservedAnalyses::all();
-}
\ No newline at end of file
+}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 61454f0e7214d..386add7b684d3 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -264,24 +264,16 @@ unsigned AMDGPUSubtarget::getMaxWorkitemID(const Function &Kernel,
}
bool AMDGPUSubtarget::isSingleLaneExecution(const Function &Func) const {
- // If the function calls the WWM intrinsic, just return false as
- // all threads will be active at some point
- for (const_inst_iterator Inst = inst_begin(Func), InstEnd = inst_end(Func);
- Inst != InstEnd; ++Inst) {
- const Instruction *I = &(*Inst);
- if (auto *callInst = dyn_cast<CallInst>(I)) {
- Function *calledFunc = callInst->getCalledFunction();
- if (calledFunc && calledFunc->isIntrinsic() &&
- calledFunc->getIntrinsicID() == Intrinsic::amdgcn_strict_wwm)
- return false;
- }
- }
-
for (int I = 0; I < 3; ++I) {
if (getMaxWorkitemID(Func, I) > 0)
return false;
}
+ // If the function calls the WWM intrinsic, just return false as
+ // all threads will be active at some point
+ if (!Func.hasFnAttribute("amdgpu-no-wwm"))
+ return false;
+
return true;
}
>From d7a1a8c470edc5b5a8168f25196522a45b0ec33f Mon Sep 17 00:00:00 2001
From: Domenic Nutile <domenic.nutile at gmail.com>
Date: Wed, 25 Mar 2026 14:04:06 -0400
Subject: [PATCH 3/6] Remove unnecessarily added header
---
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 386add7b684d3..1b682103dc0d5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -23,7 +23,6 @@
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/MDBuilder.h"
>From 1e2c167e3d6b52690110ad33c45a729058b54b0b Mon Sep 17 00:00:00 2001
From: Domenic Nutile <domenic.nutile at gmail.com>
Date: Thu, 26 Mar 2026 17:30:26 -0400
Subject: [PATCH 4/6] Fix tests, update new test to be smaller and just check
that permlane64 remains
---
.../AMDGPU/always_uniform.ll | 2 +-
.../AMDGPU/workitem-intrinsics.ll | 4 +-
...-divergent-i1-phis-no-lane-mask-merging.ll | 65 ++++---
.../AMDGPU/addrspacecast-constantexpr.ll | 4 +-
...mdgpu-attributor-flat-scratch-init-asan.ll | 2 +-
.../amdgpu-attributor-min-agpr-alloc.ll | 77 ++++----
...amdgpu-attributor-nocallback-intrinsics.ll | 4 +-
.../AMDGPU/amdgpu-attributor-trap-leaf.ll | 4 +-
...mdgpu-uniform-intrinsic-wwm-single-lane.ll | 70 +------
.../annotate-kernel-features-hsa-call.ll | 38 ++--
.../AMDGPU/annotate-kernel-features-hsa.ll | 26 +--
.../AMDGPU/annotate-kernel-features.ll | 18 +-
.../atomic_optimizations_single_lane.ll | 2 +-
...utor-flatscratchinit-undefined-behavior.ll | 4 +-
.../AMDGPU/attributor-flatscratchinit.ll | 12 +-
.../CodeGen/AMDGPU/direct-indirect-call.ll | 2 +-
.../AMDGPU/duplicate-attribute-indirect.ll | 2 +-
.../AMDGPU/implicitarg-offset-attributes.ll | 26 +--
.../indirect-call-set-from-other-function.ll | 2 +-
...120256-annotate-constexpr-addrspacecast.ll | 4 +-
.../AMDGPU/load-constant-always-uniform.ll | 27 +--
.../AMDGPU/propagate-flat-work-group-size.ll | 18 +-
.../CodeGen/AMDGPU/propagate-waves-per-eu.ll | 42 ++--
.../AMDGPU/recursive_global_initializer.ll | 2 +-
.../AMDGPU/remove-no-kernel-id-attribute.ll | 10 +-
.../CodeGen/AMDGPU/simple-indirect-call-2.ll | 6 +-
.../CodeGen/AMDGPU/simple-indirect-call.ll | 2 +-
.../uniform-work-group-attribute-missing.ll | 4 +-
.../AMDGPU/uniform-work-group-multistep.ll | 4 +-
...niform-work-group-nested-function-calls.ll | 4 +-
...ork-group-prevent-attribute-propagation.ll | 4 +-
.../uniform-work-group-propagate-attribute.ll | 4 +-
.../uniform-work-group-recursion-test.ll | 6 +-
.../CodeGen/AMDGPU/uniform-work-group-test.ll | 2 +-
.../nontrivial-unswitch-divergent-target.ll | 180 +++++++-----------
.../single-lane-execution.ll | 17 +-
36 files changed, 298 insertions(+), 402 deletions(-)
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
index 3f56f12f3cb34..e649dd9a7ef6f 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/always_uniform.ll
@@ -228,4 +228,4 @@ declare i32 @llvm.amdgcn.cluster.workgroup.max.flat.id()
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind readnone convergent }
-attributes #2 = { "amdgpu-flat-work-group-size"="1,1" }
+attributes #2 = { "amdgpu-flat-work-group-size"="1,1" "amdgpu-no-wwm" }
diff --git a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
index f5668cef5d63e..fbd855b0d691b 100644
--- a/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
+++ b/llvm/test/Analysis/UniformityAnalysis/AMDGPU/workitem-intrinsics.ll
@@ -67,7 +67,7 @@ define amdgpu_kernel void @workitem_id_z_singlethreaded() #2 {
; CHECK-LABEL: UniformityInfo for function 'workitem_id_x_singlethreaded_md':
; CHECK-NOT: DIVERGENT
-define amdgpu_kernel void @workitem_id_x_singlethreaded_md() !reqd_work_group_size !0 {
+define amdgpu_kernel void @workitem_id_x_singlethreaded_md() "amdgpu-no-wwm" !reqd_work_group_size !0 {
%id.x = call i32 @llvm.amdgcn.workitem.id.x()
store volatile i32 %id.x, ptr addrspace(1) undef
ret void
@@ -141,7 +141,7 @@ define amdgpu_kernel void @workitem_id_y_uniform_in_subgroup(ptr %o) #3 !reqd_wo
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
-attributes #2 = { "amdgpu-flat-work-group-size"="1,1" }
+attributes #2 = { "amdgpu-flat-work-group-size"="1,1" "amdgpu-no-wwm" }
attributes #3 = { "target-cpu"="gfx900" "amdgpu-flat-work-group-size"="256,256" }
!0 = !{i32 1, i32 1, i32 1}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
index 5418dc88dd413..f14738d16bc02 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergence-divergent-i1-phis-no-lane-mask-merging.ll
@@ -232,47 +232,50 @@ define amdgpu_cs void @single_lane_execution_attribute(i32 inreg %.userdata0, <3
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: v_mbcnt_lo_u32_b32 v1, -1, 0
; GFX10-NEXT: s_load_dwordx8 s[4:11], s[2:3], 0x0
-; GFX10-NEXT: v_mbcnt_hi_u32_b32 v1, -1, v1
-; GFX10-NEXT: v_lshlrev_b32_e32 v2, 2, v1
-; GFX10-NEXT: v_and_b32_e32 v3, 1, v1
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
-; GFX10-NEXT: s_xor_b32 s2, vcc_lo, exec_lo
+; GFX10-NEXT: s_mov_b32 s3, 0
+; GFX10-NEXT: v_mbcnt_hi_u32_b32 v2, -1, v1
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 2, v2
+; GFX10-NEXT: v_and_b32_e32 v3, 1, v2
+; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, v3
+; GFX10-NEXT: ; implicit-def: $vgpr3
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: buffer_load_dword v2, v2, s[4:7], 0 offen
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
-; GFX10-NEXT: s_mov_b32 s2, 0
+; GFX10-NEXT: buffer_load_dword v1, v1, s[4:7], 0 offen
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
-; GFX10-NEXT: s_cbranch_vccnz .LBB4_4
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX10-NEXT: s_and_saveexec_b32 s2, s0
+; GFX10-NEXT: s_xor_b32 s2, exec_lo, s2
+; GFX10-NEXT: s_cbranch_execz .LBB4_4
; GFX10-NEXT: ; %bb.1: ; %.preheader.preheader
-; GFX10-NEXT: s_mov_b32 s3, 0
+; GFX10-NEXT: s_mov_b32 s12, 0
+; GFX10-NEXT: s_mov_b32 s13, 0
; GFX10-NEXT: .LBB4_2: ; %.preheader
; GFX10-NEXT: ; =>This Inner Loop Header: Depth=1
-; GFX10-NEXT: v_mov_b32_e32 v3, s2
-; GFX10-NEXT: v_add_nc_u32_e32 v1, -1, v1
-; GFX10-NEXT: s_add_i32 s2, s2, 4
+; GFX10-NEXT: v_mov_b32_e32 v3, s3
+; GFX10-NEXT: v_add_nc_u32_e32 v2, -1, v2
+; GFX10-NEXT: s_add_i32 s3, s3, 4
; GFX10-NEXT: buffer_load_dword v3, v3, s[4:7], 0 offen
-; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX10-NEXT: v_cmp_eq_u32_e64 s0, 0, v2
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_readfirstlane_b32 s12, v3
-; GFX10-NEXT: s_add_i32 s3, s12, s3
-; GFX10-NEXT: s_cbranch_vccnz .LBB4_2
+; GFX10-NEXT: v_readfirstlane_b32 s14, v3
+; GFX10-NEXT: s_add_i32 s13, s14, s13
+; GFX10-NEXT: s_or_b32 s12, s0, s12
+; GFX10-NEXT: v_mov_b32_e32 v3, s13
+; GFX10-NEXT: s_andn2_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: s_cbranch_execnz .LBB4_2
; GFX10-NEXT: ; %bb.3: ; %.preheader._crit_edge
-; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, s3, v2
-; GFX10-NEXT: s_or_b32 s2, s0, vcc_lo
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s2
-; GFX10-NEXT: s_branch .LBB4_6
-; GFX10-NEXT: .LBB4_4:
-; GFX10-NEXT: s_mov_b32 s2, exec_lo
-; GFX10-NEXT: ; implicit-def: $vgpr1
-; GFX10-NEXT: s_and_b32 vcc_lo, exec_lo, s2
-; GFX10-NEXT: s_cbranch_vccz .LBB4_6
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s12
+; GFX10-NEXT: v_cmp_eq_u32_e64 s0, v3, v1
+; GFX10-NEXT: s_or_b32 s0, vcc_lo, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: .LBB4_4: ; %Flow
+; GFX10-NEXT: s_andn2_saveexec_b32 s0, s2
; GFX10-NEXT: ; %bb.5: ; %.19
-; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
-; GFX10-NEXT: v_or_b32_e32 v1, 2, v1
-; GFX10-NEXT: .LBB4_6: ; %.22
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-NEXT: v_or_b32_e32 v3, 2, v1
+; GFX10-NEXT: ; %bb.6: ; %.22
+; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s0
; GFX10-NEXT: v_add_lshl_u32 v0, v0, s1, 2
-; GFX10-NEXT: buffer_store_dword v1, v0, s[8:11], 0 offen
+; GFX10-NEXT: buffer_store_dword v3, v0, s[8:11], 0 offen
; GFX10-NEXT: s_endpgm
.entry:
%.0 = call i64 @llvm.amdgcn.s.getpc()
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 25d44edaaa5e0..f27055cb3d65d 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll
index 9387e6ffde05d..f7c8e39ae7327 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-flat-scratch-init-asan.ll
@@ -20,5 +20,5 @@ define amdgpu_kernel void @k0() #0 {
attributes #0 = { sanitize_address }
; "amdgpu-no-flat-scratch-init" attribute should not be present in attribute list
;.
-; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
index 410ded81b1663..7e497023b1503 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -181,7 +181,7 @@ define amdgpu_kernel void @kernel_calls_extern() {
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
; CHECK-SAME: ) #[[ATTR3]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR29:[0-9]+]]
+; CHECK-NEXT: call void @unknown() #[[ATTR30:[0-9]+]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -205,7 +205,7 @@ define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR29]]
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR30]]
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
@@ -750,7 +750,7 @@ define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr)
define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] {
; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
; CHECK-NEXT: call void @use_most()
@@ -764,7 +764,7 @@ define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(
define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
-; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] {
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR21:[0-9]+]] {
; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
; CHECK-NEXT: call void @use_most()
@@ -897,7 +897,7 @@ define void @kernel_max_callgraph(i1 %cond) {
define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs(
-; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR22:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -909,7 +909,7 @@ define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1(
-; CHECK-SAME: ) #[[ATTR21]] {
+; CHECK-SAME: ) #[[ATTR22]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -921,7 +921,7 @@ define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
define void @recursive() {
; CHECK-LABEL: define void @recursive(
-; CHECK-SAME: ) #[[ATTR22:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR23:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: call void @recursive()
@@ -935,7 +935,7 @@ define void @recursive() {
define void @indirect_0() {
; CHECK-LABEL: define void @indirect_0(
-; CHECK-SAME: ) #[[ATTR22]] {
+; CHECK-SAME: ) #[[ATTR23]] {
; CHECK-NEXT: call void asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -947,7 +947,7 @@ define void @indirect_0() {
define void @indirect_1() {
; CHECK-LABEL: define void @indirect_1(
-; CHECK-SAME: ) #[[ATTR23:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR24:[0-9]+]] {
; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i32> asm sideeffect "
; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
@@ -959,7 +959,7 @@ define void @indirect_1() {
define amdgpu_kernel void @knowable_indirect_call(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @knowable_indirect_call(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR22]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR23]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @indirect_0, ptr @indirect_1
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @indirect_1
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -1027,36 +1027,37 @@ attributes #1 = { "amdgpu-waves-per-eu"="1,1" }
!4 = !{!"a256"}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nocreateundeforpoison nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="14" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="256" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="32" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="9" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="64" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="49" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="60" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR27:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR28:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR29]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="14" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="256" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="32" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="9" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="64" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="49" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="60" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="256" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="7" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR24]] = { "amdgpu-agpr-alloc"="3" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR27:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR28:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR29:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR30]] = { "amdgpu-agpr-alloc"="0" }
;.
; CHECK: [[META0]] = !{!"a55"}
; CHECK: [[META1]] = !{!"v55"}
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
index db37d0cb511b6..d17b164f5e0f8 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-nocallback-intrinsics.ll
@@ -76,8 +76,8 @@ define void @use_ubsantrap() {
attributes #0 = { "trap-func-name"="handler" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
index 4eeb85276a2ec..d2ce8d4d064aa 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-trap-leaf.ll
@@ -58,8 +58,8 @@ attributes #1 = { nocallback "trap-func-name"="handler" }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { cold noreturn nounwind memory(inaccessiblemem: write) "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx90a" }
; CHECK: attributes #[[ATTR4]] = { "trap-func-name"="handler" }
; CHECK: attributes #[[ATTR5]] = { nocallback "trap-func-name"="handler" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
index 31254d9a9d5b0..70b6ef01f4a36 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn--amdpal -amdgpu-uniform-intrinsic-combine -mattr=+wavefrontsize64 -S < %s | FileCheck %s -check-prefix=CHECK
; If uniformity analysis sees that the workgroup size is 1, it would say this function is trivially uniform.
@@ -6,47 +5,7 @@
; it should not be considered uniform, and we should not see the permlane64 optimized out.
define amdgpu_kernel void @kernel(i32 inreg noundef %input) #0 {
; CHECK-LABEL: define amdgpu_kernel void @kernel(
-; CHECK-SAME: i32 inreg noundef [[INPUT:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: [[_ENTRY:.*:]]
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.amdgcn.s.getpc()
-; CHECK-NEXT: [[TMP1:%.*]] = and i64 [[TMP0]], -4294967296
-; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[INPUT]] to i64
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = inttoptr i64 [[TMP3]] to ptr addrspace(4)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i64 16
-; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr addrspace(4) [[TMP5]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
-; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = shl i32 [[TMP8]], 2
-; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> [[TMP6]], i32 [[TMP9]], i32 0)
-; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.set.inactive.i32(i32 [[TMP10]], i32 0)
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i32 [[TMP11]] to float
-; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP11]], i32 177, i32 15, i32 15, i1 true)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP13]] to float
-; CHECK-NEXT: [[TMP15:%.*]] = fadd float [[TMP12]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP15]] to i32
-; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP16]], i32 78, i32 15, i32 15, i1 true)
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i32 [[TMP17]] to float
-; CHECK-NEXT: [[TMP19:%.*]] = fadd float [[TMP15]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast float [[TMP19]] to i32
-; CHECK-NEXT: [[TMP21:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP20]], i32 321, i32 15, i32 15, i1 true)
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast i32 [[TMP21]] to float
-; CHECK-NEXT: [[TMP23:%.*]] = fadd float [[TMP19]], [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast float [[TMP23]] to i32
-; CHECK-NEXT: [[TMP25:%.*]] = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 [[TMP24]], i32 320, i32 15, i32 15, i1 true)
-; CHECK-NEXT: [[TMP26:%.*]] = bitcast i32 [[TMP25]] to float
-; CHECK-NEXT: [[TMP27:%.*]] = fadd float [[TMP23]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = bitcast float [[TMP27]] to i32
-; CHECK-NEXT: [[TMP29:%.*]] = call i32 @llvm.amdgcn.permlanex16.i32(i32 poison, i32 [[TMP28]], i32 -1, i32 -1, i1 true, i1 false)
-; CHECK-NEXT: [[TMP30:%.*]] = bitcast i32 [[TMP29]] to float
-; CHECK-NEXT: [[TMP31:%.*]] = fadd float [[TMP27]], [[TMP30]]
-; CHECK-NEXT: [[TMP36:%.*]] = call float @llvm.amdgcn.permlane64.f32(float [[TMP31]])
-; CHECK-NEXT: [[TMP32:%.*]] = fadd float [[TMP36]], [[TMP31]]
-; CHECK-NEXT: [[TMP33:%.*]] = call float @llvm.amdgcn.readfirstlane.f32(float [[TMP32]])
-; CHECK-NEXT: [[TMP34:%.*]] = bitcast float [[TMP33]] to i32
-; CHECK-NEXT: [[TMP35:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[TMP34]])
-; CHECK-NEXT: ret void
-;
+; CHECK: call float @llvm.amdgcn.permlane64.f32
.entry:
%0 = call i64 @llvm.amdgcn.s.getpc()
%1 = and i64 %0, -4294967296
@@ -61,30 +20,11 @@ define amdgpu_kernel void @kernel(i32 inreg noundef %input) #0 {
%10 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %6, i32 %9, i32 0)
%11 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %10, i32 0)
%12 = bitcast i32 %11 to float
- %13 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %11, i32 177, i32 15, i32 15, i1 true)
- %14 = bitcast i32 %13 to float
- %15 = fadd float %12, %14
+ %13 = call float @llvm.amdgcn.permlane64.f32(float %12)
+ %14 = fadd float %13, %12
+ %15 = call float @llvm.amdgcn.readfirstlane.f32(float %14)
%16 = bitcast float %15 to i32
- %17 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %16, i32 78, i32 15, i32 15, i1 true)
- %18 = bitcast i32 %17 to float
- %19 = fadd float %15, %18
- %20 = bitcast float %19 to i32
- %21 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %20, i32 321, i32 15, i32 15, i1 true)
- %22 = bitcast i32 %21 to float
- %23 = fadd float %19, %22
- %24 = bitcast float %23 to i32
- %25 = call i32 @llvm.amdgcn.update.dpp.i32(i32 poison, i32 %24, i32 320, i32 15, i32 15, i1 true)
- %26 = bitcast i32 %25 to float
- %27 = fadd float %23, %26
- %28 = bitcast float %27 to i32
- %29 = call i32 @llvm.amdgcn.permlanex16.i32(i32 poison, i32 %28, i32 -1, i32 -1, i1 true, i1 false)
- %30 = bitcast i32 %29 to float
- %31 = fadd float %27, %30
- %32 = call float @llvm.amdgcn.permlane64.f32(float %31)
- %33 = fadd float %32, %31
- %34 = call float @llvm.amdgcn.readfirstlane.f32(float %33)
- %35 = bitcast float %34 to i32
- %36 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %35)
+ %17 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %16)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index 23893a98f3a9b..0f7d813a027b1 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -690,27 +690,27 @@ attributes #6 = { "enqueued-block" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="fiji" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
; ATTRIBUTOR_HSA: attributes #[[ATTR17:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind sanitize_address "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR19]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" }
; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "enqueued-block" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "enqueued-block" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 66b6ba168f07d..b86f621b9e0cf 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -475,19 +475,19 @@ attributes #1 = { nounwind }
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
; HSA: attributes #[[ATTR1:[0-9]+]] = { nocallback nocreateundeforpoison nofree nosync nounwind speculatable willreturn memory(none) }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" }
-; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" }
-; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" }
-; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
; HSA: [[META0]] = !{i32 1, i32 3, i32 4, i32 10}
; HSA: [[META1]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 0581caf72aef4..c8cc6884af01a 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -294,13 +294,13 @@ attributes #1 = { nounwind }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" }
-; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" }
-; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" }
+; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_single_lane.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_single_lane.ll
index 091ea662facf3..0ec415f8826f4 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_single_lane.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_single_lane.ll
@@ -3,7 +3,7 @@
; Check that no optimization is generated for single lane execution.
-define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace(8) %inout) "amdgpu-flat-work-group-size"="1,1" {
+define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out, ptr addrspace(8) %inout) "amdgpu-flat-work-group-size"="1,1" "amdgpu-no-wwm" {
; GFX12-LABEL: add_i32_constant:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
index 39ab4019d5593..ccc434842e498 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
@@ -147,10 +147,10 @@ define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %
attributes #0 = { "amdgpu-no-flat-scratch-init" }
;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
; GFX9: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1010" }
; GFX10: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
;.
; GFX9: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
index a91d64d1c2591..1e8a33781ee8d 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
@@ -865,17 +865,17 @@ define amdgpu_kernel void @with_inline_asm() {
}
;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
-; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
; GFX9: attributes #[[ATTR2]] = { "target-cpu"="gfx900" }
; GFX9: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
-; GFX9: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" }
-; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1010" }
; GFX10: attributes #[[ATTR2]] = { "target-cpu"="gfx1010" }
; GFX10: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
-; GFX10: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1010" }
;.
; GFX9: [[META0]] = !{i32 2, i32 10}
; GFX9: [[META1]] = !{i32 1, i32 2, i32 3, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index 0fa94916d3b54..e7674ee6423f8 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -33,5 +33,5 @@ define amdgpu_kernel void @test_direct_indirect_call() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index dfcc3a33c7ecf..80f378fbd8195 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -28,6 +28,6 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
attributes #0 = { "amdgpu-no-dispatch-id" }
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index 2f2fc7e85c55e..6cfbb3778d108 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -276,23 +276,23 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
;.
; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V4: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V4: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V4: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V4: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V4: attributes #[[ATTR5]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
+; V4: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V4: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V4: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V4: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V4: attributes #[[ATTR5]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
;.
; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V5: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V5: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V5: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V5: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
+; V5: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V5: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V5: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V5: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
;.
; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V6: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V6: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V6: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
-; V6: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" }
+; V6: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V6: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V6: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
+; V6: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-wwm" }
;.
; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
index f580dc12e6207..7ff75aed51b12 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
@@ -67,5 +67,5 @@ if.end:
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
index c5ce14faa3347..24393cd29e765 100644
--- a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
@@ -55,8 +55,8 @@ define amdgpu_kernel void @issue120256_private(ptr addrspace(1) %out) {
; FIXME: Inference of amdgpu-no-queue-ptr should not depend on code object version.
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx803" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx803" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll b/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll
index 13ea8b08d2ade..2ea640cfb1097 100644
--- a/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll
+++ b/llvm/test/CodeGen/AMDGPU/load-constant-always-uniform.ll
@@ -9,18 +9,11 @@ define amdgpu_cs void @test_uniform_load_b96(ptr addrspace(1) %ptr, i32 %arg) "a
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b64 v[2:3], 2, v[2:3]
; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
-; GFX11-NEXT: v_readfirstlane_b32 s0, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NEXT: v_readfirstlane_b32 s1, v3
-; GFX11-NEXT: s_clause 0x1
-; GFX11-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
-; GFX11-NEXT: s_load_b32 s0, s[0:1], 0x8
-; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_mov_b32_e32 v2, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NEXT: v_or3_b32 v2, s2, v2, s0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
+; GFX11-NEXT: global_load_b96 v[2:4], v[2:3], off
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_or3_b32 v2, v2, v3, v4
; GFX11-NEXT: global_store_b32 v[0:1], v2, off
; GFX11-NEXT: s_endpgm
;
@@ -30,15 +23,11 @@ define amdgpu_cs void @test_uniform_load_b96(ptr addrspace(1) %ptr, i32 %arg) "a
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
; GFX12-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
-; GFX12-NEXT: v_readfirstlane_b32 s0, v2
-; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_1)
-; GFX12-NEXT: v_readfirstlane_b32 s1, v3
-; GFX12-NEXT: s_load_b96 s[0:2], s[0:1], 0x0
-; GFX12-NEXT: s_wait_kmcnt 0x0
-; GFX12-NEXT: v_mov_b32_e32 v2, s0
-; GFX12-NEXT: v_or3_b32 v2, v2, s1, s2
+; GFX12-NEXT: global_load_b96 v[2:4], v[2:3], off
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_or3_b32 v2, v2, v3, v4
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_endpgm
bb:
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
index e9a16d442a3b4..e71eddbd7077a 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
index e90acc97a1c6e..a19fe155d1c0e 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
@@ -399,25 +399,25 @@ attributes #17 = { "amdgpu-waves-per-eu"="5,8" }
attributes #18 = { "amdgpu-waves-per-eu"="9,10" }
attributes #19 = { "amdgpu-waves-per-eu"="8,9" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" }
-; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" }
-; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" }
-; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" }
-; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" }
-; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" }
-; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" }
-; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" }
-; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" }
-; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="2,8" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="1,8" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="1,2" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="1,4" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="9,9" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="1,1" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="9,10" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="2,9" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="3,8" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="1,123" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="3,6" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="3,6" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="4,8" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="6,8" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="5,5" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="5,8" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="9,10" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "amdgpu-waves-per-eu"="8,9" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
index c41e741e7d48b..c8b2603b689e3 100644
--- a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
@@ -19,5 +19,5 @@ define void @hoge() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
index c833f544914e4..7c9a9b8b5b3e1 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
@@ -186,12 +186,12 @@ define amdgpu_kernel void @kernel_lds_recursion() {
!1 = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
index 968975adb003b..6c01f82cc5e9b 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
@@ -101,11 +101,11 @@ entry:
}
;.
-; NO: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; NO: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
-; OW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; OW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
-; CW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
; NO: [[META0]] = !{ptr @bar1, ptr @bar2}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index cd738f91f4559..cb52a639d12bd 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -57,7 +57,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
; ATTRIBUTOR_GCN: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index 56819036c1a5d..9dedf231b4ddd 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -31,6 +31,6 @@ define amdgpu_kernel void @kernel1() #1 {
attributes #0 = { "uniform-work-group-size" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "uniform-work-group-size" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
index d889c1a70c22a..8ed644502ec82 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -96,7 +96,7 @@ define amdgpu_kernel void @kernel2() #0 {
attributes #0 = { "uniform-work-group-size" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "uniform-work-group-size" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index 05b1aa7dcfc6e..49e2c1f637cf4 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
attributes #2 = { "uniform-work-group-size" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "uniform-work-group-size" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index 8554e3d0b63f4..a67d6af9cc0f0 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 {
attributes #1 = { "uniform-work-group-size" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "uniform-work-group-size" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
index 53a7021c12bd5..febb091f259b3 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -51,8 +51,8 @@ define amdgpu_kernel void @kernel2() #1 {
attributes #0 = { nounwind }
attributes #1 = { "uniform-work-group-size" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
; CHECK: attributes #[[ATTR2]] = { nounwind }
; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 1472004f0d7c2..0d6b8102b343c 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
-; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size" }
+; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
+; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "uniform-work-group-size" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "uniform-work-group-size" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index 333a9993a89d6..dae9bcc70ed61 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -60,5 +60,5 @@ define amdgpu_kernel void @kernel3() {
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" }
;.
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll
index 8d3386031e75d..4e0b4b64c9da2 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/AMDGPU/nontrivial-unswitch-divergent-target.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; RUN: opt -mtriple=amdgcn-- -passes='loop(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='loop-mssa(simple-loop-unswitch<nontrivial>),verify<loops>' -S < %s | FileCheck %s
; RUN: opt -mtriple=amdgcn-- -passes='simple-loop-unswitch<nontrivial>' -verify-memoryssa -S < %s | FileCheck %s
@@ -10,54 +11,56 @@ declare i32 @c()
; conditions to unswitch within the loop. The conditions are divergent
; and should not unswitch.
define void @test1(ptr %ptr, i1 %cond1, i1 %cond2) {
-; CHECK-LABEL: @test1(
+; CHECK-LABEL: define void @test1(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP_BEGIN:.*]]
+; CHECK: [[LOOP_BEGIN]]:
+; CHECK-NEXT: br i1 [[COND1]], label %[[LOOP_A:.*]], label %[[LOOP_B:.*]]
+; CHECK: [[LOOP_A]]:
+; CHECK-NEXT: [[UNUSED_A:%.*]] = call i32 @a()
+; CHECK-NEXT: br label %[[LATCH:.*]]
+; CHECK: [[LOOP_B]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[LOOP_B_A:.*]], label %[[LOOP_B_B:.*]]
+; CHECK: [[LOOP_B_A]]:
+; CHECK-NEXT: [[UNUSED_B:%.*]] = call i32 @b()
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LOOP_B_B]]:
+; CHECK-NEXT: [[UNUSED_C:%.*]] = call i32 @c()
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: [[V:%.*]] = load i1, ptr [[PTR]], align 1
+; CHECK-NEXT: br i1 [[V]], label %[[LOOP_BEGIN]], label %[[LOOP_EXIT:.*]]
+; CHECK: [[LOOP_EXIT]]:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop_begin
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop_begin
loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
-; CHECK: loop_begin:
-; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
loop_a:
%unused.a = call i32 @a()
br label %latch
-; CHECK: loop_a:
-; CHECK-NEXT: %unused.a = call i32 @a()
-; CHECK-NEXT: br label %latch
loop_b:
br i1 %cond2, label %loop_b_a, label %loop_b_b
-; CHECK: loop_b:
-; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
loop_b_a:
%unused.b = call i32 @b()
br label %latch
-; CHECK: loop_b_a:
-; CHECK-NEXT: %unused.b = call i32 @b()
-; CHECK-NEXT: br label %latch
loop_b_b:
%unused.c = call i32 @c()
br label %latch
-; CHECK: loop_b_b:
-; CHECK-NEXT: %unused.c = call i32 @c()
-; CHECK-NEXT: br label %latch
latch:
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
-; CHECK: latch:
-; CHECK-NEXT: %v = load i1, ptr %ptr
-; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
-; CHECK: loop_exit:
-; CHECK-NEXT: ret void
}
; Non-trivial loop unswitching where there are two distinct trivial
@@ -65,65 +68,87 @@ loop_exit:
; be uniform, so it should be unswitchable. However, unswitch
; currently does not make use of UniformityAnalysis.
define amdgpu_kernel void @test1_uniform(ptr %ptr, i1 %cond1, i1 %cond2) {
-; CHECK-LABEL: @test1_uniform(
+; CHECK-LABEL: define amdgpu_kernel void @test1_uniform(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP_BEGIN:.*]]
+; CHECK: [[LOOP_BEGIN]]:
+; CHECK-NEXT: br i1 [[COND1]], label %[[LOOP_A:.*]], label %[[LOOP_B:.*]]
+; CHECK: [[LOOP_A]]:
+; CHECK-NEXT: [[UNUSED_A:%.*]] = call i32 @a()
+; CHECK-NEXT: br label %[[LATCH:.*]]
+; CHECK: [[LOOP_B]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[LOOP_B_A:.*]], label %[[LOOP_B_B:.*]]
+; CHECK: [[LOOP_B_A]]:
+; CHECK-NEXT: [[UNUSED_B:%.*]] = call i32 @b()
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LOOP_B_B]]:
+; CHECK-NEXT: [[UNUSED_C:%.*]] = call i32 @c()
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: [[V:%.*]] = load i1, ptr [[PTR]], align 1
+; CHECK-NEXT: br i1 [[V]], label %[[LOOP_BEGIN]], label %[[LOOP_EXIT:.*]]
+; CHECK: [[LOOP_EXIT]]:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop_begin
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br label %loop_begin
loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
-; CHECK: loop_begin:
-; CHECK-NEXT: br i1 %cond1, label %loop_a, label %loop_b
loop_a:
%unused.a = call i32 @a()
br label %latch
-; CHECK: loop_a:
-; CHECK-NEXT: %unused.a = call i32 @a()
-; CHECK-NEXT: br label %latch
loop_b:
br i1 %cond2, label %loop_b_a, label %loop_b_b
-; CHECK: loop_b:
-; CHECK-NEXT: br i1 %cond2, label %loop_b_a, label %loop_b_b
loop_b_a:
%unused.b = call i32 @b()
br label %latch
-; CHECK: loop_b_a:
-; CHECK-NEXT: %unused.b = call i32 @b()
-; CHECK-NEXT: br label %latch
loop_b_b:
%unused.c = call i32 @c()
br label %latch
-; CHECK: loop_b_b:
-; CHECK-NEXT: %unused.c = call i32 @c()
-; CHECK-NEXT: br label %latch
latch:
%v = load i1, ptr %ptr
br i1 %v, label %loop_begin, label %loop_exit
-; CHECK: latch:
-; CHECK-NEXT: %v = load i1, ptr %ptr
-; CHECK-NEXT: br i1 %v, label %loop_begin, label %loop_exit
loop_exit:
ret void
-; CHECK: loop_exit:
-; CHECK-NEXT: ret void
}
; Non-trivial loop unswitching where there are two distinct trivial
; conditions to unswitch within the loop. There is no divergence
; because it's assumed it can only execute with a workgroup of size 1.
define void @test1_single_lane_execution(ptr %ptr, i1 %cond1, i1 %cond2) #0 {
-; CHECK-LABEL: @test1_single_lane_execution(
+; CHECK-LABEL: define void @test1_single_lane_execution(
+; CHECK-SAME: ptr [[PTR:%.*]], i1 [[COND1:%.*]], i1 [[COND2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br label %[[LOOP_BEGIN:.*]]
+; CHECK: [[LOOP_BEGIN]]:
+; CHECK-NEXT: br i1 [[COND1]], label %[[LOOP_A:.*]], label %[[LOOP_B:.*]]
+; CHECK: [[LOOP_A]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @a()
+; CHECK-NEXT: br label %[[LATCH:.*]]
+; CHECK: [[LOOP_B]]:
+; CHECK-NEXT: br i1 [[COND2]], label %[[LOOP_B_A:.*]], label %[[LOOP_B_B:.*]]
+; CHECK: [[LOOP_B_A]]:
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @b()
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LOOP_B_B]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @c()
+; CHECK-NEXT: br label %[[LATCH]]
+; CHECK: [[LATCH]]:
+; CHECK-NEXT: [[V:%.*]] = load i1, ptr [[PTR]], align 1
+; CHECK-NEXT: br i1 [[V]], label %[[LOOP_BEGIN]], label %[[LOOP_EXIT:.*]]
+; CHECK: [[LOOP_EXIT]]:
+; CHECK-NEXT: ret void
+;
entry:
br label %loop_begin
-; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 %cond1, label %entry.split.us, label %entry.split
loop_begin:
br i1 %cond1, label %loop_a, label %loop_b
@@ -132,80 +157,20 @@ loop_a:
call i32 @a()
br label %latch
; The 'loop_a' unswitched loop.
-;
-; CHECK: entry.split.us:
-; CHECK-NEXT: br label %loop_begin.us
-;
-; CHECK: loop_begin.us:
-; CHECK-NEXT: br label %loop_a.us
-;
-; CHECK: loop_a.us:
-; CHECK-NEXT: call i32 @a()
-; CHECK-NEXT: br label %latch.us
-;
-; CHECK: latch.us:
-; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
-; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us, label %loop_exit.split.us
-;
-; CHECK: loop_exit.split.us:
-; CHECK-NEXT: br label %loop_exit
loop_b:
br i1 %cond2, label %loop_b_a, label %loop_b_b
; The second unswitched condition.
-;
-; CHECK: entry.split:
-; CHECK-NEXT: br i1 %cond2, label %entry.split.split.us, label %entry.split.split
loop_b_a:
call i32 @b()
br label %latch
; The 'loop_b_a' unswitched loop.
-;
-; CHECK: entry.split.split.us:
-; CHECK-NEXT: br label %loop_begin.us1
-;
-; CHECK: loop_begin.us1:
-; CHECK-NEXT: br label %loop_b.us
-;
-; CHECK: loop_b.us:
-; CHECK-NEXT: br label %loop_b_a.us
-;
-; CHECK: loop_b_a.us:
-; CHECK-NEXT: call i32 @b()
-; CHECK-NEXT: br label %latch.us2
-;
-; CHECK: latch.us2:
-; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
-; CHECK-NEXT: br i1 %[[V]], label %loop_begin.us1, label %loop_exit.split.split.us
-;
-; CHECK: loop_exit.split.split.us:
-; CHECK-NEXT: br label %loop_exit.split
loop_b_b:
call i32 @c()
br label %latch
; The 'loop_b_b' unswitched loop.
-;
-; CHECK: entry.split.split:
-; CHECK-NEXT: br label %loop_begin
-;
-; CHECK: loop_begin:
-; CHECK-NEXT: br label %loop_b
-;
-; CHECK: loop_b:
-; CHECK-NEXT: br label %loop_b_b
-;
-; CHECK: loop_b_b:
-; CHECK-NEXT: call i32 @c()
-; CHECK-NEXT: br label %latch
-;
-; CHECK: latch:
-; CHECK-NEXT: %[[V:.*]] = load i1, ptr %ptr
-; CHECK-NEXT: br i1 %[[V]], label %loop_begin, label %loop_exit.split.split
-;
-; CHECK: loop_exit.split.split:
-; CHECK-NEXT: br label %loop_exit.split
latch:
%v = load i1, ptr %ptr
@@ -213,11 +178,6 @@ latch:
loop_exit:
ret void
-; CHECK: loop_exit.split:
-; CHECK-NEXT: br label %loop_exit
-;
-; CHECK: loop_exit:
-; CHECK-NEXT: ret
}
attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }
diff --git a/llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll b/llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll
index 7f881d0342b5d..ca517d4b91a46 100644
--- a/llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll
+++ b/llvm/test/Transforms/SpeculativeExecution/single-lane-execution.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
; REQUIRES: amdgpu-registered-target
; RUN: opt -S -passes=speculative-execution -mtriple=amdgcn-- \
; RUN: -spec-exec-only-if-divergent-target \
@@ -6,19 +7,21 @@
; Hoist in if-then pattern.
define void @skip_single_lane_ifThen() #0 {
-; CHECK-LABEL: @skip_single_lane_ifThen(
-; CHECK: br i1 true
+; CHECK-LABEL: define void @skip_single_lane_ifThen(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[X:%.*]] = add i32 2, 3
+; CHECK-NEXT: br i1 true, label %[[A:.*]], label %[[B:.*]]
+; CHECK: [[A]]:
+; CHECK-NEXT: br label %[[B]]
+; CHECK: [[B]]:
+; CHECK-NEXT: ret void
+;
br i1 true, label %a, label %b
-; CHECK: a:
-; CHECK: %x = add i32 2, 3
a:
%x = add i32 2, 3
-; CHECK: br label
br label %b
-; CHECK: b:
b:
-; CHECK: ret void
ret void
}
>From f990a465917702dc12dcceb8eb8eb851dfe6b553 Mon Sep 17 00:00:00 2001
From: Domenic Nutile <domenic.nutile at gmail.com>
Date: Mon, 30 Mar 2026 15:41:13 -0400
Subject: [PATCH 5/6] Refactor testing from feedback, add specific testing
around new attribute generation
---
...mdgpu-uniform-intrinsic-wwm-single-lane.ll | 34 +++---
llvm/test/CodeGen/AMDGPU/attributor-wwm.ll | 108 ++++++++++++++++++
.../JumpThreading/divergent-target-test.ll | 2 +-
3 files changed, 124 insertions(+), 20 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/attributor-wwm.ll
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
index 70b6ef01f4a36..89a53a6e41cad 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-uniform-intrinsic-wwm-single-lane.ll
@@ -3,28 +3,24 @@
; If uniformity analysis sees that the workgroup size is 1, it would say this function is trivially uniform.
; However, the function's use of wwm means that all lanes will be active even if the workgroup size is 1, so
; it should not be considered uniform, and we should not see the permlane64 optimized out.
-define amdgpu_kernel void @kernel(i32 inreg noundef %input) #0 {
+define amdgpu_kernel void @kernel(ptr addrspace(4) %input, ptr addrspace(1) %out) #0 {
; CHECK-LABEL: define amdgpu_kernel void @kernel(
; CHECK: call float @llvm.amdgcn.permlane64.f32
.entry:
- %0 = call i64 @llvm.amdgcn.s.getpc()
- %1 = and i64 %0, -4294967296
- %2 = zext i32 %input to i64
- %3 = or disjoint i64 %1, %2
- %4 = inttoptr i64 %3 to ptr addrspace(4)
- %5 = getelementptr i8, ptr addrspace(4) %4, i64 16
- %6 = load <4 x i32>, ptr addrspace(4) %5, align 16
- %7 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
- %8 = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %7)
- %9 = shl i32 %8, 2
- %10 = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %6, i32 %9, i32 0)
- %11 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %10, i32 0)
- %12 = bitcast i32 %11 to float
- %13 = call float @llvm.amdgcn.permlane64.f32(float %12)
- %14 = fadd float %13, %12
- %15 = call float @llvm.amdgcn.readfirstlane.f32(float %14)
- %16 = bitcast float %15 to i32
- %17 = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %16)
+ %element_ptr = getelementptr i8, ptr addrspace(4) %input, i64 16
+ %buffer = load <4 x i32>, ptr addrspace(4) %element_ptr, align 16
+ %lane_id_lo = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
+ %lane_id = call i32 @llvm.amdgcn.mbcnt.hi(i32 -1, i32 %lane_id_lo)
+ %lane_id_idx = shl i32 %lane_id, 2
+ %vals = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %buffer, i32 %lane_id_idx, i32 0)
+ %vals_inactive_zeroed = call i32 @llvm.amdgcn.set.inactive.i32(i32 %vals, i32 0)
+ %float_vals = bitcast i32 %vals_inactive_zeroed to float
+ %swapped_vals = call float @llvm.amdgcn.permlane64.f32(float %float_vals)
+ %sum = fadd float %swapped_vals, %float_vals
+ %res = call float @llvm.amdgcn.readfirstlane.f32(float %sum)
+ %res_i32 = bitcast float %res to i32
+ %res_i32_wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %res_i32)
+ store i32 %res_i32_wwm, ptr addrspace(1) %out
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-wwm.ll b/llvm/test/CodeGen/AMDGPU/attributor-wwm.ll
new file mode 100644
index 0000000000000..6788ba265d02f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/attributor-wwm.ll
@@ -0,0 +1,108 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
+; Test the generation of the attribute amdgpu-no-wwm
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX10 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX11 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX12 %s
+
+define amdgpu_kernel void @test_no_wwm(i32 %input, ptr addrspace(1) %out) {
+; GFX9-LABEL: define amdgpu_kernel void @test_no_wwm(
+; GFX9-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX9-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
+; GFX9-NEXT: ret void
+;
+; GFX10-LABEL: define amdgpu_kernel void @test_no_wwm(
+; GFX10-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX10-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
+; GFX10-NEXT: ret void
+;
+; GFX11-LABEL: define amdgpu_kernel void @test_no_wwm(
+; GFX11-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX11-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
+; GFX11-NEXT: ret void
+;
+; GFX12-LABEL: define amdgpu_kernel void @test_no_wwm(
+; GFX12-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; GFX12-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
+; GFX12-NEXT: ret void
+;
+ store i32 %input, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_old_wwm(i32 %input, ptr addrspace(1) %out) {
+; GFX9-LABEL: define amdgpu_kernel void @test_old_wwm(
+; GFX9-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
+; GFX9-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
+; GFX9-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX9-NEXT: ret void
+;
+; GFX10-LABEL: define amdgpu_kernel void @test_old_wwm(
+; GFX10-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
+; GFX10-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
+; GFX10-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX10-NEXT: ret void
+;
+; GFX11-LABEL: define amdgpu_kernel void @test_old_wwm(
+; GFX11-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
+; GFX11-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
+; GFX11-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX11-NEXT: ret void
+;
+; GFX12-LABEL: define amdgpu_kernel void @test_old_wwm(
+; GFX12-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
+; GFX12-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
+; GFX12-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX12-NEXT: ret void
+;
+ %wwm = call i32 @llvm.amdgcn.wwm.i32(i32 %input)
+ store i32 %wwm, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_strict_wwm(i32 %input, ptr addrspace(1) %out) {
+; GFX9-LABEL: define amdgpu_kernel void @test_strict_wwm(
+; GFX9-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; GFX9-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
+; GFX9-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX9-NEXT: ret void
+;
+; GFX10-LABEL: define amdgpu_kernel void @test_strict_wwm(
+; GFX10-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; GFX10-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
+; GFX10-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX10-NEXT: ret void
+;
+; GFX11-LABEL: define amdgpu_kernel void @test_strict_wwm(
+; GFX11-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; GFX11-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
+; GFX11-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX11-NEXT: ret void
+;
+; GFX12-LABEL: define amdgpu_kernel void @test_strict_wwm(
+; GFX12-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; GFX12-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
+; GFX12-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; GFX12-NEXT: ret void
+;
+ %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %input)
+ store i32 %wwm, ptr addrspace(1) %out
+ ret void
+}
+;.
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
+;.
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
+;.
+; GFX11: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1100" }
+; GFX11: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1100" }
+; GFX11: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1100" }
+;.
+; GFX12: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1200" }
+; GFX12: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1200" }
+; GFX12: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1200" }
+;.
diff --git a/llvm/test/Transforms/JumpThreading/divergent-target-test.ll b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
index 5e505aa695f07..2e3fa50a1f7f9 100644
--- a/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
+++ b/llvm/test/Transforms/JumpThreading/divergent-target-test.ll
@@ -80,4 +80,4 @@ F2:
ret i32 %B
}
-attributes #0 = { "amdgpu-flat-work-group-size"="1,1" }
+attributes #0 = { "amdgpu-flat-work-group-size"="1,1" "amdgpu-no-wwm" }
>From 80d75d306e6d1731255da9a41c6def1ee5d8806f Mon Sep 17 00:00:00 2001
From: Domenic Nutile <domenic.nutile at gmail.com>
Date: Wed, 1 Apr 2026 12:55:43 -0400
Subject: [PATCH 6/6] Only have test for one subtarget, check nested call of
wwm stil prevents no-wwm attribute
---
llvm/test/CodeGen/AMDGPU/attributor-wwm.ll | 124 +++++++--------------
1 file changed, 40 insertions(+), 84 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-wwm.ll b/llvm/test/CodeGen/AMDGPU/attributor-wwm.ll
index 6788ba265d02f..a5717bfec3b5a 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-wwm.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-wwm.ll
@@ -1,59 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5
; Test the generation of the attribute amdgpu-no-wwm
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX9 %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX10 %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX11 %s
-; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=GFX12 %s
+; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -passes=amdgpu-attributor < %s | FileCheck -check-prefixes=CHECK %s
define amdgpu_kernel void @test_no_wwm(i32 %input, ptr addrspace(1) %out) {
-; GFX9-LABEL: define amdgpu_kernel void @test_no_wwm(
-; GFX9-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
-; GFX9-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
-; GFX9-NEXT: ret void
-;
-; GFX10-LABEL: define amdgpu_kernel void @test_no_wwm(
-; GFX10-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
-; GFX10-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
-; GFX10-NEXT: ret void
-;
-; GFX11-LABEL: define amdgpu_kernel void @test_no_wwm(
-; GFX11-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
-; GFX11-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
-; GFX11-NEXT: ret void
-;
-; GFX12-LABEL: define amdgpu_kernel void @test_no_wwm(
-; GFX12-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
-; GFX12-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
-; GFX12-NEXT: ret void
+; CHECK-LABEL: define amdgpu_kernel void @test_no_wwm(
+; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: store i32 [[INPUT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
;
store i32 %input, ptr addrspace(1) %out
ret void
}
define amdgpu_kernel void @test_old_wwm(i32 %input, ptr addrspace(1) %out) {
-; GFX9-LABEL: define amdgpu_kernel void @test_old_wwm(
-; GFX9-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
-; GFX9-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
-; GFX9-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX9-NEXT: ret void
-;
-; GFX10-LABEL: define amdgpu_kernel void @test_old_wwm(
-; GFX10-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
-; GFX10-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
-; GFX10-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX10-NEXT: ret void
-;
-; GFX11-LABEL: define amdgpu_kernel void @test_old_wwm(
-; GFX11-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
-; GFX11-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
-; GFX11-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX11-NEXT: ret void
-;
-; GFX12-LABEL: define amdgpu_kernel void @test_old_wwm(
-; GFX12-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
-; GFX12-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
-; GFX12-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX12-NEXT: ret void
+; CHECK-LABEL: define amdgpu_kernel void @test_old_wwm(
+; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
+; CHECK-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
;
%wwm = call i32 @llvm.amdgcn.wwm.i32(i32 %input)
store i32 %wwm, ptr addrspace(1) %out
@@ -61,48 +25,40 @@ define amdgpu_kernel void @test_old_wwm(i32 %input, ptr addrspace(1) %out) {
}
define amdgpu_kernel void @test_strict_wwm(i32 %input, ptr addrspace(1) %out) {
-; GFX9-LABEL: define amdgpu_kernel void @test_strict_wwm(
-; GFX9-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
-; GFX9-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
-; GFX9-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX9-NEXT: ret void
-;
-; GFX10-LABEL: define amdgpu_kernel void @test_strict_wwm(
-; GFX10-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
-; GFX10-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
-; GFX10-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX10-NEXT: ret void
-;
-; GFX11-LABEL: define amdgpu_kernel void @test_strict_wwm(
-; GFX11-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
-; GFX11-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
-; GFX11-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX11-NEXT: ret void
-;
-; GFX12-LABEL: define amdgpu_kernel void @test_strict_wwm(
-; GFX12-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
-; GFX12-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
-; GFX12-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
-; GFX12-NEXT: ret void
+; CHECK-LABEL: define amdgpu_kernel void @test_strict_wwm(
+; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.strict.wwm.i32(i32 [[INPUT]])
+; CHECK-NEXT: store i32 [[WWM]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
;
%wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %input)
store i32 %wwm, ptr addrspace(1) %out
ret void
}
+
+define i32 @test_calls_wwm(i32 %input) {
+; CHECK-LABEL: define i32 @test_calls_wwm(
+; CHECK-SAME: i32 [[INPUT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[WWM:%.*]] = call i32 @llvm.amdgcn.wwm.i32(i32 [[INPUT]])
+; CHECK-NEXT: ret i32 [[WWM]]
+;
+ %wwm = call i32 @llvm.amdgcn.wwm.i32(i32 %input)
+ ret i32 %wwm
+}
+
+define amdgpu_kernel void @test_nested(i32 %input, ptr addrspace(1) %out) {
+; CHECK-LABEL: define amdgpu_kernel void @test_no_propagate(
+; CHECK-SAME: i32 [[INPUT:%.*]], ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[RES:%.*]] = call i32 @test_calls_wwm(i32 [[INPUT]])
+; CHECK-NEXT: store i32 [[RES]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: ret void
+;
+ %res = call i32 @test_calls_wwm(i32 %input)
+ store i32 %res, ptr addrspace(1) %out
+ ret void
+}
;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx900" }
-; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" }
-; GFX9: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
-;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1010" }
-; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" }
-; GFX10: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
-;.
-; GFX11: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1100" }
-; GFX11: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1100" }
-; GFX11: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1100" }
-;.
-; GFX12: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1200" }
-; GFX12: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1200" }
-; GFX12: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1200" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-no-wwm" "target-cpu"="gfx1200" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1200" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nounwind speculatable willreturn memory(none) "target-cpu"="gfx1200" }
;.
More information about the llvm-commits
mailing list