[llvm] [AMDGPU] SelDAG: fix lowering of undefined workitem intrinsics (PR #126058)
Robert Imschweiler via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 13:02:59 PST 2025
https://github.com/ro-i updated https://github.com/llvm/llvm-project/pull/126058
>From 9c8af8a0a76019069d04b93608ac0ebd4e2df234 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 5 Feb 2025 08:02:43 -0600
Subject: [PATCH 1/4] [AMDGPU] SelDAG: fix lowering of undefined workitem
intrinsics
GlobalISel already handles undefined workitem.id.{x,y,z} intrinsics, SelDAG failed in AMDGPUISelLowering.cpp due to a failed assertion in `AMDGPUTargetLowering::loadInputValue`: `Arg && "Attempting to load missing argument"`.
This commit changes the behavior of SelDAG to instead use a zero constant.
This LLVM defect was identified via the AMD Fuzzing project.
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 23 ++++++++++--
.../llvm.amdgcn.workitem.id-undefined-attr.ll | 36 +++++++++++++++++++
2 files changed, 56 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index b632c50dae0e3..47654890ff50e 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8790,11 +8790,28 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
}
case Intrinsic::amdgcn_workitem_id_x:
- return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
+ if (!MFI->getArgInfo().WorkItemIDX) {
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ return DAG.getConstant(0, SDLoc(Op),
+ EVT::getIntegerVT(*DAG.getContext(), 32));
+ } else {
+ return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
+ }
case Intrinsic::amdgcn_workitem_id_y:
- return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
+ if (!MFI->getArgInfo().WorkItemIDY) {
+ return DAG.getConstant(0, SDLoc(Op),
+ EVT::getIntegerVT(*DAG.getContext(), 32));
+ } else {
+ return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
+ }
case Intrinsic::amdgcn_workitem_id_z:
- return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
+ if (!MFI->getArgInfo().WorkItemIDZ) {
+ return DAG.getConstant(0, SDLoc(Op),
+ EVT::getIntegerVT(*DAG.getContext(), 32));
+ } else {
+ return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
+ }
case Intrinsic::amdgcn_wavefrontsize:
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
new file mode 100644
index 0000000000000..0e6a448fcae5a
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 --global-isel=false -o - %s | FileCheck %s
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+declare i32 @llvm.amdgcn.workitem.id.z() #0
+
+define amdgpu_ps i32 @test_workitem_id_x() #1 {
+; CHECK-LABEL: test_workitem_id_x:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: ; return to shader part epilog
+ %id = call i32 @llvm.amdgcn.workitem.id.x()
+ ret i32 %id
+}
+
+define amdgpu_ps i32 @test_workitem_id_y() #1 {
+; CHECK-LABEL: test_workitem_id_y:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: ; return to shader part epilog
+ %id = call i32 @llvm.amdgcn.workitem.id.y()
+ ret i32 %id
+}
+
+define amdgpu_ps i32 @test_workitem_id_z() #1 {
+; CHECK-LABEL: test_workitem_id_z:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: s_mov_b32 s0, 0
+; CHECK-NEXT: ; return to shader part epilog
+ %id = call i32 @llvm.amdgcn.workitem.id.z()
+ ret i32 %id
+}
+
+attributes #0 = { nounwind readnone }
+attributes #1 = { nounwind }
>From b9210231f10c4929d77f3e1fd4fa8521710a7dff Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Fri, 7 Feb 2025 05:11:20 -0600
Subject: [PATCH 2/4] implement feedback
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 28 +++++----------
.../GlobalISel/llvm.amdgcn.workitem.id.ll | 25 +++++++++++++
.../llvm.amdgcn.workitem.id-undefined-attr.ll | 36 -------------------
.../CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll | 25 +++++++++++++
4 files changed, 58 insertions(+), 56 deletions(-)
delete mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 47654890ff50e..39c95bacad26b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8619,6 +8619,11 @@ SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
if (MaxID == 0)
return DAG.getConstant(0, SL, MVT::i32);
+ // It's undefined behavior if a function marked with the amdgpu-no-*
+ // attributes uses the corresponding intrinsic.
+ if (!Arg)
+ return DAG.getUNDEF(EVT::getIntegerVT(*DAG.getContext(), 32));
+
SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()), Arg);
@@ -8790,28 +8795,11 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
AMDGPUFunctionArgInfo::LDS_KERNEL_ID);
}
case Intrinsic::amdgcn_workitem_id_x:
- if (!MFI->getArgInfo().WorkItemIDX) {
- // It's undefined behavior if a function marked with the amdgpu-no-*
- // attributes uses the corresponding intrinsic.
- return DAG.getConstant(0, SDLoc(Op),
- EVT::getIntegerVT(*DAG.getContext(), 32));
- } else {
- return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
- }
+ return lowerWorkitemID(DAG, Op, 0, MFI->getArgInfo().WorkItemIDX);
case Intrinsic::amdgcn_workitem_id_y:
- if (!MFI->getArgInfo().WorkItemIDY) {
- return DAG.getConstant(0, SDLoc(Op),
- EVT::getIntegerVT(*DAG.getContext(), 32));
- } else {
- return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
- }
+ return lowerWorkitemID(DAG, Op, 1, MFI->getArgInfo().WorkItemIDY);
case Intrinsic::amdgcn_workitem_id_z:
- if (!MFI->getArgInfo().WorkItemIDZ) {
- return DAG.getConstant(0, SDLoc(Op),
- EVT::getIntegerVT(*DAG.getContext(), 32));
- } else {
- return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
- }
+ return lowerWorkitemID(DAG, Op, 2, MFI->getArgInfo().WorkItemIDZ);
case Intrinsic::amdgcn_wavefrontsize:
return DAG.getConstant(MF.getSubtarget<GCNSubtarget>().getWavefrontSize(),
SDLoc(Op), MVT::i32);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index d5646820a1983..eac575742cf0e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -9,6 +9,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v6.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -o - %t.v4.ll | FileCheck --check-prefixes=UNDEF %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -195,6 +196,30 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
ret void
}
+define amdgpu_kernel void @undefined_workitem_x_only() {
+; UNDEF-LABEL: undefined_workitem_x_only:
+; UNDEF: ; %bb.0:
+; UNDEF-NEXT: s_endpgm
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_y_only() {
+; UNDEF-LABEL: undefined_workitem_y_only:
+; UNDEF: ; %bb.0:
+; UNDEF-NEXT: s_endpgm
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_z_only() {
+; UNDEF-LABEL: undefined_workitem_z_only:
+; UNDEF: ; %bb.0:
+; UNDEF-NEXT: s_endpgm
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
deleted file mode 100644
index 0e6a448fcae5a..0000000000000
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-undefined-attr.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 --global-isel=false -o - %s | FileCheck %s
-
-declare i32 @llvm.amdgcn.workitem.id.x() #0
-declare i32 @llvm.amdgcn.workitem.id.y() #0
-declare i32 @llvm.amdgcn.workitem.id.z() #0
-
-define amdgpu_ps i32 @test_workitem_id_x() #1 {
-; CHECK-LABEL: test_workitem_id_x:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: ; return to shader part epilog
- %id = call i32 @llvm.amdgcn.workitem.id.x()
- ret i32 %id
-}
-
-define amdgpu_ps i32 @test_workitem_id_y() #1 {
-; CHECK-LABEL: test_workitem_id_y:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: ; return to shader part epilog
- %id = call i32 @llvm.amdgcn.workitem.id.y()
- ret i32 %id
-}
-
-define amdgpu_ps i32 @test_workitem_id_z() #1 {
-; CHECK-LABEL: test_workitem_id_z:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: s_mov_b32 s0, 0
-; CHECK-NEXT: ; return to shader part epilog
- %id = call i32 @llvm.amdgcn.workitem.id.z()
- ret i32 %id
-}
-
-attributes #0 = { nounwind readnone }
-attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
index eaee8ec73fe41..af46f0c02eda2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@@ -5,6 +5,7 @@
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global < %t.bc | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %t.bc | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %t.bc | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -o - %s | FileCheck --check-prefix=UNDEF %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -128,6 +129,30 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
ret void
}
+define amdgpu_kernel void @undefined_workitem_x_only() {
+; UNDEF-LABEL: undefined_workitem_x_only:
+; UNDEF: ; %bb.0:
+; UNDEF-NEXT: s_endpgm
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_y_only() {
+; UNDEF-LABEL: undefined_workitem_y_only:
+; UNDEF: ; %bb.0:
+; UNDEF-NEXT: s_endpgm
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ ret void
+}
+
+define amdgpu_kernel void @undefined_workitem_z_only() {
+; UNDEF-LABEL: undefined_workitem_z_only:
+; UNDEF: ; %bb.0:
+; UNDEF-NEXT: s_endpgm
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ ret void
+}
+
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
>From 6ee6adbb56128f26ea9262c59cae346ea55b8190 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Fri, 7 Feb 2025 14:26:08 -0600
Subject: [PATCH 3/4] implement feedback
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 2 +-
.../GlobalISel/llvm.amdgcn.workitem.id.ll | 25 --------
...kitem.id-unsupported-calling-convention.ll | 61 +++++++++++++++++++
.../CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll | 25 --------
4 files changed, 62 insertions(+), 51 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 39c95bacad26b..28debbcfc1ede 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -8622,7 +8622,7 @@ SDValue SITargetLowering::lowerWorkitemID(SelectionDAG &DAG, SDValue Op,
// It's undefined behavior if a function marked with the amdgpu-no-*
// attributes uses the corresponding intrinsic.
if (!Arg)
- return DAG.getUNDEF(EVT::getIntegerVT(*DAG.getContext(), 32));
+ return DAG.getUNDEF(Op->getValueType(0));
SDValue Val = loadInputValue(DAG, &AMDGPU::VGPR_32RegClass, MVT::i32,
SDLoc(DAG.getEntryNode()), Arg);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index eac575742cf0e..d5646820a1983 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -9,7 +9,6 @@
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v4.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %t.v6.ll | FileCheck -check-prefixes=ALL,PACKED-TID %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -o - %t.v4.ll | FileCheck --check-prefixes=UNDEF %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -196,30 +195,6 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
ret void
}
-define amdgpu_kernel void @undefined_workitem_x_only() {
-; UNDEF-LABEL: undefined_workitem_x_only:
-; UNDEF: ; %bb.0:
-; UNDEF-NEXT: s_endpgm
- %id.x = call i32 @llvm.amdgcn.workitem.id.x()
- ret void
-}
-
-define amdgpu_kernel void @undefined_workitem_y_only() {
-; UNDEF-LABEL: undefined_workitem_y_only:
-; UNDEF: ; %bb.0:
-; UNDEF-NEXT: s_endpgm
- %id.y = call i32 @llvm.amdgcn.workitem.id.y()
- ret void
-}
-
-define amdgpu_kernel void @undefined_workitem_z_only() {
-; UNDEF-LABEL: undefined_workitem_z_only:
-; UNDEF: ; %bb.0:
-; UNDEF-NEXT: s_endpgm
- %id.z = call i32 @llvm.amdgcn.workitem.id.z()
- ret void
-}
-
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
new file mode 100644
index 0000000000000..4f914455809a4
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 -stop-after=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=SelDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=legalizer -o - %s | FileCheck --check-prefix=GlobalISel %s
+
+declare i32 @llvm.amdgcn.workitem.id.x()
+declare i32 @llvm.amdgcn.workitem.id.y()
+declare i32 @llvm.amdgcn.workitem.id.z()
+
+define amdgpu_ps void @undefined_workitems(ptr %p, ptr %q, ptr %r) {
+ ; SelDAG-LABEL: name: undefined_workitems
+ ; SelDAG: bb.0 (%ir-block.0):
+ ; SelDAG-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; SelDAG-NEXT: {{ $}}
+ ; SelDAG-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr5
+ ; SelDAG-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr4
+ ; SelDAG-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr3
+ ; SelDAG-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; SelDAG-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; SelDAG-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; SelDAG-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+ ; SelDAG-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; SelDAG-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; SelDAG-NEXT: [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[DEF8:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; SelDAG-NEXT: S_ENDPGM 0
+ ;
+ ; GlobalISel-LABEL: name: undefined_workitems
+ ; GlobalISel: bb.1 (%ir-block.0):
+ ; GlobalISel-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GlobalISel-NEXT: {{ $}}
+ ; GlobalISel-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GlobalISel-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GlobalISel-NEXT: [[MV:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GlobalISel-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GlobalISel-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GlobalISel-NEXT: [[MV1:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GlobalISel-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GlobalISel-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GlobalISel-NEXT: [[MV2:%[0-9]+]]:_(p0) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GlobalISel-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; GlobalISel-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+ ; GlobalISel-NEXT: G_STORE [[COPY6]](s32), [[MV]](p0) :: (store (s32) into %ir.p)
+ ; GlobalISel-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
+ ; GlobalISel-NEXT: G_STORE [[COPY7]](s32), [[MV1]](p0) :: (store (s32) into %ir.q)
+ ; GlobalISel-NEXT: G_STORE [[DEF]](s32), [[MV2]](p0) :: (store (s32) into %ir.r)
+ ; GlobalISel-NEXT: S_ENDPGM 0
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ store i32 %id.x, ptr %p
+ %id.y = call i32 @llvm.amdgcn.workitem.id.y()
+ store i32 %id.y, ptr %q
+ %id.z = call i32 @llvm.amdgcn.workitem.id.z()
+ store i32 %id.z, ptr %r
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
index af46f0c02eda2..eaee8ec73fe41 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id.ll
@@ -5,7 +5,6 @@
; RUN: llc -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -mattr=-flat-for-global < %t.bc | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %t.bc | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -amdgpu-enable-vopd=0 < %t.bc | FileCheck -check-prefixes=ALL,PACKED-TID %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -o - %s | FileCheck --check-prefix=UNDEF %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -129,30 +128,6 @@ define amdgpu_kernel void @test_reqd_workgroup_size_z_only(ptr %out) !reqd_work_
ret void
}
-define amdgpu_kernel void @undefined_workitem_x_only() {
-; UNDEF-LABEL: undefined_workitem_x_only:
-; UNDEF: ; %bb.0:
-; UNDEF-NEXT: s_endpgm
- %id.x = call i32 @llvm.amdgcn.workitem.id.x()
- ret void
-}
-
-define amdgpu_kernel void @undefined_workitem_y_only() {
-; UNDEF-LABEL: undefined_workitem_y_only:
-; UNDEF: ; %bb.0:
-; UNDEF-NEXT: s_endpgm
- %id.y = call i32 @llvm.amdgcn.workitem.id.y()
- ret void
-}
-
-define amdgpu_kernel void @undefined_workitem_z_only() {
-; UNDEF-LABEL: undefined_workitem_z_only:
-; UNDEF: ; %bb.0:
-; UNDEF-NEXT: s_endpgm
- %id.z = call i32 @llvm.amdgcn.workitem.id.z()
- ret void
-}
-
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
>From 7fed31497c70b5bef88fa521bc536f2df60e75d5 Mon Sep 17 00:00:00 2001
From: Robert Imschweiler <robert.imschweiler at amd.com>
Date: Wed, 12 Feb 2025 15:02:39 -0600
Subject: [PATCH 4/4] implement feedback
---
...kitem.id-unsupported-calling-convention.ll | 20 +++++++------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
index 4f914455809a4..684b59c66ee8e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.workitem.id-unsupported-calling-convention.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 -stop-after=si-fix-sgpr-copies -o - %s | FileCheck --check-prefix=SelDAG %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -O0 -stop-after=amdgpu-isel -o - %s | FileCheck --check-prefix=SelDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stop-after=legalizer -o - %s | FileCheck --check-prefix=GlobalISel %s
declare i32 @llvm.amdgcn.workitem.id.x()
@@ -17,18 +17,12 @@ define amdgpu_ps void @undefined_workitems(ptr %p, ptr %q, ptr %r) {
; SelDAG-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; SelDAG-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; SelDAG-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr0
- ; SelDAG-NEXT: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[DEF1:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
- ; SelDAG-NEXT: [[DEF2:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[DEF3:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
- ; SelDAG-NEXT: [[DEF4:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[DEF5:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
- ; SelDAG-NEXT: [[DEF6:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[DEF7:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
- ; SelDAG-NEXT: [[DEF8:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
+ ; SelDAG-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[COPY]], %subreg.sub1
+ ; SelDAG-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY3]], %subreg.sub0, [[COPY2]], %subreg.sub1
+ ; SelDAG-NEXT: [[REG_SEQUENCE2:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY5]], %subreg.sub0, [[COPY4]], %subreg.sub1
+ ; SelDAG-NEXT: [[COPY6:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE]]
+ ; SelDAG-NEXT: [[COPY7:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE1]]
+ ; SelDAG-NEXT: [[COPY8:%[0-9]+]]:vreg_64_align2 = COPY [[REG_SEQUENCE2]]
; SelDAG-NEXT: S_ENDPGM 0
;
; GlobalISel-LABEL: name: undefined_workitems
More information about the llvm-commits
mailing list