[llvm] [AMDGPU] Support arbitrary types in amdgcn.dead (PR #134841)
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Tue Apr 8 04:25:41 PDT 2025
https://github.com/rovka created https://github.com/llvm/llvm-project/pull/134841
Legalize the amdgcn.dead intrinsic to work with types other than i32. It still generates IMPLICIT_DEFs.
Remove the previous instruction selection and register bank mapping code for the 32-bit case, since everything is handled in the legalizer now.
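
For context, with this change the intrinsic can be called with an arbitrary (including aggregate) result type, and every result value is simply lowered to an undef/IMPLICIT_DEF. A minimal sketch mirroring the new tests below (the struct, function name and mangled intrinsic suffix here are illustrative, not taken verbatim from the patch):

  %pair = type { i32, <2 x i16> }

  define %pair @dead_pair_sketch() {
    ; Each element of the returned aggregate becomes a G_IMPLICIT_DEF
    ; (GlobalISel) or an undef value (SelectionDAG); no real value is produced.
    %dead = call %pair @llvm.amdgcn.dead.s_pairs()
    ret %pair %dead
  }
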
From 9971bf8ade79a4876c433b4ef7ac060857d72d3e Mon Sep 17 00:00:00 2001
From: Diana Picus <diana-magda.picus at amd.com>
Date: Thu, 3 Apr 2025 15:07:17 +0200
Subject: [PATCH] [AMDGPU] Support arbitrary types in amdgcn.dead
Legalize the amdgcn.dead intrinsic to work with types other than i32. It
still generates IMPLICIT_DEFs.
Remove some of the previous code for selecting/reg bank mapping it
for 32-bit types, since everything is done in the legalizer now.
---
.../AMDGPU/AMDGPUInstructionSelector.cpp | 6 -
.../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 +
.../Target/AMDGPU/AMDGPURegisterBankInfo.cpp | 1 -
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 11 +
llvm/lib/Target/AMDGPU/SIInstructions.td | 6 -
.../CodeGen/AMDGPU/legalize-amdgcn.dead.mir | 32 ++
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll | 384 +++++++++++++++++-
7 files changed, 430 insertions(+), 16 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6ef7505ec6f62..e6caffe61e705 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1191,12 +1191,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
case Intrinsic::amdgcn_permlane16_swap:
case Intrinsic::amdgcn_permlane32_swap:
return selectPermlaneSwapIntrin(I, IntrinsicID);
- case Intrinsic::amdgcn_dead: {
- I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
- I.removeOperand(1); // drop intrinsic ID
- return RBI.constrainGenericRegister(I.getOperand(0).getReg(),
- AMDGPU::VGPR_32RegClass, *MRI);
- }
default:
return selectImpl(I, *CoverageInfo);
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 275d0193452a5..5d35a15123d63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7651,6 +7651,12 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
return legalizeLaneOp(Helper, MI, IntrID);
case Intrinsic::amdgcn_s_buffer_prefetch_data:
return legalizeSBufferPrefetch(Helper, MI);
+ case Intrinsic::amdgcn_dead: {
+ for (const MachineOperand &Def : MI.defs())
+ B.buildUndef(Def);
+ MI.eraseFromParent();
+ return true;
+ }
default: {
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrID))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 1d0e81db5a5db..f38665ee81bda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4701,7 +4701,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
case Intrinsic::amdgcn_set_inactive_chain_arg:
case Intrinsic::amdgcn_permlane64:
case Intrinsic::amdgcn_ds_bpermute_fi_b32:
- case Intrinsic::amdgcn_dead:
return getDefaultMappingAllVGPR(MI);
case Intrinsic::amdgcn_cvt_pkrtz:
if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 356040da95672..006717d141027 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6629,6 +6629,11 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(LoadVal);
return;
}
+ case Intrinsic::amdgcn_dead: {
+ for (unsigned I = 0, E = N->getNumValues(); I < E; ++I)
+ Results.push_back(DAG.getUNDEF(N->getValueType(I)));
+ return;
+ }
}
break;
}
@@ -9116,6 +9121,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::amdgcn_mov_dpp8:
case Intrinsic::amdgcn_update_dpp:
return lowerLaneOp(*this, Op.getNode(), DAG);
+ case Intrinsic::amdgcn_dead: {
+ SmallVector<SDValue, 8> Undefs;
+ for (unsigned I = 0, E = Op.getNode()->getNumValues(); I != E; ++I)
+ Undefs.push_back(DAG.getUNDEF(Op.getNode()->getValueType(I)));
+ return DAG.getMergeValues(Undefs, SDLoc(Op));
+ }
default:
if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 9051db0c01ed1..fe384b33911b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4484,9 +4484,3 @@ def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> {
let hasSideEffects = 1;
let SubtargetPredicate = isGFX10Plus;
}
-
-// FIXME: Would be nice if we could set the register class for the destination
-// register too.
-def IMP_DEF_FROM_INTRINSIC: Pat<
- (i32 (int_amdgcn_dead)), (IMPLICIT_DEF)>;
-
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
new file mode 100644
index 0000000000000..ec940f8d3b0b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
@@ -0,0 +1,32 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amdpal -mcpu=gfx1200 -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: test_struct
+body: |
+ bb.1.entry:
+
+ ; CHECK-LABEL: name: test_struct
+ ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<3 x s32>)
+ ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
+ ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32)
+ ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
+ ; CHECK-NEXT: $vgpr3 = COPY [[UV2]](s32)
+ ; CHECK-NEXT: $vgpr4_vgpr5 = COPY [[DEF2]](s64)
+ ; CHECK-NEXT: $vgpr6 = COPY [[DEF3]](<2 x s16>)
+ ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %0:_(s32), %1:_(<3 x s32>), %2:_(s64), %3:_(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.dead)
+
+ %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %1(<3 x s32>)
+ $vgpr0 = COPY %0(s32)
+ $vgpr1 = COPY %4(s32)
+ $vgpr2 = COPY %5(s32)
+ $vgpr3 = COPY %6(s32)
+ $vgpr4_vgpr5 = COPY %2(s64)
+ $vgpr6 = COPY %3(<2 x s16>)
+ SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll
index a009854542f21..ad3a316c4c91c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll
@@ -3,8 +3,8 @@
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=ASM-GISEL %s
; Test that we can use v0 for temporaries in the if.then block.
-define i32 @dead(i1 %cond, i32 %x, ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2) #0 {
-; ASM-DAG-LABEL: dead:
+define i32 @dead_i32(i1 %cond, i32 %x, ptr addrspace(1) %ptr1) #0 {
+; ASM-DAG-LABEL: dead_i32:
; ASM-DAG: ; %bb.0: ; %entry
; ASM-DAG-NEXT: s_wait_loadcnt_dscnt 0x0
; ASM-DAG-NEXT: s_wait_expcnt 0x0
@@ -27,7 +27,7 @@ define i32 @dead(i1 %cond, i32 %x, ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr
; ASM-DAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
; ASM-DAG-NEXT: s_setpc_b64 s[30:31]
;
-; ASM-GISEL-LABEL: dead:
+; ASM-GISEL-LABEL: dead_i32:
; ASM-GISEL: ; %bb.0: ; %entry
; ASM-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; ASM-GISEL-NEXT: s_wait_expcnt 0x0
@@ -62,3 +62,381 @@ if.end:
%res = phi i32 [ %x, %entry ], [ %dead, %if.then ]
ret i32 %res
}
+
+%trivial_types = type { i32, float, <3 x i32>, i64, ptr addrspace(5), ptr addrspace(1), <4 x float>, { float, <2 x i16> } }
+
+define %trivial_types @dead_struct(i1 %cond, %trivial_types %x, ptr addrspace(1) %ptr1, i32 %v) #0 {
+; ASM-DAG-LABEL: dead_struct:
+; ASM-DAG: ; %bb.0: ; %entry
+; ASM-DAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; ASM-DAG-NEXT: s_wait_expcnt 0x0
+; ASM-DAG-NEXT: s_wait_samplecnt 0x0
+; ASM-DAG-NEXT: s_wait_bvhcnt 0x0
+; ASM-DAG-NEXT: s_wait_kmcnt 0x0
+; ASM-DAG-NEXT: v_mov_b32_e32 v20, v0
+; ASM-DAG-NEXT: v_mov_b32_e32 v0, v1
+; ASM-DAG-NEXT: s_mov_b32 s0, exec_lo
+; ASM-DAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; ASM-DAG-NEXT: v_and_b32_e32 v1, 1, v20
+; ASM-DAG-NEXT: v_cmpx_eq_u32_e32 1, v1
+; ASM-DAG-NEXT: s_cbranch_execz .LBB1_2
+; ASM-DAG-NEXT: ; %bb.1: ; %if.then
+; ASM-DAG-NEXT: v_dual_mov_b32 v11, 0 :: v_dual_add_nc_u32 v0, 15, v19
+; ASM-DAG-NEXT: v_mov_b32_e32 v2, 0x3fc00000
+; ASM-DAG-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; ASM-DAG-NEXT: ; implicit-def: $vgpr6_vgpr7
+; ASM-DAG-NEXT: ; implicit-def: $vgpr8
+; ASM-DAG-NEXT: ; implicit-def: $vgpr9_vgpr10
+; ASM-DAG-NEXT: ; implicit-def: $vgpr15
+; ASM-DAG-NEXT: ; implicit-def: $vgpr16
+; ASM-DAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; ASM-DAG-NEXT: v_dual_mov_b32 v12, v11 :: v_dual_mov_b32 v13, v11
+; ASM-DAG-NEXT: v_mov_b32_e32 v14, v11
+; ASM-DAG-NEXT: global_store_b32 v[17:18], v0, off
+; ASM-DAG-NEXT: ; implicit-def: $vgpr0
+; ASM-DAG-NEXT: .LBB1_2: ; %if.end
+; ASM-DAG-NEXT: s_wait_alu 0xfffe
+; ASM-DAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; ASM-DAG-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
+; ASM-DAG-NEXT: v_dual_mov_b32 v3, v4 :: v_dual_mov_b32 v4, v5
+; ASM-DAG-NEXT: v_dual_mov_b32 v5, v6 :: v_dual_mov_b32 v6, v7
+; ASM-DAG-NEXT: v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-DAG-NEXT: v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-DAG-NEXT: v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-DAG-NEXT: v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-DAG-NEXT: v_mov_b32_e32 v15, v16
+; ASM-DAG-NEXT: s_setpc_b64 s[30:31]
+;
+; ASM-GISEL-LABEL: dead_struct:
+; ASM-GISEL: ; %bb.0: ; %entry
+; ASM-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; ASM-GISEL-NEXT: s_wait_expcnt 0x0
+; ASM-GISEL-NEXT: s_wait_samplecnt 0x0
+; ASM-GISEL-NEXT: s_wait_bvhcnt 0x0
+; ASM-GISEL-NEXT: s_wait_kmcnt 0x0
+; ASM-GISEL-NEXT: v_mov_b32_e32 v20, v0
+; ASM-GISEL-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
+; ASM-GISEL-NEXT: s_mov_b32 s0, exec_lo
+; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; ASM-GISEL-NEXT: v_and_b32_e32 v2, 1, v20
+; ASM-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v2
+; ASM-GISEL-NEXT: s_cbranch_execz .LBB1_2
+; ASM-GISEL-NEXT: ; %bb.1: ; %if.then
+; ASM-GISEL-NEXT: s_mov_b32 s4, 0
+; ASM-GISEL-NEXT: s_mov_b32 s1, 0x3fc00000
+; ASM-GISEL-NEXT: s_wait_alu 0xfffe
+; ASM-GISEL-NEXT: s_mov_b32 s7, s4
+; ASM-GISEL-NEXT: s_mov_b32 s5, s4
+; ASM-GISEL-NEXT: s_mov_b32 s6, s4
+; ASM-GISEL-NEXT: s_wait_alu 0xfffe
+; ASM-GISEL-NEXT: v_dual_mov_b32 v14, s7 :: v_dual_mov_b32 v13, s6
+; ASM-GISEL-NEXT: v_dual_mov_b32 v1, s1 :: v_dual_add_nc_u32 v0, 15, v19
+; ASM-GISEL-NEXT: v_dual_mov_b32 v12, s5 :: v_dual_mov_b32 v11, s4
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr3_vgpr4_vgpr5
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr6_vgpr7
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr8
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr9_vgpr10
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr15
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr16
+; ASM-GISEL-NEXT: global_store_b32 v[17:18], v0, off
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr0
+; ASM-GISEL-NEXT: .LBB1_2: ; %if.end
+; ASM-GISEL-NEXT: s_wait_alu 0xfffe
+; ASM-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; ASM-GISEL-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; ASM-GISEL-NEXT: v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
+; ASM-GISEL-NEXT: v_dual_mov_b32 v6, v7 :: v_dual_mov_b32 v7, v8
+; ASM-GISEL-NEXT: v_dual_mov_b32 v8, v9 :: v_dual_mov_b32 v9, v10
+; ASM-GISEL-NEXT: v_dual_mov_b32 v10, v11 :: v_dual_mov_b32 v11, v12
+; ASM-GISEL-NEXT: v_dual_mov_b32 v12, v13 :: v_dual_mov_b32 v13, v14
+; ASM-GISEL-NEXT: v_dual_mov_b32 v14, v15 :: v_dual_mov_b32 v15, v16
+; ASM-GISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ br i1 %cond, label %if.then, label %if.end
+
+if.then:
+ %dead = call %trivial_types @llvm.amdgcn.dead.s_trivial_typess()
+ %dead_insert_1 = insertvalue %trivial_types %dead, float 1.5, 1
+ %dead_insert_3 = insertvalue %trivial_types %dead_insert_1, <4 x float> zeroinitializer, 6
+
+ %vgpr_use = add i32 %v, 15 ; may use v0 or one of the other implicit_defs
+ store i32 %vgpr_use, ptr addrspace(1) %ptr1
+
+ br label %if.end
+
+if.end:
+ %res = phi %trivial_types [ %x, %entry ], [ %dead_insert_3, %if.then ]
+ ret %trivial_types %res
+}
+
+define [32 x i32] @dead_array(i1 %cond, [32 x i32] %x, ptr addrspace(1) %ptr1, i32 %v) #0 {
+; ASM-DAG-LABEL: dead_array:
+; ASM-DAG: ; %bb.0: ; %entry
+; ASM-DAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; ASM-DAG-NEXT: s_wait_expcnt 0x0
+; ASM-DAG-NEXT: s_wait_samplecnt 0x0
+; ASM-DAG-NEXT: s_wait_bvhcnt 0x0
+; ASM-DAG-NEXT: s_wait_kmcnt 0x0
+; ASM-DAG-NEXT: v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v0
+; ASM-DAG-NEXT: v_mov_b32_e32 v0, v1
+; ASM-DAG-NEXT: s_clause 0x4
+; ASM-DAG-NEXT: scratch_load_b32 v35, off, s32 offset:12
+; ASM-DAG-NEXT: scratch_load_b32 v34, off, s32 offset:8
+; ASM-DAG-NEXT: scratch_load_b32 v31, off, s32 offset:4
+; ASM-DAG-NEXT: scratch_load_b32 v30, off, s32
+; ASM-DAG-NEXT: scratch_load_b32 v1, off, s32 offset:16
+; ASM-DAG-NEXT: s_mov_b32 s0, exec_lo
+; ASM-DAG-NEXT: v_and_b32_e32 v33, 1, v33
+; ASM-DAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; ASM-DAG-NEXT: v_cmpx_eq_u32_e32 1, v33
+; ASM-DAG-NEXT: s_cbranch_execz .LBB2_2
+; ASM-DAG-NEXT: ; %bb.1: ; %if.then
+; ASM-DAG-NEXT: v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v7, 13
+; ASM-DAG-NEXT: s_wait_loadcnt 0x0
+; ASM-DAG-NEXT: v_add_nc_u32_e32 v0, 15, v1
+; ASM-DAG-NEXT: ; implicit-def: $vgpr2
+; ASM-DAG-NEXT: ; implicit-def: $vgpr3
+; ASM-DAG-NEXT: ; implicit-def: $vgpr4
+; ASM-DAG-NEXT: ; implicit-def: $vgpr5
+; ASM-DAG-NEXT: ; implicit-def: $vgpr6
+; ASM-DAG-NEXT: ; implicit-def: $vgpr9
+; ASM-DAG-NEXT: ; implicit-def: $vgpr10
+; ASM-DAG-NEXT: ; implicit-def: $vgpr11
+; ASM-DAG-NEXT: ; implicit-def: $vgpr12
+; ASM-DAG-NEXT: ; implicit-def: $vgpr13
+; ASM-DAG-NEXT: ; implicit-def: $vgpr14
+; ASM-DAG-NEXT: ; implicit-def: $vgpr15
+; ASM-DAG-NEXT: ; implicit-def: $vgpr16
+; ASM-DAG-NEXT: ; implicit-def: $vgpr17
+; ASM-DAG-NEXT: ; implicit-def: $vgpr18
+; ASM-DAG-NEXT: ; implicit-def: $vgpr19
+; ASM-DAG-NEXT: ; implicit-def: $vgpr20
+; ASM-DAG-NEXT: ; implicit-def: $vgpr21
+; ASM-DAG-NEXT: ; implicit-def: $vgpr22
+; ASM-DAG-NEXT: ; implicit-def: $vgpr23
+; ASM-DAG-NEXT: ; implicit-def: $vgpr24
+; ASM-DAG-NEXT: ; implicit-def: $vgpr25
+; ASM-DAG-NEXT: ; implicit-def: $vgpr26
+; ASM-DAG-NEXT: ; implicit-def: $vgpr27
+; ASM-DAG-NEXT: ; implicit-def: $vgpr28
+; ASM-DAG-NEXT: ; implicit-def: $vgpr29
+; ASM-DAG-NEXT: ; implicit-def: $vgpr32
+; ASM-DAG-NEXT: ; implicit-def: $vgpr30
+; ASM-DAG-NEXT: ; implicit-def: $vgpr31
+; ASM-DAG-NEXT: global_store_b32 v[34:35], v0, off
+; ASM-DAG-NEXT: ; implicit-def: $vgpr0
+; ASM-DAG-NEXT: .LBB2_2: ; %if.end
+; ASM-DAG-NEXT: s_wait_alu 0xfffe
+; ASM-DAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; ASM-DAG-NEXT: s_wait_loadcnt 0x0
+; ASM-DAG-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
+; ASM-DAG-NEXT: v_dual_mov_b32 v3, v4 :: v_dual_mov_b32 v4, v5
+; ASM-DAG-NEXT: v_dual_mov_b32 v5, v6 :: v_dual_mov_b32 v6, v7
+; ASM-DAG-NEXT: v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-DAG-NEXT: v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-DAG-NEXT: v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-DAG-NEXT: v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-DAG-NEXT: v_dual_mov_b32 v15, v16 :: v_dual_mov_b32 v16, v17
+; ASM-DAG-NEXT: v_dual_mov_b32 v17, v18 :: v_dual_mov_b32 v18, v19
+; ASM-DAG-NEXT: v_dual_mov_b32 v19, v20 :: v_dual_mov_b32 v20, v21
+; ASM-DAG-NEXT: v_dual_mov_b32 v21, v22 :: v_dual_mov_b32 v22, v23
+; ASM-DAG-NEXT: v_dual_mov_b32 v23, v24 :: v_dual_mov_b32 v24, v25
+; ASM-DAG-NEXT: v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v26, v27
+; ASM-DAG-NEXT: v_dual_mov_b32 v27, v28 :: v_dual_mov_b32 v28, v29
+; ASM-DAG-NEXT: v_mov_b32_e32 v29, v32
+; ASM-DAG-NEXT: s_setpc_b64 s[30:31]
+;
+; ASM-GISEL-LABEL: dead_array:
+; ASM-GISEL: ; %bb.0: ; %entry
+; ASM-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; ASM-GISEL-NEXT: s_wait_expcnt 0x0
+; ASM-GISEL-NEXT: s_wait_samplecnt 0x0
+; ASM-GISEL-NEXT: s_wait_bvhcnt 0x0
+; ASM-GISEL-NEXT: s_wait_kmcnt 0x0
+; ASM-GISEL-NEXT: v_mov_b32_e32 v32, v0
+; ASM-GISEL-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
+; ASM-GISEL-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; ASM-GISEL-NEXT: v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
+; ASM-GISEL-NEXT: v_dual_mov_b32 v6, v7 :: v_dual_mov_b32 v7, v8
+; ASM-GISEL-NEXT: v_dual_mov_b32 v8, v9 :: v_dual_mov_b32 v9, v10
+; ASM-GISEL-NEXT: v_dual_mov_b32 v10, v11 :: v_dual_mov_b32 v11, v12
+; ASM-GISEL-NEXT: v_dual_mov_b32 v12, v13 :: v_dual_mov_b32 v13, v14
+; ASM-GISEL-NEXT: v_dual_mov_b32 v14, v15 :: v_dual_mov_b32 v15, v16
+; ASM-GISEL-NEXT: v_dual_mov_b32 v16, v17 :: v_dual_mov_b32 v17, v18
+; ASM-GISEL-NEXT: v_dual_mov_b32 v18, v19 :: v_dual_mov_b32 v19, v20
+; ASM-GISEL-NEXT: v_dual_mov_b32 v20, v21 :: v_dual_mov_b32 v21, v22
+; ASM-GISEL-NEXT: v_dual_mov_b32 v22, v23 :: v_dual_mov_b32 v23, v24
+; ASM-GISEL-NEXT: v_dual_mov_b32 v24, v25 :: v_dual_mov_b32 v25, v26
+; ASM-GISEL-NEXT: v_dual_mov_b32 v26, v27 :: v_dual_mov_b32 v27, v28
+; ASM-GISEL-NEXT: v_dual_mov_b32 v28, v29 :: v_dual_mov_b32 v29, v30
+; ASM-GISEL-NEXT: s_clause 0x4
+; ASM-GISEL-NEXT: scratch_load_b32 v30, off, s32
+; ASM-GISEL-NEXT: scratch_load_b32 v31, off, s32 offset:4
+; ASM-GISEL-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; ASM-GISEL-NEXT: scratch_load_b32 v34, off, s32 offset:12
+; ASM-GISEL-NEXT: scratch_load_b32 v35, off, s32 offset:16
+; ASM-GISEL-NEXT: v_and_b32_e32 v32, 1, v32
+; ASM-GISEL-NEXT: s_mov_b32 s0, exec_lo
+; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; ASM-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v32
+; ASM-GISEL-NEXT: s_cbranch_execz .LBB2_2
+; ASM-GISEL-NEXT: ; %bb.1: ; %if.then
+; ASM-GISEL-NEXT: s_mov_b32 s1, 15
+; ASM-GISEL-NEXT: s_mov_b32 s2, 13
+; ASM-GISEL-NEXT: s_wait_loadcnt 0x0
+; ASM-GISEL-NEXT: s_wait_alu 0xfffe
+; ASM-GISEL-NEXT: v_dual_mov_b32 v7, s1 :: v_dual_add_nc_u32 v0, 15, v35
+; ASM-GISEL-NEXT: v_mov_b32_e32 v6, s2
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr1
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr2
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr3
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr4
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr5
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr8
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr9
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr10
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr11
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr12
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr13
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr14
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr15
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr16
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr17
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr18
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr19
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr20
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr21
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr22
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr23
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr24
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr25
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr26
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr27
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr28
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr29
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr30
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr31
+; ASM-GISEL-NEXT: global_store_b32 v[33:34], v0, off
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr0
+; ASM-GISEL-NEXT: .LBB2_2: ; %if.end
+; ASM-GISEL-NEXT: s_wait_alu 0xfffe
+; ASM-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; ASM-GISEL-NEXT: s_wait_loadcnt 0x0
+; ASM-GISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ br i1 %cond, label %if.then, label %if.end
+
+if.then:
+ %dead = call [32 x i32] @llvm.amdgcn.dead()
+ %dead_insert_1 = insertvalue [32 x i32] %dead, i32 15, 7
+ %dead_insert_3 = insertvalue [32 x i32] %dead_insert_1, i32 13, 6
+
+ %vgpr_use = add i32 %v, 15 ; may use v0 or one of the other implicit_defs
+ store i32 %vgpr_use, ptr addrspace(1) %ptr1
+
+ br label %if.end
+
+if.end:
+ %res = phi [32 x i32] [ %x, %entry ], [ %dead_insert_3, %if.then ]
+ ret [32 x i32] %res
+}
+
+%non_trivial_types = type { i8, i16, half, bfloat, <2 x i16>, <2 x half>, <2 x bfloat>, <5 x i32>, i128}
+
+define %non_trivial_types @dead_non_trivial(i1 %cond, %non_trivial_types %x, ptr addrspace(1) %ptr1, i32 %v) #0 {
+; ASM-DAG-LABEL: dead_non_trivial:
+; ASM-DAG: ; %bb.0: ; %entry
+; ASM-DAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; ASM-DAG-NEXT: s_wait_expcnt 0x0
+; ASM-DAG-NEXT: s_wait_samplecnt 0x0
+; ASM-DAG-NEXT: s_wait_bvhcnt 0x0
+; ASM-DAG-NEXT: s_wait_kmcnt 0x0
+; ASM-DAG-NEXT: v_mov_b32_e32 v20, v0
+; ASM-DAG-NEXT: v_mov_b32_e32 v0, v1
+; ASM-DAG-NEXT: s_mov_b32 s0, exec_lo
+; ASM-DAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; ASM-DAG-NEXT: v_and_b32_e32 v1, 1, v20
+; ASM-DAG-NEXT: v_cmpx_eq_u32_e32 1, v1
+; ASM-DAG-NEXT: s_cbranch_execz .LBB3_2
+; ASM-DAG-NEXT: ; %bb.1: ; %if.then
+; ASM-DAG-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_add_nc_u32 v0, 15, v19
+; ASM-DAG-NEXT: v_mov_b32_e32 v3, 0x3e00
+; ASM-DAG-NEXT: ; implicit-def: $vgpr2
+; ASM-DAG-NEXT: ; implicit-def: $vgpr4
+; ASM-DAG-NEXT: ; implicit-def: $vgpr5
+; ASM-DAG-NEXT: ; implicit-def: $vgpr6
+; ASM-DAG-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12
+; ASM-DAG-NEXT: ; implicit-def: $vgpr13_vgpr14
+; ASM-DAG-NEXT: ; implicit-def: $vgpr15_vgpr16
+; ASM-DAG-NEXT: global_store_b32 v[17:18], v0, off
+; ASM-DAG-NEXT: ; implicit-def: $vgpr0
+; ASM-DAG-NEXT: .LBB3_2: ; %if.end
+; ASM-DAG-NEXT: s_wait_alu 0xfffe
+; ASM-DAG-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; ASM-DAG-NEXT: v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
+; ASM-DAG-NEXT: v_dual_mov_b32 v3, v4 :: v_dual_mov_b32 v4, v5
+; ASM-DAG-NEXT: v_dual_mov_b32 v5, v6 :: v_dual_mov_b32 v6, v7
+; ASM-DAG-NEXT: v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-DAG-NEXT: v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-DAG-NEXT: v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-DAG-NEXT: v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-DAG-NEXT: v_mov_b32_e32 v15, v16
+; ASM-DAG-NEXT: s_setpc_b64 s[30:31]
+;
+; ASM-GISEL-LABEL: dead_non_trivial:
+; ASM-GISEL: ; %bb.0: ; %entry
+; ASM-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; ASM-GISEL-NEXT: s_wait_expcnt 0x0
+; ASM-GISEL-NEXT: s_wait_samplecnt 0x0
+; ASM-GISEL-NEXT: s_wait_bvhcnt 0x0
+; ASM-GISEL-NEXT: s_wait_kmcnt 0x0
+; ASM-GISEL-NEXT: v_mov_b32_e32 v20, v0
+; ASM-GISEL-NEXT: v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
+; ASM-GISEL-NEXT: v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; ASM-GISEL-NEXT: v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
+; ASM-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; ASM-GISEL-NEXT: v_dual_mov_b32 v6, v7 :: v_dual_and_b32 v7, 1, v20
+; ASM-GISEL-NEXT: s_mov_b32 s0, exec_lo
+; ASM-GISEL-NEXT: v_cmpx_ne_u32_e32 0, v7
+; ASM-GISEL-NEXT: s_cbranch_execz .LBB3_2
+; ASM-GISEL-NEXT: ; %bb.1: ; %if.then
+; ASM-GISEL-NEXT: s_movk_i32 s1, 0x3e00
+; ASM-GISEL-NEXT: s_mov_b32 s2, 0
+; ASM-GISEL-NEXT: v_add_nc_u32_e32 v0, 15, v19
+; ASM-GISEL-NEXT: s_wait_alu 0xfffe
+; ASM-GISEL-NEXT: v_mov_b32_e32 v2, s1
+; ASM-GISEL-NEXT: v_mov_b32_e32 v6, s2
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr1
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr3
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr4
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr5
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr13_vgpr14_vgpr15_vgpr16
+; ASM-GISEL-NEXT: global_store_b32 v[17:18], v0, off
+; ASM-GISEL-NEXT: ; implicit-def: $vgpr0
+; ASM-GISEL-NEXT: .LBB3_2: ; %if.end
+; ASM-GISEL-NEXT: s_wait_alu 0xfffe
+; ASM-GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0
+; ASM-GISEL-NEXT: v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-GISEL-NEXT: v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-GISEL-NEXT: v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-GISEL-NEXT: v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-GISEL-NEXT: v_mov_b32_e32 v15, v16
+; ASM-GISEL-NEXT: s_setpc_b64 s[30:31]
+entry:
+ br i1 %cond, label %if.then, label %if.end
+
+if.then:
+ %dead = call %non_trivial_types @llvm.amdgcn.dead.s_non_trivial_typess()
+ %dead_insert_1 = insertvalue %non_trivial_types %dead, half 1.5, 2
+ %dead_insert_3 = insertvalue %non_trivial_types %dead_insert_1, <2 x bfloat> zeroinitializer, 6
+
+ %vgpr_use = add i32 %v, 15 ; may use v0 or one of the other implicit_defs
+ store i32 %vgpr_use, ptr addrspace(1) %ptr1
+
+ br label %if.end
+
+if.end:
+ %res = phi %non_trivial_types [ %x, %entry ], [ %dead_insert_3, %if.then ]
+ ret %non_trivial_types %res
+}