[llvm] [AMDGPU] Support arbitrary types in amdgcn.dead (PR #134841)

Diana Picus via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 8 04:25:41 PDT 2025


https://github.com/rovka created https://github.com/llvm/llvm-project/pull/134841

Legalize the amdgcn.dead intrinsic to work with types other than i32. It still generates IMPLICIT_DEFs.

Remove some of the previous code for instruction selection and register bank mapping of the 32-bit form, since everything is now done in the legalizer.
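
For illustration, a minimal sketch of the kind of IR this enables (the i64 and <2 x i16> overloads here are just examples; the tests in the patch exercise larger structs and arrays):

    declare i64 @llvm.amdgcn.dead.i64()
    declare <2 x i16> @llvm.amdgcn.dead.v2i16()

    define i64 @example() {
      ; The result is legalized to an IMPLICIT_DEF of the requested type
      ; instead of being limited to i32.
      %dead = call i64 @llvm.amdgcn.dead.i64()
      ret i64 %dead
    }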

From 9971bf8ade79a4876c433b4ef7ac060857d72d3e Mon Sep 17 00:00:00 2001
From: Diana Picus <diana-magda.picus at amd.com>
Date: Thu, 3 Apr 2025 15:07:17 +0200
Subject: [PATCH] [AMDGPU] Support arbitrary types in amdgcn.dead

Legalize the amdgcn.dead intrinsic to work with types other than i32. It
still generates IMPLICIT_DEFs.

Remove some of the previous code for selecting/reg bank mapping it
for 32-bit types, since everything is done in the legalizer now.
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      |   6 -
 .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp |   6 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |   1 -
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |  11 +
 llvm/lib/Target/AMDGPU/SIInstructions.td      |   6 -
 .../CodeGen/AMDGPU/legalize-amdgcn.dead.mir   |  32 ++
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll  | 384 +++++++++++++++++-
 7 files changed, 430 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 6ef7505ec6f62..e6caffe61e705 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1191,12 +1191,6 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
   case Intrinsic::amdgcn_permlane16_swap:
   case Intrinsic::amdgcn_permlane32_swap:
     return selectPermlaneSwapIntrin(I, IntrinsicID);
-  case Intrinsic::amdgcn_dead: {
-    I.setDesc(TII.get(TargetOpcode::IMPLICIT_DEF));
-    I.removeOperand(1); // drop intrinsic ID
-    return RBI.constrainGenericRegister(I.getOperand(0).getReg(),
-                                        AMDGPU::VGPR_32RegClass, *MRI);
-  }
   default:
     return selectImpl(I, *CoverageInfo);
   }
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 275d0193452a5..5d35a15123d63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -7651,6 +7651,12 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
     return legalizeLaneOp(Helper, MI, IntrID);
   case Intrinsic::amdgcn_s_buffer_prefetch_data:
     return legalizeSBufferPrefetch(Helper, MI);
+  case Intrinsic::amdgcn_dead: {
+    for (const MachineOperand &Def : MI.defs())
+      B.buildUndef(Def);
+    MI.eraseFromParent();
+    return true;
+  }
   default: {
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrID))
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 1d0e81db5a5db..f38665ee81bda 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -4701,7 +4701,6 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
     case Intrinsic::amdgcn_set_inactive_chain_arg:
     case Intrinsic::amdgcn_permlane64:
     case Intrinsic::amdgcn_ds_bpermute_fi_b32:
-    case Intrinsic::amdgcn_dead:
       return getDefaultMappingAllVGPR(MI);
     case Intrinsic::amdgcn_cvt_pkrtz:
       if (Subtarget.hasSALUFloatInsts() && isSALUMapping(MI))
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 356040da95672..006717d141027 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -6629,6 +6629,11 @@ void SITargetLowering::ReplaceNodeResults(SDNode *N,
       Results.push_back(LoadVal);
       return;
     }
+    case Intrinsic::amdgcn_dead: {
+      for (unsigned I = 0, E = N->getNumValues(); I < E; ++I)
+        Results.push_back(DAG.getUNDEF(N->getValueType(I)));
+      return;
+    }
     }
     break;
   }
@@ -9116,6 +9121,12 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::amdgcn_mov_dpp8:
   case Intrinsic::amdgcn_update_dpp:
     return lowerLaneOp(*this, Op.getNode(), DAG);
+  case Intrinsic::amdgcn_dead: {
+    SmallVector<SDValue, 8> Undefs;
+    for (unsigned I = 0, E = Op.getNode()->getNumValues(); I != E; ++I)
+      Undefs.push_back(DAG.getUNDEF(Op.getNode()->getValueType(I)));
+    return DAG.getMergeValues(Undefs, SDLoc(Op));
+  }
   default:
     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
             AMDGPU::getImageDimIntrinsicInfo(IntrinsicID))
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 9051db0c01ed1..fe384b33911b9 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -4484,9 +4484,3 @@ def V_ILLEGAL : Enc32, InstSI<(outs), (ins), "v_illegal"> {
   let hasSideEffects = 1;
   let SubtargetPredicate = isGFX10Plus;
 }
-
-// FIXME: Would be nice if we could set the register class for the destination
-// register too.
-def IMP_DEF_FROM_INTRINSIC: Pat<
-  (i32 (int_amdgcn_dead)), (IMPLICIT_DEF)>;
-
diff --git a/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
new file mode 100644
index 0000000000000..ec940f8d3b0b0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/legalize-amdgcn.dead.mir
@@ -0,0 +1,32 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amdpal -mcpu=gfx1200 -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: test_struct
+body: |
+  bb.1.entry:
+
+    ; CHECK-LABEL: name: test_struct
+    ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s32>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF1]](<3 x s32>)
+    ; CHECK-NEXT: $vgpr0 = COPY [[DEF]](s32)
+    ; CHECK-NEXT: $vgpr1 = COPY [[UV]](s32)
+    ; CHECK-NEXT: $vgpr2 = COPY [[UV1]](s32)
+    ; CHECK-NEXT: $vgpr3 = COPY [[UV2]](s32)
+    ; CHECK-NEXT: $vgpr4_vgpr5 = COPY [[DEF2]](s64)
+    ; CHECK-NEXT: $vgpr6 = COPY [[DEF3]](<2 x s16>)
+    ; CHECK-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+    %0:_(s32), %1:_(<3 x s32>), %2:_(s64), %3:_(<2 x s16>) = G_INTRINSIC intrinsic(@llvm.amdgcn.dead)
+
+    %4:_(s32), %5:_(s32), %6:_(s32) = G_UNMERGE_VALUES %1(<3 x s32>)
+    $vgpr0 = COPY %0(s32)
+    $vgpr1 = COPY %4(s32)
+    $vgpr2 = COPY %5(s32)
+    $vgpr3 = COPY %6(s32)
+    $vgpr4_vgpr5 = COPY %2(s64)
+    $vgpr6 = COPY %3(<2 x s16>)
+    SI_RETURN implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+...
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll
index a009854542f21..ad3a316c4c91c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.dead.ll
@@ -3,8 +3,8 @@
 ; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck -check-prefix=ASM-GISEL %s
 
 ; Test that we can use v0 for temporaries in the if.then block.
-define i32 @dead(i1 %cond, i32 %x, ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr2) #0 {
-; ASM-DAG-LABEL: dead:
+define i32 @dead_i32(i1 %cond, i32 %x, ptr addrspace(1) %ptr1) #0 {
+; ASM-DAG-LABEL: dead_i32:
 ; ASM-DAG:       ; %bb.0: ; %entry
 ; ASM-DAG-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; ASM-DAG-NEXT:    s_wait_expcnt 0x0
@@ -27,7 +27,7 @@ define i32 @dead(i1 %cond, i32 %x, ptr addrspace(1) %ptr1, ptr addrspace(1) %ptr
 ; ASM-DAG-NEXT:    s_or_b32 exec_lo, exec_lo, s0
 ; ASM-DAG-NEXT:    s_setpc_b64 s[30:31]
 ;
-; ASM-GISEL-LABEL: dead:
+; ASM-GISEL-LABEL: dead_i32:
 ; ASM-GISEL:       ; %bb.0: ; %entry
 ; ASM-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; ASM-GISEL-NEXT:    s_wait_expcnt 0x0
@@ -62,3 +62,381 @@ if.end:
   %res = phi i32 [ %x, %entry ], [ %dead, %if.then ]
   ret i32 %res
 }
+
+%trivial_types = type { i32, float, <3 x i32>, i64, ptr addrspace(5), ptr addrspace(1), <4 x float>, { float, <2 x i16> } }
+
+define %trivial_types @dead_struct(i1 %cond, %trivial_types %x, ptr addrspace(1) %ptr1, i32 %v) #0 {
+; ASM-DAG-LABEL: dead_struct:
+; ASM-DAG:       ; %bb.0: ; %entry
+; ASM-DAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; ASM-DAG-NEXT:    s_wait_expcnt 0x0
+; ASM-DAG-NEXT:    s_wait_samplecnt 0x0
+; ASM-DAG-NEXT:    s_wait_bvhcnt 0x0
+; ASM-DAG-NEXT:    s_wait_kmcnt 0x0
+; ASM-DAG-NEXT:    v_mov_b32_e32 v20, v0
+; ASM-DAG-NEXT:    v_mov_b32_e32 v0, v1
+; ASM-DAG-NEXT:    s_mov_b32 s0, exec_lo
+; ASM-DAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; ASM-DAG-NEXT:    v_and_b32_e32 v1, 1, v20
+; ASM-DAG-NEXT:    v_cmpx_eq_u32_e32 1, v1
+; ASM-DAG-NEXT:    s_cbranch_execz .LBB1_2
+; ASM-DAG-NEXT:  ; %bb.1: ; %if.then
+; ASM-DAG-NEXT:    v_dual_mov_b32 v11, 0 :: v_dual_add_nc_u32 v0, 15, v19
+; ASM-DAG-NEXT:    v_mov_b32_e32 v2, 0x3fc00000
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr3_vgpr4_vgpr5
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr8
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr9_vgpr10
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr15
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr16
+; ASM-DAG-NEXT:    s_delay_alu instid0(VALU_DEP_2)
+; ASM-DAG-NEXT:    v_dual_mov_b32 v12, v11 :: v_dual_mov_b32 v13, v11
+; ASM-DAG-NEXT:    v_mov_b32_e32 v14, v11
+; ASM-DAG-NEXT:    global_store_b32 v[17:18], v0, off
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr0
+; ASM-DAG-NEXT:  .LBB1_2: ; %if.end
+; ASM-DAG-NEXT:    s_wait_alu 0xfffe
+; ASM-DAG-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; ASM-DAG-NEXT:    v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
+; ASM-DAG-NEXT:    v_dual_mov_b32 v3, v4 :: v_dual_mov_b32 v4, v5
+; ASM-DAG-NEXT:    v_dual_mov_b32 v5, v6 :: v_dual_mov_b32 v6, v7
+; ASM-DAG-NEXT:    v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-DAG-NEXT:    v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-DAG-NEXT:    v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-DAG-NEXT:    v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-DAG-NEXT:    v_mov_b32_e32 v15, v16
+; ASM-DAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; ASM-GISEL-LABEL: dead_struct:
+; ASM-GISEL:       ; %bb.0: ; %entry
+; ASM-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; ASM-GISEL-NEXT:    s_wait_expcnt 0x0
+; ASM-GISEL-NEXT:    s_wait_samplecnt 0x0
+; ASM-GISEL-NEXT:    s_wait_bvhcnt 0x0
+; ASM-GISEL-NEXT:    s_wait_kmcnt 0x0
+; ASM-GISEL-NEXT:    v_mov_b32_e32 v20, v0
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
+; ASM-GISEL-NEXT:    s_mov_b32 s0, exec_lo
+; ASM-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; ASM-GISEL-NEXT:    v_and_b32_e32 v2, 1, v20
+; ASM-GISEL-NEXT:    v_cmpx_ne_u32_e32 0, v2
+; ASM-GISEL-NEXT:    s_cbranch_execz .LBB1_2
+; ASM-GISEL-NEXT:  ; %bb.1: ; %if.then
+; ASM-GISEL-NEXT:    s_mov_b32 s4, 0
+; ASM-GISEL-NEXT:    s_mov_b32 s1, 0x3fc00000
+; ASM-GISEL-NEXT:    s_wait_alu 0xfffe
+; ASM-GISEL-NEXT:    s_mov_b32 s7, s4
+; ASM-GISEL-NEXT:    s_mov_b32 s5, s4
+; ASM-GISEL-NEXT:    s_mov_b32 s6, s4
+; ASM-GISEL-NEXT:    s_wait_alu 0xfffe
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v14, s7 :: v_dual_mov_b32 v13, s6
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_add_nc_u32 v0, 15, v19
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v12, s5 :: v_dual_mov_b32 v11, s4
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr3_vgpr4_vgpr5
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr6_vgpr7
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr8
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr9_vgpr10
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr15
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr16
+; ASM-GISEL-NEXT:    global_store_b32 v[17:18], v0, off
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr0
+; ASM-GISEL-NEXT:  .LBB1_2: ; %if.end
+; ASM-GISEL-NEXT:    s_wait_alu 0xfffe
+; ASM-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v6, v7 :: v_dual_mov_b32 v7, v8
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v8, v9 :: v_dual_mov_b32 v9, v10
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v10, v11 :: v_dual_mov_b32 v11, v12
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v12, v13 :: v_dual_mov_b32 v13, v14
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v14, v15 :: v_dual_mov_b32 v15, v16
+; ASM-GISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  %dead = call %trivial_types @llvm.amdgcn.dead.s_trivial_typess()
+  %dead_insert_1 = insertvalue %trivial_types %dead, float 1.5, 1
+  %dead_insert_3 = insertvalue %trivial_types %dead_insert_1, <4 x float> zeroinitializer, 6
+
+  %vgpr_use = add i32 %v, 15 ; may use v0 or one of the other implicit_defs
+  store i32 %vgpr_use, ptr addrspace(1) %ptr1
+
+  br label %if.end
+
+if.end:
+  %res = phi %trivial_types [ %x, %entry ], [ %dead_insert_3, %if.then ]
+  ret %trivial_types %res
+}
+
+define [32 x i32] @dead_array(i1 %cond, [32 x i32] %x, ptr addrspace(1) %ptr1, i32 %v) #0 {
+; ASM-DAG-LABEL: dead_array:
+; ASM-DAG:       ; %bb.0: ; %entry
+; ASM-DAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; ASM-DAG-NEXT:    s_wait_expcnt 0x0
+; ASM-DAG-NEXT:    s_wait_samplecnt 0x0
+; ASM-DAG-NEXT:    s_wait_bvhcnt 0x0
+; ASM-DAG-NEXT:    s_wait_kmcnt 0x0
+; ASM-DAG-NEXT:    v_dual_mov_b32 v32, v30 :: v_dual_mov_b32 v33, v0
+; ASM-DAG-NEXT:    v_mov_b32_e32 v0, v1
+; ASM-DAG-NEXT:    s_clause 0x4
+; ASM-DAG-NEXT:    scratch_load_b32 v35, off, s32 offset:12
+; ASM-DAG-NEXT:    scratch_load_b32 v34, off, s32 offset:8
+; ASM-DAG-NEXT:    scratch_load_b32 v31, off, s32 offset:4
+; ASM-DAG-NEXT:    scratch_load_b32 v30, off, s32
+; ASM-DAG-NEXT:    scratch_load_b32 v1, off, s32 offset:16
+; ASM-DAG-NEXT:    s_mov_b32 s0, exec_lo
+; ASM-DAG-NEXT:    v_and_b32_e32 v33, 1, v33
+; ASM-DAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; ASM-DAG-NEXT:    v_cmpx_eq_u32_e32 1, v33
+; ASM-DAG-NEXT:    s_cbranch_execz .LBB2_2
+; ASM-DAG-NEXT:  ; %bb.1: ; %if.then
+; ASM-DAG-NEXT:    v_dual_mov_b32 v8, 15 :: v_dual_mov_b32 v7, 13
+; ASM-DAG-NEXT:    s_wait_loadcnt 0x0
+; ASM-DAG-NEXT:    v_add_nc_u32_e32 v0, 15, v1
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr2
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr3
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr4
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr5
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr6
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr9
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr10
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr11
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr12
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr13
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr14
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr15
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr16
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr17
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr18
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr19
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr20
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr21
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr22
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr23
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr24
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr25
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr26
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr27
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr28
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr29
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr32
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr30
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr31
+; ASM-DAG-NEXT:    global_store_b32 v[34:35], v0, off
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr0
+; ASM-DAG-NEXT:  .LBB2_2: ; %if.end
+; ASM-DAG-NEXT:    s_wait_alu 0xfffe
+; ASM-DAG-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; ASM-DAG-NEXT:    s_wait_loadcnt 0x0
+; ASM-DAG-NEXT:    v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
+; ASM-DAG-NEXT:    v_dual_mov_b32 v3, v4 :: v_dual_mov_b32 v4, v5
+; ASM-DAG-NEXT:    v_dual_mov_b32 v5, v6 :: v_dual_mov_b32 v6, v7
+; ASM-DAG-NEXT:    v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-DAG-NEXT:    v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-DAG-NEXT:    v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-DAG-NEXT:    v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-DAG-NEXT:    v_dual_mov_b32 v15, v16 :: v_dual_mov_b32 v16, v17
+; ASM-DAG-NEXT:    v_dual_mov_b32 v17, v18 :: v_dual_mov_b32 v18, v19
+; ASM-DAG-NEXT:    v_dual_mov_b32 v19, v20 :: v_dual_mov_b32 v20, v21
+; ASM-DAG-NEXT:    v_dual_mov_b32 v21, v22 :: v_dual_mov_b32 v22, v23
+; ASM-DAG-NEXT:    v_dual_mov_b32 v23, v24 :: v_dual_mov_b32 v24, v25
+; ASM-DAG-NEXT:    v_dual_mov_b32 v25, v26 :: v_dual_mov_b32 v26, v27
+; ASM-DAG-NEXT:    v_dual_mov_b32 v27, v28 :: v_dual_mov_b32 v28, v29
+; ASM-DAG-NEXT:    v_mov_b32_e32 v29, v32
+; ASM-DAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; ASM-GISEL-LABEL: dead_array:
+; ASM-GISEL:       ; %bb.0: ; %entry
+; ASM-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; ASM-GISEL-NEXT:    s_wait_expcnt 0x0
+; ASM-GISEL-NEXT:    s_wait_samplecnt 0x0
+; ASM-GISEL-NEXT:    s_wait_bvhcnt 0x0
+; ASM-GISEL-NEXT:    s_wait_kmcnt 0x0
+; ASM-GISEL-NEXT:    v_mov_b32_e32 v32, v0
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v6, v7 :: v_dual_mov_b32 v7, v8
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v8, v9 :: v_dual_mov_b32 v9, v10
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v10, v11 :: v_dual_mov_b32 v11, v12
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v12, v13 :: v_dual_mov_b32 v13, v14
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v14, v15 :: v_dual_mov_b32 v15, v16
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v16, v17 :: v_dual_mov_b32 v17, v18
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v18, v19 :: v_dual_mov_b32 v19, v20
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v20, v21 :: v_dual_mov_b32 v21, v22
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v22, v23 :: v_dual_mov_b32 v23, v24
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v24, v25 :: v_dual_mov_b32 v25, v26
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v26, v27 :: v_dual_mov_b32 v27, v28
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v28, v29 :: v_dual_mov_b32 v29, v30
+; ASM-GISEL-NEXT:    s_clause 0x4
+; ASM-GISEL-NEXT:    scratch_load_b32 v30, off, s32
+; ASM-GISEL-NEXT:    scratch_load_b32 v31, off, s32 offset:4
+; ASM-GISEL-NEXT:    scratch_load_b32 v33, off, s32 offset:8
+; ASM-GISEL-NEXT:    scratch_load_b32 v34, off, s32 offset:12
+; ASM-GISEL-NEXT:    scratch_load_b32 v35, off, s32 offset:16
+; ASM-GISEL-NEXT:    v_and_b32_e32 v32, 1, v32
+; ASM-GISEL-NEXT:    s_mov_b32 s0, exec_lo
+; ASM-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; ASM-GISEL-NEXT:    v_cmpx_ne_u32_e32 0, v32
+; ASM-GISEL-NEXT:    s_cbranch_execz .LBB2_2
+; ASM-GISEL-NEXT:  ; %bb.1: ; %if.then
+; ASM-GISEL-NEXT:    s_mov_b32 s1, 15
+; ASM-GISEL-NEXT:    s_mov_b32 s2, 13
+; ASM-GISEL-NEXT:    s_wait_loadcnt 0x0
+; ASM-GISEL-NEXT:    s_wait_alu 0xfffe
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v7, s1 :: v_dual_add_nc_u32 v0, 15, v35
+; ASM-GISEL-NEXT:    v_mov_b32_e32 v6, s2
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr1
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr2
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr3
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr4
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr5
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr8
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr9
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr10
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr11
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr12
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr13
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr14
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr15
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr16
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr17
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr18
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr19
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr20
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr21
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr22
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr23
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr24
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr25
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr26
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr27
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr28
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr29
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr30
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr31
+; ASM-GISEL-NEXT:    global_store_b32 v[33:34], v0, off
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr0
+; ASM-GISEL-NEXT:  .LBB2_2: ; %if.end
+; ASM-GISEL-NEXT:    s_wait_alu 0xfffe
+; ASM-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; ASM-GISEL-NEXT:    s_wait_loadcnt 0x0
+; ASM-GISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  %dead = call [32 x i32] @llvm.amdgcn.dead()
+  %dead_insert_1 = insertvalue [32 x i32] %dead, i32 15, 7
+  %dead_insert_3 = insertvalue [32 x i32] %dead_insert_1, i32 13, 6
+
+  %vgpr_use = add i32 %v, 15 ; may use v0 or one of the other implicit_defs
+  store i32 %vgpr_use, ptr addrspace(1) %ptr1
+
+  br label %if.end
+
+if.end:
+  %res = phi [32 x i32] [ %x, %entry ], [ %dead_insert_3, %if.then ]
+  ret [32 x i32] %res
+}
+
+%non_trivial_types = type { i8, i16, half, bfloat, <2 x i16>, <2 x half>, <2 x bfloat>, <5 x i32>, i128}
+
+define %non_trivial_types @dead_non_trivial(i1 %cond, %non_trivial_types %x, ptr addrspace(1) %ptr1, i32 %v) #0 {
+; ASM-DAG-LABEL: dead_non_trivial:
+; ASM-DAG:       ; %bb.0: ; %entry
+; ASM-DAG-NEXT:    s_wait_loadcnt_dscnt 0x0
+; ASM-DAG-NEXT:    s_wait_expcnt 0x0
+; ASM-DAG-NEXT:    s_wait_samplecnt 0x0
+; ASM-DAG-NEXT:    s_wait_bvhcnt 0x0
+; ASM-DAG-NEXT:    s_wait_kmcnt 0x0
+; ASM-DAG-NEXT:    v_mov_b32_e32 v20, v0
+; ASM-DAG-NEXT:    v_mov_b32_e32 v0, v1
+; ASM-DAG-NEXT:    s_mov_b32 s0, exec_lo
+; ASM-DAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; ASM-DAG-NEXT:    v_and_b32_e32 v1, 1, v20
+; ASM-DAG-NEXT:    v_cmpx_eq_u32_e32 1, v1
+; ASM-DAG-NEXT:    s_cbranch_execz .LBB3_2
+; ASM-DAG-NEXT:  ; %bb.1: ; %if.then
+; ASM-DAG-NEXT:    v_dual_mov_b32 v7, 0 :: v_dual_add_nc_u32 v0, 15, v19
+; ASM-DAG-NEXT:    v_mov_b32_e32 v3, 0x3e00
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr2
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr4
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr5
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr6
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr13_vgpr14
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr15_vgpr16
+; ASM-DAG-NEXT:    global_store_b32 v[17:18], v0, off
+; ASM-DAG-NEXT:    ; implicit-def: $vgpr0
+; ASM-DAG-NEXT:  .LBB3_2: ; %if.end
+; ASM-DAG-NEXT:    s_wait_alu 0xfffe
+; ASM-DAG-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; ASM-DAG-NEXT:    v_dual_mov_b32 v1, v2 :: v_dual_mov_b32 v2, v3
+; ASM-DAG-NEXT:    v_dual_mov_b32 v3, v4 :: v_dual_mov_b32 v4, v5
+; ASM-DAG-NEXT:    v_dual_mov_b32 v5, v6 :: v_dual_mov_b32 v6, v7
+; ASM-DAG-NEXT:    v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-DAG-NEXT:    v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-DAG-NEXT:    v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-DAG-NEXT:    v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-DAG-NEXT:    v_mov_b32_e32 v15, v16
+; ASM-DAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; ASM-GISEL-LABEL: dead_non_trivial:
+; ASM-GISEL:       ; %bb.0: ; %entry
+; ASM-GISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
+; ASM-GISEL-NEXT:    s_wait_expcnt 0x0
+; ASM-GISEL-NEXT:    s_wait_samplecnt 0x0
+; ASM-GISEL-NEXT:    s_wait_bvhcnt 0x0
+; ASM-GISEL-NEXT:    s_wait_kmcnt 0x0
+; ASM-GISEL-NEXT:    v_mov_b32_e32 v20, v0
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v0, v1 :: v_dual_mov_b32 v1, v2
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v2, v3 :: v_dual_mov_b32 v3, v4
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v4, v5 :: v_dual_mov_b32 v5, v6
+; ASM-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v6, v7 :: v_dual_and_b32 v7, 1, v20
+; ASM-GISEL-NEXT:    s_mov_b32 s0, exec_lo
+; ASM-GISEL-NEXT:    v_cmpx_ne_u32_e32 0, v7
+; ASM-GISEL-NEXT:    s_cbranch_execz .LBB3_2
+; ASM-GISEL-NEXT:  ; %bb.1: ; %if.then
+; ASM-GISEL-NEXT:    s_movk_i32 s1, 0x3e00
+; ASM-GISEL-NEXT:    s_mov_b32 s2, 0
+; ASM-GISEL-NEXT:    v_add_nc_u32_e32 v0, 15, v19
+; ASM-GISEL-NEXT:    s_wait_alu 0xfffe
+; ASM-GISEL-NEXT:    v_mov_b32_e32 v2, s1
+; ASM-GISEL-NEXT:    v_mov_b32_e32 v6, s2
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr1
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr3
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr4
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr5
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr13_vgpr14_vgpr15_vgpr16
+; ASM-GISEL-NEXT:    global_store_b32 v[17:18], v0, off
+; ASM-GISEL-NEXT:    ; implicit-def: $vgpr0
+; ASM-GISEL-NEXT:  .LBB3_2: ; %if.end
+; ASM-GISEL-NEXT:    s_wait_alu 0xfffe
+; ASM-GISEL-NEXT:    s_or_b32 exec_lo, exec_lo, s0
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v7, v8 :: v_dual_mov_b32 v8, v9
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v9, v10 :: v_dual_mov_b32 v10, v11
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v11, v12 :: v_dual_mov_b32 v12, v13
+; ASM-GISEL-NEXT:    v_dual_mov_b32 v13, v14 :: v_dual_mov_b32 v14, v15
+; ASM-GISEL-NEXT:    v_mov_b32_e32 v15, v16
+; ASM-GISEL-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  br i1 %cond, label %if.then, label %if.end
+
+if.then:
+  %dead = call %non_trivial_types @llvm.amdgcn.dead.s_non_trivial_typess()
+  %dead_insert_1 = insertvalue %non_trivial_types %dead, half 1.5, 2
+  %dead_insert_3 = insertvalue %non_trivial_types %dead_insert_1, <2 x bfloat> zeroinitializer, 6
+
+  %vgpr_use = add i32 %v, 15 ; may use v0 or one of the other implicit_defs
+  store i32 %vgpr_use, ptr addrspace(1) %ptr1
+
+  br label %if.end
+
+if.end:
+  %res = phi %non_trivial_types [ %x, %entry ], [ %dead_insert_3, %if.then ]
+  ret %non_trivial_types %res
+}


