[llvm-branch-commits] [llvm] [AMDGPU] Propagate Constants for Wave Reduction Intrinsics (PR #150395)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Fri Jul 25 03:31:10 PDT 2025


https://github.com/easyonaadit updated https://github.com/llvm/llvm-project/pull/150395

>From b5a89fa2f32582537b8c234db778e119563f6896 Mon Sep 17 00:00:00 2001
From: Aaditya <Aaditya.AlokDeshpande at amd.com>
Date: Thu, 24 Jul 2025 15:14:30 +0530
Subject: [PATCH] Propagate Constants for Wave Reduction Intrinsics

---
 llvm/lib/Analysis/ConstantFolding.cpp         |  14 +
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll  | 620 +++------------
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll  |  88 +--
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll  |  88 +--
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll  |  88 +--
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll   |  88 +--
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll  | 746 +++---------------
 .../CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll  | 677 +++-------------
 .../ConstProp/AMDGPU/wave.reduce.ll           | 455 ++++++++++-
 9 files changed, 755 insertions(+), 2109 deletions(-)
 mode change 100644 => 100755 llvm/lib/Analysis/ConstantFolding.cpp

diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
old mode 100644
new mode 100755
index e71ba5ea5521e..11d22c75831d0
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
   case Intrinsic::amdgcn_perm:
   case Intrinsic::amdgcn_wave_reduce_umin:
   case Intrinsic::amdgcn_wave_reduce_umax:
+  case Intrinsic::amdgcn_wave_reduce_max:
+  case Intrinsic::amdgcn_wave_reduce_min:
+  case Intrinsic::amdgcn_wave_reduce_add:
+  case Intrinsic::amdgcn_wave_reduce_sub:
+  case Intrinsic::amdgcn_wave_reduce_and:
+  case Intrinsic::amdgcn_wave_reduce_or:
+  case Intrinsic::amdgcn_wave_reduce_xor:
   case Intrinsic::amdgcn_s_wqm:
   case Intrinsic::amdgcn_s_quadmask:
   case Intrinsic::amdgcn_s_bitreplicate:
@@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
       return ConstantInt::get(Ty, C0->abs());
     case Intrinsic::amdgcn_wave_reduce_umin:
     case Intrinsic::amdgcn_wave_reduce_umax:
+    case Intrinsic::amdgcn_wave_reduce_max:
+    case Intrinsic::amdgcn_wave_reduce_min:
+    case Intrinsic::amdgcn_wave_reduce_add:
+    case Intrinsic::amdgcn_wave_reduce_sub:
+    case Intrinsic::amdgcn_wave_reduce_and:
+    case Intrinsic::amdgcn_wave_reduce_or:
+    case Intrinsic::amdgcn_wave_reduce_xor:
       return dyn_cast<Constant>(Operands[0]);
     }
 
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index b6af8b4bb798d..d3d9275a566e2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -191,10 +191,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: const_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
@@ -204,10 +201,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX8GISEL-LABEL: const_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
@@ -217,11 +211,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
@@ -229,72 +220,35 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-LABEL: const_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: const_value:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
+; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT:    s_endpgm
+;
+; GFX10GISEL-LABEL: const_value:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX1164DAGISEL-LABEL: const_value:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
@@ -302,12 +256,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-LABEL: const_value:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
@@ -315,12 +265,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
@@ -328,12 +273,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-LABEL: const_value:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
@@ -346,152 +286,35 @@ entry:
 define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: poison_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: poison_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: poison_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: poison_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: poison_value:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
+; GFX10GISEL-LABEL: poison_value:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
+; GFX11DAGISEL-LABEL: poison_value:
+; GFX11DAGISEL:       ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
+; GFX11GISEL-LABEL: poison_value:
+; GFX11GISEL:       ; %bb.0: ; %entry
+; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
   store i32 %result, ptr addrspace(1) %out
@@ -1431,33 +1254,21 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: const_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s4, s3
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: const_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
@@ -1465,129 +1276,56 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: const_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: const_value_i64:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
+; GFX10DAGISEL-NEXT:    s_endpgm
+;
+; GFX10GISEL-LABEL: const_value_i64:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX1164DAGISEL-LABEL: const_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1164GISEL-LABEL: const_value_i64:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
@@ -1595,31 +1333,16 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1132GISEL-LABEL: const_value_i64:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_3) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s4, s2, 0
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
@@ -1632,197 +1355,35 @@ entry:
 define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8DAGISEL-LABEL: poison_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s4, s0, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: poison_value_i64:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
+; GFX10GISEL-LABEL: poison_value_i64:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
+; GFX11DAGISEL-LABEL: poison_value_i64:
+; GFX11DAGISEL:       ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: poison_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s3, s0, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s4, s1, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
+; GFX11GISEL-LABEL: poison_value_i64:
+; GFX11GISEL:       ; %bb.0: ; %entry
+; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1)
   store i64 %result, ptr addrspace(1) %out
@@ -2582,8 +2143,3 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10DAGISEL: {{.*}}
-; GFX10GISEL: {{.*}}
-; GFX11DAGISEL: {{.*}}
-; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
index 55e6189f65675..9795463cb4e26 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll
@@ -223,68 +223,34 @@ entry:
 define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: poison_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1)
@@ -1113,11 +1079,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: const_value_i64:
@@ -1133,11 +1098,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX10DAGISEL-LABEL: const_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
 ; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: const_value_i64:
@@ -1153,11 +1117,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-LABEL: const_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1164GISEL-LABEL: const_value_i64:
@@ -1173,10 +1136,9 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1132GISEL-LABEL: const_value_i64:
@@ -1196,68 +1158,34 @@ entry:
 define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8DAGISEL-LABEL: poison_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value_i64:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value_i64:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value_i64:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
index 96b67e71fcd28..ce02ad2d6fb6f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll
@@ -223,68 +223,34 @@ entry:
 define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: poison_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1)
@@ -1113,11 +1079,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: const_value_i64:
@@ -1133,11 +1098,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX10DAGISEL-LABEL: const_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
 ; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: const_value_i64:
@@ -1153,11 +1117,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-LABEL: const_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1164GISEL-LABEL: const_value_i64:
@@ -1173,10 +1136,9 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1132GISEL-LABEL: const_value_i64:
@@ -1196,68 +1158,34 @@ entry:
 define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8DAGISEL-LABEL: poison_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value_i64:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value_i64:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value_i64:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
index 4e0c9ec111cbe..5ba35aee371d5 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll
@@ -223,68 +223,34 @@ entry:
 define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: poison_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1)
@@ -1113,11 +1079,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: const_value_i64:
@@ -1133,11 +1098,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX10DAGISEL-LABEL: const_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
 ; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: const_value_i64:
@@ -1153,11 +1117,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-LABEL: const_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1164GISEL-LABEL: const_value_i64:
@@ -1173,10 +1136,9 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1132GISEL-LABEL: const_value_i64:
@@ -1196,68 +1158,34 @@ entry:
 define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8DAGISEL-LABEL: poison_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value_i64:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value_i64:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value_i64:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
index 5dcd1de6d05d7..dbf1f0cb5fa0c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll
@@ -225,68 +225,34 @@ entry:
 define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: poison_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v0
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dword v0, v0, s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b32 v0, v0, s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1)
@@ -1115,11 +1081,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: const_value_i64:
@@ -1135,11 +1100,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX10DAGISEL-LABEL: const_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
 ; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: const_value_i64:
@@ -1155,11 +1119,10 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1164DAGISEL-LABEL: const_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1164GISEL-LABEL: const_value_i64:
@@ -1175,10 +1138,9 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1132GISEL-LABEL: const_value_i64:
@@ -1198,68 +1160,34 @@ entry:
 define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8DAGISEL-LABEL: poison_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[0:1], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
 ; GFX10DAGISEL-LABEL: poison_value_i64:
 ; GFX10DAGISEL:       ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX10GISEL-LABEL: poison_value_i64:
 ; GFX10GISEL:       ; %bb.0: ; %entry
-; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT:    global_store_dwordx2 v0, v[0:1], s[0:1]
 ; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX11DAGISEL-LABEL: poison_value_i64:
 ; GFX11DAGISEL:       ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX11GISEL-LABEL: poison_value_i64:
 ; GFX11GISEL:       ; %bb.0: ; %entry
-; GFX11GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX11GISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT:    global_store_b64 v0, v[0:1], s[0:1]
 ; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
index 9a0917133fc59..653a4814ddc42 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll
@@ -204,27 +204,19 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: const_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_sub_i32 s4, 0, 0x7b
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s4, s2
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: const_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, s2
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
@@ -232,12 +224,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
@@ -245,78 +233,35 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-LABEL: const_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: const_value:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
+; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT:    s_endpgm
+;
+; GFX10GISEL-LABEL: const_value:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX1164DAGISEL-LABEL: const_value:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
@@ -324,13 +269,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-LABEL: const_value:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
@@ -338,12 +278,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
@@ -351,13 +286,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-LABEL: const_value:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    s_sub_i32 s3, 0, 0x7b
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
@@ -370,165 +299,35 @@ entry:
 define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: poison_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_sub_i32 s4, 0, s0
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s4, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: poison_value:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
+; GFX10GISEL-LABEL: poison_value:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
+; GFX11DAGISEL-LABEL: poison_value:
+; GFX11DAGISEL:       ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_sub_i32 s3, 0, s0
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s3, s2
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
+; GFX11GISEL-LABEL: poison_value:
+; GFX11GISEL:       ; %bb.0: ; %entry
+; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1)
   store i32 %result, ptr addrspace(1) %out
@@ -1531,204 +1330,79 @@ entry:
 define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: const_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX8DAGISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: const_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8GISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: const_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: const_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s2
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s2, 0x7b
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s5, 0x7b, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1064DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1064GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1064GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1032DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1032GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1032GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: const_value_i64:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
+; GFX10DAGISEL-NEXT:    s_endpgm
+;
+; GFX10GISEL-LABEL: const_value_i64:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX1164DAGISEL-LABEL: const_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1164DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1164GISEL-LABEL: const_value_i64:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1164GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1164GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
@@ -1736,41 +1410,16 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1132DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1132GISEL-LABEL: const_value_i64:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1132GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s5, s2, 0
-; GFX1132GISEL-NEXT:    s_mulk_i32 s3, 0x7b
-; GFX1132GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
@@ -1783,249 +1432,35 @@ entry:
 define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8DAGISEL-LABEL: poison_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX8GISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX9DAGISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_sub_i32 s3, 0, s2
-; GFX9GISEL-NEXT:    s_ashr_i32 s2, s3, 31
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s0, s2
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s5, s0, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s3, s1, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s5, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1064DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: poison_value_i64:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1064GISEL-LABEL: poison_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1064GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1064GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
+; GFX10GISEL-LABEL: poison_value_i64:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1032DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
+; GFX11DAGISEL-LABEL: poison_value_i64:
+; GFX11DAGISEL:       ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1032GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1032GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1164DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: poison_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1164GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1164GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1132DAGISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_sub_i32 s2, 0, s2
-; GFX1132GISEL-NEXT:    s_ashr_i32 s3, s2, 31
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s5, s1, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s0, s3
-; GFX1132GISEL-NEXT:    s_add_u32 s4, s4, s5
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
+; GFX11GISEL-LABEL: poison_value_i64:
+; GFX11GISEL:       ; %bb.0: ; %entry
+; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1)
   store i64 %result, ptr addrspace(1) %out
@@ -2890,8 +2325,3 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10DAGISEL: {{.*}}
-; GFX10GISEL: {{.*}}
-; GFX11DAGISEL: {{.*}}
-; GFX11GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
index b96954d030fef..a5ad8d592580e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll
@@ -204,27 +204,19 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: const_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
 ; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: const_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GISEL-NEXT:    s_mulk_i32 s2, 0x7b
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, 0x7b
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
 ; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
@@ -232,12 +224,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX9DAGISEL-LABEL: const_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
 ; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
@@ -245,79 +233,35 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX9GISEL-LABEL: const_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: const_value:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
+; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT:    s_endpgm
+;
+; GFX10GISEL-LABEL: const_value:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
+; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX1164DAGISEL-LABEL: const_value:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
@@ -325,14 +269,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1164GISEL-LABEL: const_value:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
@@ -340,13 +278,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
@@ -354,14 +286,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
 ; GFX1132GISEL-LABEL: const_value:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
@@ -374,165 +299,35 @@ entry:
 define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
 ; GFX8DAGISEL-LABEL: poison_value:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT:    flat_store_dword v[0:1], v2
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: poison_value:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: poison_value:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1032DAGISEL-LABEL: poison_value:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT:    global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
+; GFX10GISEL-LABEL: poison_value:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_endpgm
 ;
-; GFX1032GISEL-LABEL: poison_value:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
+; GFX11DAGISEL-LABEL: poison_value:
+; GFX11DAGISEL:       ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164DAGISEL-LABEL: poison_value:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
-;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
-;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
-;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
+; GFX11GISEL-LABEL: poison_value:
+; GFX11GISEL:       ; %bb.0: ; %entry
+; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1)
   store i32 %result, ptr addrspace(1) %out
@@ -1499,173 +1294,78 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX8DAGISEL-LABEL: const_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
 ; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT:    s_mul_i32 s4, s3, 0
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s3, s4
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: const_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
 ; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s3, 0
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
 ; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: const_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s3, 0
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: const_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
 ; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s3, 0
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s3, 0x7b, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
 ; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: const_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1064GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1032GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: const_value_i64:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT:    global_store_dwordx2 v1, v[0:1], s[0:1]
+; GFX10DAGISEL-NEXT:    s_endpgm
+;
+; GFX10GISEL-LABEL: const_value_i64:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT:    v_mov_b32_e32 v2, 0
+; GFX10GISEL-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10GISEL-NEXT:    s_endpgm
 ;
 ; GFX1164DAGISEL-LABEL: const_value_i64:
 ; GFX1164DAGISEL:       ; %bb.0: ; %entry
 ; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, 0
+; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
 ; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1164DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1164DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1164GISEL-LABEL: const_value_i64:
 ; GFX1164GISEL:       ; %bb.0: ; %entry
 ; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, 0
 ; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1164GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
 ; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1164GISEL-NEXT:    s_endpgm
@@ -1673,35 +1373,16 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
 ; GFX1132DAGISEL-LABEL: const_value_i64:
 ; GFX1132DAGISEL:       ; %bb.0: ; %entry
 ; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132DAGISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
+; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, 0x7b
 ; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
+; GFX1132DAGISEL-NEXT:    global_store_b64 v1, v[0:1], s[0:1]
 ; GFX1132DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX1132GISEL-LABEL: const_value_i64:
 ; GFX1132GISEL:       ; %bb.0: ; %entry
 ; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s2, 0
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s4, 0x7b, s2
-; GFX1132GISEL-NEXT:    s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX1132GISEL-NEXT:    v_mov_b32_e32 v0, 0x7b
+; GFX1132GISEL-NEXT:    v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
 ; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
 ; GFX1132GISEL-NEXT:    s_endpgm
@@ -1714,212 +1395,35 @@ entry:
 define amdgpu_kernel void @poison_value_i64(ptr addrspace(1) %out, i64 %in) {
 ; GFX8DAGISEL-LABEL: poison_value_i64:
 ; GFX8DAGISEL:       ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX8DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8DAGISEL-NEXT:    s_mul_i32 s4, s1, s3
-; GFX8DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s3
-; GFX8DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT:    v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
 ; GFX8DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX8GISEL-LABEL: poison_value_i64:
 ; GFX8GISEL:       ; %bb.0: ; %entry
-; GFX8GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX8GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX8GISEL-NEXT:    s_mul_i32 s4, s1, s3
-; GFX8GISEL-NEXT:    s_mul_hi_u32 s3, s0, s3
-; GFX8GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8GISEL-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
 ; GFX8GISEL-NEXT:    s_endpgm
 ;
 ; GFX9DAGISEL-LABEL: poison_value_i64:
 ; GFX9DAGISEL:       ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9DAGISEL-NEXT:    s_mul_i32 s4, s1, s3
-; GFX9DAGISEL-NEXT:    s_mul_hi_u32 s3, s0, s3
-; GFX9DAGISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9DAGISEL-NEXT:    s_endpgm
 ;
 ; GFX9GISEL-LABEL: poison_value_i64:
 ; GFX9GISEL:       ; %bb.0: ; %entry
-; GFX9GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT:    s_and_b32 s3, s2, 1
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT:    s_mul_i32 s2, s0, s3
-; GFX9GISEL-NEXT:    s_mul_i32 s4, s1, s3
-; GFX9GISEL-NEXT:    s_mul_hi_u32 s3, s0, s3
-; GFX9GISEL-NEXT:    s_add_u32 s3, s3, s4
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
 ; GFX9GISEL-NEXT:    s_endpgm
 ;
-; GFX1064DAGISEL-LABEL: poison_value_i64:
-; GFX1064DAGISEL:       ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1064DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1064DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064DAGISEL-NEXT:    s_endpgm
-;
-; GFX1064GISEL-LABEL: poison_value_i64:
-; GFX1064GISEL:       ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1064GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1064GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1064GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1064GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1064GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1064GISEL-NEXT:    s_endpgm
-;
-; GFX1032DAGISEL-LABEL: poison_value_i64:
-; GFX1032DAGISEL:       ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1032DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1032DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032DAGISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032DAGISEL-NEXT:    s_endpgm
-;
-; GFX1032GISEL-LABEL: poison_value_i64:
-; GFX1032GISEL:       ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1032GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1032GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1032GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1032GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1032GISEL-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX1032GISEL-NEXT:    s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value_i64:
-; GFX1164DAGISEL:       ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164DAGISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1164DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1164DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164DAGISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164DAGISEL-NEXT:    s_endpgm
+; GFX10DAGISEL-LABEL: poison_value_i64:
+; GFX10DAGISEL:       ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1164GISEL-LABEL: poison_value_i64:
-; GFX1164GISEL:       ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT:    s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1164GISEL-NEXT:    s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1164GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1164GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1164GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT:    v_mov_b32_e32 v1, s3
-; GFX1164GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1164GISEL-NEXT:    s_endpgm
+; GFX10GISEL-LABEL: poison_value_i64:
+; GFX10GISEL:       ; %bb.0: ; %entry
+; GFX10GISEL-NEXT:    s_endpgm
 ;
-; GFX1132DAGISEL-LABEL: poison_value_i64:
-; GFX1132DAGISEL:       ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132DAGISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1132DAGISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1132DAGISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132DAGISEL-NEXT:    v_mov_b32_e32 v0, s2
-; GFX1132DAGISEL-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s3
-; GFX1132DAGISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132DAGISEL-NEXT:    s_endpgm
+; GFX11DAGISEL-LABEL: poison_value_i64:
+; GFX11DAGISEL:       ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT:    s_endpgm
 ;
-; GFX1132GISEL-LABEL: poison_value_i64:
-; GFX1132GISEL:       ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT:    s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT:    v_mov_b32_e32 v2, 0
-; GFX1132GISEL-NEXT:    s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    s_and_b32 s2, s2, 1
-; GFX1132GISEL-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT:    s_mul_i32 s3, s1, s2
-; GFX1132GISEL-NEXT:    s_mul_hi_u32 s4, s0, s2
-; GFX1132GISEL-NEXT:    s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT:    s_add_u32 s3, s4, s3
-; GFX1132GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
-; GFX1132GISEL-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
-; GFX1132GISEL-NEXT:    s_endpgm
+; GFX11GISEL-LABEL: poison_value_i64:
+; GFX11GISEL:       ; %bb.0: ; %entry
+; GFX11GISEL-NEXT:    s_endpgm
 entry:
   %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1)
   store i64 %result, ptr addrspace(1) %out
@@ -2692,8 +2196,3 @@ endif:
   store i64 %combine, ptr addrspace(1) %out
   ret void
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX10DAGISEL: {{.*}}
-; GFX10GISEL: {{.*}}
-; GFX11DAGISEL: {{.*}}
-; GFX11GISEL: {{.*}}
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
index 863598578ea77..a27a121a3af61 100644
--- a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
@@ -2,13 +2,14 @@
 ; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
 
 ; --------------------------------------------------------------------
-; llvm.amdgcn.wave.reduce.umin.i32
+; llvm.amdgcn.wave.reduce.umin
 ; --------------------------------------------------------------------
 
 declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.umin.i64(i64, i32 immarg)
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_poison(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -19,8 +20,8 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i32_const(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -31,14 +32,94 @@ entry:
   ret void
 }
 
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umin.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.min
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.min.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.min.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.min.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_min_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_min_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.min.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
 ; --------------------------------------------------------------------
-; llvm.amdgcn.wave.reduce.umin.i32
+; llvm.amdgcn.wave.reduce.umax
 ; --------------------------------------------------------------------
 
 declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.umax.i64(i64, i32 immarg)
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_poison(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -49,8 +130,8 @@ entry:
   ret void
 }
 
-define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) {
-; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const(
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i32_const(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
 ; CHECK-NEXT:    ret void
@@ -61,6 +142,30 @@ entry:
   ret void
 }
 
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.umax.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
 @gv = constant i32 0
 define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) {
 ; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv(
@@ -74,3 +179,333 @@ entry:
   store i32 %result, ptr addrspace(1) %out
   ret void
 }
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.max
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.max.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.max.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.max.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_max_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_max_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.max.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.add
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.add.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.add.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_add_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_add_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.add.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.sub
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.sub.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.sub.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.sub.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_sub_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_sub_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.sub.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.and
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.and.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.and.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.and.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_and_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_and_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.and.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.or
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.or.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.or.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.or.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_or_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_or_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.or.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.xor
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.xor.i32(i32, i32 immarg)
+declare i64 @llvm.amdgcn.wave.reduce.xor.i64(i64, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 poison, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i32_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i32_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i32 @llvm.amdgcn.wave.reduce.xor.i32(i32 123, i32 1)
+  store i32 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_poison(ptr addrspace(1) %out, i64 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_poison(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 poison, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_xor_i64_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_xor_i64_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i64 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %result = call i64 @llvm.amdgcn.wave.reduce.xor.i64(i64 123, i32 1)
+  store i64 %result, ptr addrspace(1) %out
+  ret void
+}



More information about the llvm-branch-commits mailing list