[llvm-branch-commits] [llvm] [AMDGPU] Propagate Constants for Wave Reduction Intrinsics (PR #150395)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Jul 24 02:55:13 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
Author: Aaditya (easyonaadit)
<details>
<summary>Changes</summary>
---
Patch is 169.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/150395.diff
9 Files Affected:
- (modified) llvm/lib/Analysis/ConstantFolding.cpp (+14)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll (+88-532)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.and.ll (+8-80)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.max.ll (+8-80)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.min.ll (+8-80)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.or.ll (+8-80)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.sub.ll (+88-658)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.xor.ll (+88-589)
- (modified) llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll (+445-10)
``````````diff
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
old mode 100644
new mode 100755
index e71ba5ea5521e..11d22c75831d0
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1647,6 +1647,13 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::amdgcn_perm:
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
case Intrinsic::amdgcn_s_wqm:
case Intrinsic::amdgcn_s_quadmask:
case Intrinsic::amdgcn_s_bitreplicate:
@@ -3520,6 +3527,13 @@ static Constant *ConstantFoldIntrinsicCall2(Intrinsic::ID IntrinsicID, Type *Ty,
return ConstantInt::get(Ty, C0->abs());
case Intrinsic::amdgcn_wave_reduce_umin:
case Intrinsic::amdgcn_wave_reduce_umax:
+ case Intrinsic::amdgcn_wave_reduce_max:
+ case Intrinsic::amdgcn_wave_reduce_min:
+ case Intrinsic::amdgcn_wave_reduce_add:
+ case Intrinsic::amdgcn_wave_reduce_sub:
+ case Intrinsic::amdgcn_wave_reduce_and:
+ case Intrinsic::amdgcn_wave_reduce_or:
+ case Intrinsic::amdgcn_wave_reduce_xor:
return dyn_cast<Constant>(Operands[0]);
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
index b6af8b4bb798d..d3d9275a566e2 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.add.ll
@@ -191,10 +191,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: const_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -204,10 +201,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX8GISEL-LABEL: const_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
@@ -217,11 +211,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: const_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
@@ -229,72 +220,35 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX9GISEL-LABEL: const_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: const_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: const_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: const_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: const_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
+; GFX10DAGISEL-LABEL: const_value:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
+; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10DAGISEL-NEXT: s_endpgm
+;
+; GFX10GISEL-LABEL: const_value:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
+; GFX10GISEL-NEXT: s_endpgm
;
; GFX1164DAGISEL-LABEL: const_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
+; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
@@ -302,12 +256,8 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1164GISEL-LABEL: const_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
+; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
@@ -315,12 +265,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1132DAGISEL-LABEL: const_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
+; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
@@ -328,12 +273,7 @@ define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX1132GISEL-LABEL: const_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: s_mulk_i32 s2, 0x7b
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
@@ -346,152 +286,35 @@ entry:
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: poison_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: poison_value:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: poison_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: poison_value:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
-; GFX1064DAGISEL-LABEL: poison_value:
-; GFX1064DAGISEL: ; %bb.0: ; %entry
-; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1064DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1064DAGISEL-NEXT: s_endpgm
-;
-; GFX1064GISEL-LABEL: poison_value:
-; GFX1064GISEL: ; %bb.0: ; %entry
-; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064GISEL-NEXT: s_endpgm
-;
-; GFX1032DAGISEL-LABEL: poison_value:
-; GFX1032DAGISEL: ; %bb.0: ; %entry
-; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1032DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
-; GFX1032DAGISEL-NEXT: s_endpgm
-;
-; GFX1032GISEL-LABEL: poison_value:
-; GFX1032GISEL: ; %bb.0: ; %entry
-; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032GISEL-NEXT: s_endpgm
-;
-; GFX1164DAGISEL-LABEL: poison_value:
-; GFX1164DAGISEL: ; %bb.0: ; %entry
-; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX1164DAGISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
-; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1164DAGISEL-NEXT: s_endpgm
+; GFX10DAGISEL-LABEL: poison_value:
+; GFX10DAGISEL: ; %bb.0: ; %entry
+; GFX10DAGISEL-NEXT: s_endpgm
;
-; GFX1164GISEL-LABEL: poison_value:
-; GFX1164GISEL: ; %bb.0: ; %entry
-; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1164GISEL-NEXT: s_bcnt1_i32_b64 s2, s[2:3]
-; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1164GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1164GISEL-NEXT: s_endpgm
+; GFX10GISEL-LABEL: poison_value:
+; GFX10GISEL: ; %bb.0: ; %entry
+; GFX10GISEL-NEXT: s_endpgm
;
-; GFX1132DAGISEL-LABEL: poison_value:
-; GFX1132DAGISEL: ; %bb.0: ; %entry
-; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX1132DAGISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132DAGISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
-; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
-; GFX1132DAGISEL-NEXT: s_endpgm
+; GFX11DAGISEL-LABEL: poison_value:
+; GFX11DAGISEL: ; %bb.0: ; %entry
+; GFX11DAGISEL-NEXT: s_endpgm
;
-; GFX1132GISEL-LABEL: poison_value:
-; GFX1132GISEL: ; %bb.0: ; %entry
-; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
-; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v1, 0
-; GFX1132GISEL-NEXT: s_bcnt1_i32_b32 s2, s2
-; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1132GISEL-NEXT: s_mul_i32 s2, s0, s2
-; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
-; GFX1132GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
-; GFX1132GISEL-NEXT: s_endpgm
+; GFX11GISEL-LABEL: poison_value:
+; GFX11GISEL: ; %bb.0: ; %entry
+; GFX11GISEL-NEXT: s_endpgm
entry:
%result = call i32 @llvm.amdgcn.wave.reduce.add.i32(i32 poison, i32 1)
store i32 %result, ptr addrspace(1) %out
@@ -1431,33 +1254,21 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: const_value_i64:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8DAGISEL-NEXT: s_add_u32 s3, s4, s3
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s3
-; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1
+; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0
+; GFX8DAGISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: const_value_i64:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX8GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX8GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX8GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX8GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX8GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX8GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
+; GFX8GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v3, s1
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s3
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s0
; GFX8GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX8GISEL-NEXT: s_endpgm
@@ -1465,129 +1276,56 @@ define amdgpu_kernel void @const_value_i64(ptr addrspace(1) %out) {
; GFX9DAGISEL-LABEL: const_value_i64:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9DAGISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9DAGISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9DAGISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9DAGISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9DAGISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v2, 0
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s3
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
+; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9DAGISEL-NEXT: global_store_dwordx2 v1, v[0:1], s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: const_value_i64:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
-; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
-; GFX9GISEL-NEXT: s_bcnt1_i32_b64 s3, s[2:3]
-; GFX9GISEL-NEXT: s_mul_i32 s2, s3, 0x7b
-; GFX9GISEL-NEXT: s_mul_hi_u32 s4, 0x7b, s3
-; GFX9GISEL-NEXT: s_mul_i32 s3, s3, 0
-; GFX9GISEL-NEXT: s_add_u32 s3, s4, s3
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX9GISEL-NEXT: v_mov_b32_e32 v1, s...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/150395
More information about the llvm-branch-commits mailing list