[llvm] 1462053 - [AMDGPU] Propagate constants for llvm.amdgcn.wave.reduce.umin/umax
Pravin Jagtap via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 26 20:49:41 PDT 2023
Author: Pravin Jagtap
Date: 2023-07-26T23:46:01-04:00
New Revision: 1462053608041c34e871b7d0a565f4ca541e2fd1
URL: https://github.com/llvm/llvm-project/commit/1462053608041c34e871b7d0a565f4ca541e2fd1
DIFF: https://github.com/llvm/llvm-project/commit/1462053608041c34e871b7d0a565f4ca541e2fd1.diff
LOG: [AMDGPU] Propagate constants for llvm.amdgcn.wave.reduce.umin/umax
Reviewed By: arsenm, #amdgpu
Differential Revision: https://reviews.llvm.org/D156077
Added:
llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
Modified:
llvm/lib/Analysis/ConstantFolding.cpp
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
Removed:
################################################################################
diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp
index 38cccb3ea3c259..83e654e136222f 100644
--- a/llvm/lib/Analysis/ConstantFolding.cpp
+++ b/llvm/lib/Analysis/ConstantFolding.cpp
@@ -1548,6 +1548,8 @@ bool llvm::canConstantFoldCallTo(const CallBase *Call, const Function *F) {
case Intrinsic::vector_reduce_umax:
// Target intrinsics
case Intrinsic::amdgcn_perm:
+ case Intrinsic::amdgcn_wave_reduce_umin:
+ case Intrinsic::amdgcn_wave_reduce_umax:
case Intrinsic::arm_mve_vctp8:
case Intrinsic::arm_mve_vctp16:
case Intrinsic::arm_mve_vctp32:
@@ -2839,6 +2841,9 @@ static Constant *ConstantFoldScalarCall2(StringRef Name,
return Constant::getNullValue(Ty);
return ConstantInt::get(Ty, C0->abs());
+ case Intrinsic::amdgcn_wave_reduce_umin:
+ case Intrinsic::amdgcn_wave_reduce_umax:
+ return dyn_cast<Constant>(Operands[0]);
}
return nullptr;
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
index edd2d2e912c9d7..3e2db80db43db9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umax.ll
@@ -242,72 +242,34 @@ entry:
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: poison_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: poison_value:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: poison_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: poison_value:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
; GFX10DAGISEL-LABEL: poison_value:
; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX10DAGISEL-NEXT: s_endpgm
;
; GFX10GISEL-LABEL: poison_value:
; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX10GISEL-NEXT: s_endpgm
;
; GFX11DAGISEL-LABEL: poison_value:
; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT: s_nop 0
-; GFX11DAGISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11DAGISEL-NEXT: s_endpgm
;
; GFX11GISEL-LABEL: poison_value:
; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT: s_nop 0
-; GFX11GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11GISEL-NEXT: s_endpgm
entry:
%result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
index 98e4795c83fc68..19247fa0b84f35 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.reduce.umin.ll
@@ -240,75 +240,37 @@ entry:
ret void
}
-define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
+define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: poison_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
-; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v0
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: poison_value:
; GFX8GISEL: ; %bb.0: ; %entry
-; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
-; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
-; GFX8GISEL-NEXT: flat_store_dword v[0:1], v0
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: poison_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
-; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: poison_value:
; GFX9GISEL: ; %bb.0: ; %entry
-; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9GISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
; GFX10DAGISEL-LABEL: poison_value:
; GFX10DAGISEL: ; %bb.0: ; %entry
-; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10DAGISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX10DAGISEL-NEXT: s_endpgm
;
; GFX10GISEL-LABEL: poison_value:
; GFX10GISEL: ; %bb.0: ; %entry
-; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10GISEL-NEXT: global_store_dword v0, v0, s[0:1]
; GFX10GISEL-NEXT: s_endpgm
;
; GFX11DAGISEL-LABEL: poison_value:
; GFX11DAGISEL: ; %bb.0: ; %entry
-; GFX11DAGISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11DAGISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11DAGISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11DAGISEL-NEXT: s_nop 0
-; GFX11DAGISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11DAGISEL-NEXT: s_endpgm
;
; GFX11GISEL-LABEL: poison_value:
; GFX11GISEL: ; %bb.0: ; %entry
-; GFX11GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
-; GFX11GISEL-NEXT: v_mov_b32_e32 v0, 0
-; GFX11GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11GISEL-NEXT: global_store_b32 v0, v0, s[0:1]
-; GFX11GISEL-NEXT: s_nop 0
-; GFX11GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
; GFX11GISEL-NEXT: s_endpgm
entry:
%result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
@@ -316,7 +278,7 @@ entry:
ret void
}
-define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
+define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
diff --git a/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
new file mode 100644
index 00000000000000..863598578ea772
--- /dev/null
+++ b/llvm/test/Transforms/InstSimplify/ConstProp/AMDGPU/wave.reduce.ll
@@ -0,0 +1,76 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.umin.i32
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umin_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umin_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.wave.reduce.umax.i32
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_poison(ptr addrspace(1) %out, i32 %in) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_poison(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 poison, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_const(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 123, ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
+
+@gv = constant i32 0
+define amdgpu_kernel void @test_constant_fold_wave_reduce_umax_gv(ptr addrspace(1) %out) {
+; CHECK-LABEL: @test_constant_fold_wave_reduce_umax_gv(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 ptrtoint (ptr @gv to i32), i32 1)
+; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT:%.*]], align 4
+; CHECK-NEXT: ret void
+;
+entry:
+ %result = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 ptrtoint (ptr @gv to i32), i32 1)
+ store i32 %result, ptr addrspace(1) %out
+ ret void
+}
More information about the llvm-commits mailing list