[llvm] 98d0970 - [AMDGPU] Re-enabling divergence predicates for min/max
via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 20 05:08:47 PST 2021
Author: alex-t
Date: 2021-12-20T16:10:55+03:00
New Revision: 98d09705e15c5927f41009e5627c926ba1e87c26
URL: https://github.com/llvm/llvm-project/commit/98d09705e15c5927f41009e5627c926ba1e87c26
DIFF: https://github.com/llvm/llvm-project/commit/98d09705e15c5927f41009e5627c926ba1e87c26.diff
LOG: [AMDGPU] Re-enabling divergence predicates for min/max
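This patch re-enables the divergence predicates for the integer min/max nodes.
Uniform ISD::SMIN/UMIN/SMAX/UMAX nodes are now selected to S_MIN_I32/S_MIN_U32/S_MAX_I32/S_MAX_U32, while divergent ones are selected to V_MIN_I32_e64/V_MIN_U32_e64/V_MAX_I32_e64/V_MAX_U32_e64.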
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D115954
Added:
llvm/test/CodeGen/AMDGPU/divergence-driven-min-max.ll
Modified:
llvm/lib/Target/AMDGPU/SOPInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 3354cbd40d3d..15f7491611e8 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -486,19 +486,18 @@ def S_SUBB_U32 : SOP2_32 <"s_subb_u32",
[(set i32:$sdst, (UniformBinFrag<sube> (i32 SSrc_b32:$src0), (i32 SSrc_b32:$src1)))]>;
} // End Uses = [SCC]
-
let isCommutable = 1 in {
def S_MIN_I32 : SOP2_32 <"s_min_i32",
- [(set i32:$sdst, (smin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smin> i32:$src0, i32:$src1))]
>;
def S_MIN_U32 : SOP2_32 <"s_min_u32",
- [(set i32:$sdst, (umin i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umin> i32:$src0, i32:$src1))]
>;
def S_MAX_I32 : SOP2_32 <"s_max_i32",
- [(set i32:$sdst, (smax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<smax> i32:$src0, i32:$src1))]
>;
def S_MAX_U32 : SOP2_32 <"s_max_u32",
- [(set i32:$sdst, (umax i32:$src0, i32:$src1))]
+ [(set i32:$sdst, (UniformBinFrag<umax> i32:$src0, i32:$src1))]
>;
} // End isCommutable = 1
} // End Defs = [SCC]
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-min-max.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-min-max.ll
new file mode 100755
index 000000000000..1a9f468c80d2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-min-max.ll
@@ -0,0 +1,73 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: name: uniform_imin
+; GCN: S_MIN_I32
+define amdgpu_kernel void @uniform_imin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %cmp = icmp sle i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: divergent_imin
+; GCN: V_MIN_I32_e64
+define void @divergent_imin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %cmp = icmp sle i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: uniform_umin
+; GCN: S_MIN_U32
+define amdgpu_kernel void @uniform_umin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %tmp = icmp ule i32 %a, %b
+ %val = select i1 %tmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; GCN-LABEL: name: divergent_umin
+; GCN: V_MIN_U32_e64
+define void @divergent_umin(i32 addrspace(1)* %out, i32 %a, i32 %b) #0 {
+ %tmp = icmp ule i32 %a, %b
+ %val = select i1 %tmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 8
+ ret void
+}
+
+; GCN-LABEL: name: uniform_imax
+; GCN: S_MAX_I32
+define amdgpu_kernel void @uniform_imax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp sge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: divergent_imax
+; GCN: V_MAX_I32_e64
+define void @divergent_imax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp sge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: uniform_umax
+; GCN: S_MAX_U32
+define amdgpu_kernel void @uniform_umax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp uge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; GCN-LABEL: name: divergent_umax
+; GCN: V_MAX_U32_e64
+define void @divergent_umax(i32 addrspace(1)* %out, i32 %a, i32 %b) nounwind {
+ %cmp = icmp uge i32 %a, %b
+ %val = select i1 %cmp, i32 %a, i32 %b
+ store i32 %val, i32 addrspace(1)* %out, align 4
+ ret void
+}
More information about the llvm-commits
mailing list