[llvm] 8020458 - [AMDGPU] Changing S_AND_B32 to V_AND_B32_e64 in the divergent 'trunc' to i1 pattern
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 24 07:22:29 PST 2021
Author: alex-t
Date: 2021-12-24T18:24:49+03:00
New Revision: 8020458c5dc2be841c07d26ff75b5471314e6631
URL: https://github.com/llvm/llvm-project/commit/8020458c5dc2be841c07d26ff75b5471314e6631
DIFF: https://github.com/llvm/llvm-project/commit/8020458c5dc2be841c07d26ff75b5471314e6631.diff
LOG: [AMDGPU] Changing S_AND_B32 to V_AND_B32_e64 in the divergent 'trunc' to i1 pattern
In the 'trunc' i16/i32/i64 to i1 patterns, the 'and $src, 1' node supplies an operand to 'setcc'.
The latter is selected to S_CMP_EQ or V_CMP_EQ depending on its divergence. If the 'and' is scalar
and the 'setcc' is divergent, we need a VGPR-to-SGPR copy to adjust the input operand for V_CMP_EQ.
This patch changes S_AND_B32 to V_AND_B32_e64 in the divergent 'trunc to i1' patterns.
Reviewed By: rampitec
Differential Revision: https://reviews.llvm.org/D116241
Added:
llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
Modified:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/lib/Target/AMDGPU/VOPInstructions.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 95744b6390c56..636337ede000b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2212,18 +2212,18 @@ def : GCNPat <
>;
def : GCNPat <
- (i1 (trunc i32:$a)),
- (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+ (i1 (DivergentUnaryFrag<trunc> i32:$a)),
+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
>;
def : GCNPat <
- (i1 (trunc i16:$a)),
- (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1), $a), (i32 1))
+ (i1 (DivergentUnaryFrag<trunc> i16:$a)),
+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
>;
def : GCNPat <
- (i1 (trunc i64:$a)),
- (V_CMP_EQ_U32_e64 (S_AND_B32 (i32 1),
+ (i1 (DivergentUnaryFrag<trunc> i64:$a)),
+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1),
(i32 (EXTRACT_SUBREG $a, sub0))), (i32 1))
>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index a3eccf13cd719..a8368892c5650 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -794,6 +794,18 @@ class VOPPatGen<SDPatternOperator Op, VOPProfile P> {
list<dag> ret = [!con(Outs, (set Ins))];
}
+class DivergentUnaryFrag<SDPatternOperator Op> : PatFrag <
+ (ops node:$src0),
+ (Op $src0),
+ [{ return N->isDivergent(); }]> {
+ // This check is unnecessary as it's captured by the result register
+ // bank constraint.
+ //
+ // FIXME: Should add a way for the emitter to recognize this is a
+ // trivially true predicate to eliminate the check.
+ let GISelPredicateCode = [{return true;}];
+}
+
class VOPPatOrNull<SDPatternOperator Op, VOPProfile P> {
list<dag> ret = !if(!ne(P.NeedPatGen,PatGenMode.NoPattern), VOPPatGen<Op, P>.ret, []);
}
diff --git a/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
new file mode 100644
index 0000000000000..4429ee6f3ba60
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/divergence-driven-trunc-to-i1.ll
@@ -0,0 +1,59 @@
+; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: name: uniform_trunc_i16_to_i1
+; GCN: S_AND_B32 1
+; GCN: S_CMP_EQ_U32
+define amdgpu_kernel void @uniform_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) {
+ %setcc = icmp slt i16 %x, 0
+ %select = select i1 %setcc, i1 true, i1 %z
+ store i1 %select, i1 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: name: divergent_trunc_i16_to_i1
+; GCN: V_AND_B32_e64 1
+; GCN: V_CMP_EQ_U32_e64
+define i1 @divergent_trunc_i16_to_i1(i1 addrspace(1)* %out, i16 %x, i1 %z) {
+ %setcc = icmp slt i16 %x, 0
+ %select = select i1 %setcc, i1 true, i1 %z
+ ret i1 %select
+}
+
+; GCN-LABEL: name: uniform_trunc_i32_to_i1
+; GCN: S_AND_B32 1
+; GCN: S_CMP_EQ_U32
+define amdgpu_kernel void @uniform_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) {
+ %setcc = icmp slt i32 %x, 0
+ %select = select i1 %setcc, i1 true, i1 %z
+ store i1 %select, i1 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: name: divergent_trunc_i32_to_i1
+; GCN: V_AND_B32_e64 1
+; GCN: V_CMP_EQ_U32_e64
+define i1 @divergent_trunc_i32_to_i1(i1 addrspace(1)* %out, i32 %x, i1 %z) {
+ %setcc = icmp slt i32 %x, 0
+ %select = select i1 %setcc, i1 true, i1 %z
+ ret i1 %select
+}
+
+; GCN-LABEL: name: uniform_trunc_i64_to_i1
+; GCN: S_AND_B32 1
+; GCN: S_CMP_EQ_U32
+define amdgpu_kernel void @uniform_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) {
+ %setcc = icmp slt i64 %x, 0
+ %select = select i1 %setcc, i1 true, i1 %z
+ store i1 %select, i1 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: name: divergent_trunc_i64_to_i1
+; GCN: V_AND_B32_e64 1
+; GCN: V_CMP_EQ_U32_e64
+define i1 @divergent_trunc_i64_to_i1(i1 addrspace(1)* %out, i64 %x, i1 %z) {
+ %setcc = icmp slt i64 %x, 0
+ %select = select i1 %setcc, i1 true, i1 %z
+ ret i1 %select
+}
+
More information about the llvm-commits
mailing list