[PATCH] D118461: [AMDGPU] Introduce new ISel combine for trunc-slr patterns
Thomas Symalla via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 31 04:03:39 PST 2022
tsymalla updated this revision to Diff 404477.
tsymalla added a comment.
Added handling for scalar cases, improved test case.
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D118461/new/
https://reviews.llvm.org/D118461
Files:
llvm/lib/Target/AMDGPU/SIInstructions.td
llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
Index: llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -stop-after=amdgpu-isel -verify-machineinstrs -O0 < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: name: divergent_lshr_and_cmp
+; GCN-NOT: V_LSHRREV_B32_e64
+; GCN: V_AND_B32_e64 2
+; GCN: V_CMP_EQ_U32_e64 killed {{.*}}, 2
+define i32 @divergent_lshr_and_cmp(i32 %x) {
+entry:
+  %0 = and i32 %x, 2
+  %1 = icmp ne i32 %0, 0
+  ; Branching on %1 inserts llvm.amdgcn.if, which keeps the truncate in the SDag.
+  br i1 %1, label %out.true, label %out.else
+
+out.true:
+  %2 = shl i32 %x, 2
+  ret i32 %2
+
+out.else:
+  ret i32 %x
+}
+
+; GCN-LABEL: name: uniform_opt_lshr_and_cmp
+; GCN: S_AND_B32 2
+; GCN: S_CMP_EQ_U32 killed %{{.*}}, 2
+define amdgpu_kernel void @uniform_opt_lshr_and_cmp(i1 addrspace(1)* %out, i32 %x) {
+entry:
+  %0 = and i32 %x, 2
+  %1 = icmp ne i32 %0, 0
+  ; Branching on %1 keeps the truncate from being optimized away in the SDag.
+  br i1 %1, label %out.true, label %out.else
+
+out.true:
+  %2 = xor i1 %1, -1
+  store i1 %2, i1 addrspace(1)* %out
+  ret void
+
+out.else:
+  store i1 %1, i1 addrspace(1)* %out
+  ret void
+}
\ No newline at end of file
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2269,6 +2269,40 @@
(V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
>;
+// Matches i32 immediates from 1 to 15 (both bounds exclusive: 0 < Imm < 16).
+// The range is restricted to prevent using an additional VGPR for the value.
+def IMMBitSelRange : ImmLeaf <i32, [{
+ return Imm > 0 && Imm < 16;
+}]>;
+
+def IMMBitSelConst : SDNodeXForm<imm, [{
+  uint64_t Mask = 1ULL << N->getZExtValue(); // One-hot mask; unsigned shift.
+  return CurDAG->getTargetConstant(Mask, SDLoc(N), MVT::i32);
+}]>;
+
+// Selecting SRL and TRUNC separately, where the SRL result is the
+// source of the TRUNC, generates three instructions. For a constant
+// shift amount the shift can instead be folded into the mask of the
+// V_AND_B32_e64, so that no V_LSHRREV_B32_e64 is emitted:
+// (i1 (trunc (i32 (srl i32:$a, imm:$b)))) ->
+//   $tmp = v_and_b32_e64 (1 << $b), $a
+//   v_cmp_eq_u32_e64 $tmp, (1 << $b)
+// The uniform case does the same with s_and_b32 and s_cmp_eq_u32.
+
+// Divergent (VALU) case: mask bit $b of $a, then compare with the mask.
+def : GCNPat <
+ (i1 (DivergentUnaryFrag<trunc> (i32 (srl i32:$a, IMMBitSelRange:$b)))),
+ (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 (IMMBitSelConst $b)), $a),
+ (i32 (IMMBitSelConst $b)))
+>;
+
+// Uniform (scalar) case: mask bit $b of $a, then compare with the mask.
+def : GCNPat <
+ (i1 (UniformUnaryFrag<trunc> (i32 (srl i32:$a, IMMBitSelRange:$b)))),
+ (S_CMP_EQ_U32 (S_AND_B32 (i32 (IMMBitSelConst $b)), $a),
+ (i32 (IMMBitSelConst $b)))
+>;
+
def : GCNPat <
(i1 (DivergentUnaryFrag<trunc> i64:$a)),
(V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1),
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D118461.404477.patch
Type: text/x-patch
Size: 2878 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220131/555fca11/attachment.bin>
More information about the llvm-commits
mailing list