[PATCH] D118461: [AMDGPU] Introduce new ISel combine for trunc-srl patterns

Thomas Symalla via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 31 04:03:39 PST 2022


tsymalla updated this revision to Diff 404477.
tsymalla added a comment.

Added handling for scalar cases, improved test case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D118461/new/

https://reviews.llvm.org/D118461

Files:
  llvm/lib/Target/AMDGPU/SIInstructions.td
  llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll


Index: llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/AMDGPU/dagcombine-lshr-and-cmp.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=amdgcn -mtriple=amdgcn-- -stop-after=amdgpu-isel -verify-machineinstrs -O0 < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: bb.0.entry:
+; GCN-NOT:      V_LSHRREV_B32_e64
+; GCN:          V_AND_B32_e64 2
+; GCN:          V_CMP_EQ_U32_e64 killed {{.*}}, 2
+define i32 @divergent_lshr_and_cmp(i32 %x) {
+entry:
+  %0 = and i32 %x, 2
+  %1 = icmp ne i32 %0, 0
+  ; The branch is lowered via llvm.amdgcn.if, which prevents the truncate from being removed in the SelectionDAG.
+  br i1 %1, label %out.true, label %out.else
+
+out.true:
+  %2 = shl i32 %x, 2
+  ret i32 %2
+
+out.else:
+  ret i32 %x
+}
+
+; GCN-LABEL: bb.0.entry:
+; GCN:          S_AND_B32 2
+; GCN:          S_CMP_EQ_U32 killed %{{.*}}, 2
+define amdgpu_kernel void @uniform_opt_lshr_and_cmp(i1 addrspace(1)* %out, i32 %x) {
+entry:
+  %0 = and i32 %x, 2
+  %1 = icmp ne i32 %0, 0
+  ; Prevent the truncate from being optimized away in the SelectionDAG.
+  br i1 %1, label %out.true, label %out.else
+
+out.true:
+  %2 = xor i1 %1, -1
+  store i1 %2, i1 addrspace(1)* %out
+  ret void
+
+out.else:
+  store i1 %1, i1 addrspace(1)* %out
+  ret void
+}
\ No newline at end of file
Index: llvm/lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- llvm/lib/Target/AMDGPU/SIInstructions.td
+++ llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -2269,6 +2269,40 @@
   (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1), $a), (i32 1))
 >;
 
+// Restrict the shift amount's range to prevent the use of an
+// additional VGPR for the shifted value.
+def IMMBitSelRange : ImmLeaf <i32, [{
+  return Imm > 0 && Imm < 16;
+}]>;
+
+def IMMBitSelConst : SDNodeXForm<imm, [{
+  return CurDAG->getTargetConstant((1 << N->getZExtValue()), SDLoc(N),
+                                   MVT::i32);
+}]>;
+
+// Matching separate SRL and TRUNC instructions
+// with dependent operands (the SRL result is the TRUNC source)
+// generates three instructions. By testing the selected bit with
+// a mask instead, the shift is avoided and the V_AND_B32_e64
+// result can be used directly by V_CMP_EQ_U32_e64:
+// (i1 (trunc (srl i32:$a, i32:$b))) ->
+//   %and = v_and_b32_e64 (1 << $b), $a
+//   v_cmp_eq_u32_e64 %and, (1 << $b)
+
+// Handle the VALU case.
+def : GCNPat <
+  (i1 (DivergentUnaryFrag<trunc> (i32 (srl i32:$a, IMMBitSelRange:$b)))),
+  (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 (IMMBitSelConst $b)), $a),
+    (i32 (IMMBitSelConst $b)))
+>;
+
+// Handle the scalar case.
+def : GCNPat <
+  (i1 (UniformUnaryFrag<trunc> (i32 (srl i32:$a, IMMBitSelRange:$b)))),
+  (S_CMP_EQ_U32 (S_AND_B32 (i32 (IMMBitSelConst $b)), $a),
+    (i32 (IMMBitSelConst $b)))
+>;
+
 def : GCNPat <
   (i1 (DivergentUnaryFrag<trunc> i64:$a)),
   (V_CMP_EQ_U32_e64 (V_AND_B32_e64 (i32 1),

