[PATCH] D52060: AMDGPU: Add a fast path for icmp.i1(src, false, NE)

Thu Sep 13 14:30:57 PDT 2018

mareko created this revision.
mareko added reviewers: arsenm, nhaehnle.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.

This allows moving the condition from the intrinsic to the standard ICmp
opcode, so that LLVM can do simplifications on it. The icmp.i1 intrinsic
is an identity for retrieving the SGPR mask.

And we can also get the mask from and i1, or i1, xor i1.


Repository:
  rL LLVM

https://reviews.llvm.org/D52060

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  lib/Target/AMDGPU/SIInstructions.td
  test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll


Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
===================================================================

--- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -4,6 +4,7 @@
 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
 
 ; No crash on invalid input
 ; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
@@ -314,4 +315,21 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}v_icmp_i1_ne0:
+; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
+; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
+; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
+; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+  %c0 = icmp ugt i32 %a, 1
+  %c1 = icmp ugt i32 %b, 2
+  %src = and i1 %c0, %c1
+  %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone convergent }
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -597,6 +597,11 @@
 >;
 // TODO: we could add more variants for other types of conditionals
 
+def : Pat <
+  (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+  (COPY $src) // Return the SGPRs representing i1 src
+>;
+
 //===----------------------------------------------------------------------===//
 // VOP1 Patterns
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5061,6 +5061,10 @@
                        Denominator, Numerator);
   }
   case Intrinsic::amdgcn_icmp: {
+    if (Op.getOperand(1).getValueType() == MVT::i1 &&
+        Op.getConstantOperandVal(2) == 0 &&
+        Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
+      return Op;
     return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
   }
   case Intrinsic::amdgcn_fcmp: {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D52060.165383.patch
Type: text/x-patch
Size: 2411 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180913/3a0b7d18/attachment.bin>