[PATCH] D52060: AMDGPU: Add a fast path for icmp.i1(src, false, NE)

Fri Oct 5 13:31:42 PDT 2018

mareko updated this revision to Diff 168520.
mareko edited the summary of this revision.
mareko added a comment.

AMDGPU: Add a fast path for icmp.i1(src, false, NE)

Summary:
This allows moving the condition from the intrinsic to the standard ICmp
opcode, so that LLVM can do simplifications on it. The icmp.i1 intrinsic
is an identity for retrieving the SGPR mask.

And we can also get the mask from and i1, or i1, xor i1.

Don't fold icmp in InstCombineCalls.

Reviewers: arsenm, nhaehnle

Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits

Differential Revision: https://reviews.llvm.org/D52060


Repository:
  rL LLVM

https://reviews.llvm.org/D52060

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp
  lib/Target/AMDGPU/SIInstructions.td
  lib/Transforms/InstCombine/InstCombineCalls.cpp
  test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll


Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
===================================================================

--- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -4,6 +4,7 @@
 declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
 declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
 declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
 
 ; No crash on invalid input
 ; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
@@ -314,4 +315,21 @@
   ret void
 }
 
+; GCN-LABEL: {{^}}v_icmp_i1_ne0:
+; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
+; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
+; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
+; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+  %c0 = icmp ugt i32 %a, 1
+  %c1 = icmp ugt i32 %b, 2
+  %src = and i1 %c0, %c1
+  %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
+  store i64 %result, i64 addrspace(1)* %out
+  ret void
+}
+
 attributes #0 = { nounwind readnone convergent }
Index: lib/Transforms/InstCombine/InstCombineCalls.cpp
===================================================================
--- lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -3652,6 +3652,11 @@
         // Promote to next legal integer type.
         unsigned Width = CmpType->getBitWidth();
         unsigned NewWidth = Width;
+
+        // Don't do anything for i1 comparisons.
+        if (Width == 1)
+          break;
+
         if (Width <= 16)
           NewWidth = 16;
         else if (Width <= 32)
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -597,6 +597,11 @@
 >;
 // TODO: we could add more variants for other types of conditionals
 
+def : Pat <
+  (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+  (COPY $src) // Return the SGPRs representing i1 src
+>;
+
 //===----------------------------------------------------------------------===//
 // VOP1 Patterns
 //===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5065,6 +5065,10 @@
                        Denominator, Numerator);
   }
   case Intrinsic::amdgcn_icmp: {
+    if (Op.getOperand(1).getValueType() == MVT::i1 &&
+        Op.getConstantOperandVal(2) == 0 &&
+        Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
+      return Op;
     return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
   }
   case Intrinsic::amdgcn_fcmp: {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D52060.168520.patch
Type: text/x-patch
Size: 2975 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20181005/bc6a3f7f/attachment.bin>