[PATCH] D52060: AMDGPU: Add a fast path for icmp.i1(src, false, NE)
Marek Olšák via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 13 14:30:57 PDT 2018
mareko created this revision.
mareko added reviewers: arsenm, nhaehnle.
Herald added subscribers: t-tye, tpr, dstuttard, yaxunl, wdng, jvesely, kzhuravl.
This allows moving the condition from the intrinsic to the standard ICmp
opcode, so that LLVM can do simplifications on it. In that form
(src != false), the icmp.i1 intrinsic is just an identity that returns
the SGPR mask of its i1 source.
The mask can also be obtained from the result of an "and i1", "or i1", or "xor i1".
Repository:
rL LLVM
https://reviews.llvm.org/D52060
Files:
lib/Target/AMDGPU/SIISelLowering.cpp
lib/Target/AMDGPU/SIInstructions.td
test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
Index: test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
===================================================================
--- test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
+++ test/CodeGen/AMDGPU/llvm.amdgcn.icmp.ll
@@ -4,6 +4,7 @@
declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) #0
declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) #0
declare i64 @llvm.amdgcn.icmp.i16(i16, i16, i32) #0
+declare i64 @llvm.amdgcn.icmp.i1(i1, i1, i32) #0
; No crash on invalid input
; GCN-LABEL: {{^}}v_icmp_i32_dynamic_cc:
@@ -314,4 +315,21 @@
ret void
}
+; GCN-LABEL: {{^}}v_icmp_i1_ne0:
+; GCN: v_cmp_gt_u32_e64 s[[C0:\[[0-9]+:[0-9]+\]]],
+; GCN: v_cmp_gt_u32_e64 s[[C1:\[[0-9]+:[0-9]+\]]],
+; GCN: s_and_b64 s[[SRC:\[[0-9]+:[0-9]+\]]], s[[C0]], s[[C1]]
+; SI-NEXT: s_mov_b32 s{{[0-9]+}}, -1
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: v_mov_b32_e32
+; GCN-NEXT: {{global|flat|buffer}}_store_dwordx2
+define amdgpu_kernel void @v_icmp_i1_ne0(i64 addrspace(1)* %out, i32 %a, i32 %b) {
+ %c0 = icmp ugt i32 %a, 1
+ %c1 = icmp ugt i32 %b, 2
+ %src = and i1 %c0, %c1
+ %result = call i64 @llvm.amdgcn.icmp.i1(i1 %src, i1 false, i32 33)
+ store i64 %result, i64 addrspace(1)* %out
+ ret void
+}
+
attributes #0 = { nounwind readnone convergent }
Index: lib/Target/AMDGPU/SIInstructions.td
===================================================================
--- lib/Target/AMDGPU/SIInstructions.td
+++ lib/Target/AMDGPU/SIInstructions.td
@@ -597,6 +597,11 @@
>;
// TODO: we could add more variants for other types of conditionals
+def : Pat <
+ (int_amdgcn_icmp i1:$src, (i1 0), (i32 33)),
+ (COPY $src) // Return the SGPRs representing i1 src
+>;
+
//===----------------------------------------------------------------------===//
// VOP1 Patterns
//===----------------------------------------------------------------------===//
Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5061,6 +5061,10 @@
Denominator, Numerator);
}
case Intrinsic::amdgcn_icmp: {
+ if (Op.getOperand(1).getValueType() == MVT::i1 &&
+ Op.getConstantOperandVal(2) == 0 &&
+ Op.getConstantOperandVal(3) == ICmpInst::Predicate::ICMP_NE)
+ return Op;
return lowerICMPIntrinsic(*this, Op.getNode(), DAG);
}
case Intrinsic::amdgcn_fcmp: {
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D52060.165383.patch
Type: text/x-patch
Size: 2411 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180913/3a0b7d18/attachment.bin>
More information about the llvm-commits
mailing list