[llvm] r306439 - [AMDGPU] Combine and x, (sext cc from i1) => select cc, x, 0
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 27 11:25:26 PDT 2017
Author: rampitec
Date: Tue Jun 27 11:25:26 2017
New Revision: 306439
URL: http://llvm.org/viewvc/llvm-project?rev=306439&view=rev
Log:
[AMDGPU] Combine and x, (sext cc from i1) => select cc, x, 0
Also factored out function to check if a boolean is an already
deserialized value which does not require v_cndmask_b32 to be
loaded. Added binary logical operators to its check.
Differential Revision: https://reviews.llvm.org/D34500
Added:
llvm/trunk/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=306439&r1=306438&r2=306439&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Tue Jun 27 11:25:26 2017
@@ -4314,6 +4314,23 @@ SDValue SITargetLowering::splitBinaryBit
return SDValue();
}
+// Returns true if argument is a boolean value which is not serialized into
+// memory or argument and does not require v_cmdmask_b32 to be deserialized.
+static bool isBoolSGPR(SDValue V) {
+ if (V.getValueType() != MVT::i1)
+ return false;
+ switch (V.getOpcode()) {
+ default: break;
+ case ISD::SETCC:
+ case ISD::AND:
+ case ISD::OR:
+ case ISD::XOR:
+ case AMDGPUISD::FP_CLASS:
+ return true;
+ }
+ return false;
+}
+
SDValue SITargetLowering::performAndCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
if (DCI.isBeforeLegalize())
@@ -4402,6 +4419,16 @@ SDValue SITargetLowering::performAndComb
}
}
+ if (VT == MVT::i32 &&
+ (RHS.getOpcode() == ISD::SIGN_EXTEND || LHS.getOpcode() == ISD::SIGN_EXTEND)) {
+ // and x, (sext cc from i1) => select cc, x, 0
+ if (RHS.getOpcode() != ISD::SIGN_EXTEND)
+ std::swap(LHS, RHS);
+ if (isBoolSGPR(RHS.getOperand(0)))
+ return DAG.getSelect(SDLoc(N), MVT::i32, RHS.getOperand(0),
+ LHS, DAG.getConstant(0, SDLoc(N), MVT::i32));
+ }
+
return SDValue();
}
@@ -4941,8 +4968,7 @@ SDValue SITargetLowering::performAddComb
case ISD::SIGN_EXTEND:
case ISD::ANY_EXTEND: {
auto Cond = RHS.getOperand(0);
- if (Cond.getOpcode() != ISD::SETCC &&
- Cond.getOpcode() != AMDGPUISD::FP_CLASS)
+ if (!isBoolSGPR(Cond))
break;
SDVTList VTList = DAG.getVTList(MVT::i32, MVT::i1);
SDValue Args[] = { LHS, DAG.getConstant(0, SL, MVT::i32), Cond };
Added: llvm/trunk/test/CodeGen/AMDGPU/combine-and-sext-bool.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/combine-and-sext-bool.ll?rev=306439&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/combine-and-sext-bool.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/combine-and-sext-bool.ll Tue Jun 27 11:25:26 2017
@@ -0,0 +1,27 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: {{^}}and_i1_sext_bool:
+; GCN: v_cmp_{{gt|le}}_u32_e{{32|64}} [[CC:[^,]+]], v{{[0-9]+}}, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e{{32|64}} [[VAL:v[0-9]+]], 0, v{{[0-9]+}}, [[CC]]
+; GCN: store_dword {{.*}}[[VAL]]
+; GCN-NOT: v_cndmask_b32_e64 v{{[0-9]+}}, {{0|-1}}, {{0|-1}}
+; GCN-NOT: v_and_b32_e32
+
+define amdgpu_kernel void @and_i1_sext_bool(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp = icmp ugt i32 %x, %y
+ %ext = sext i1 %cmp to i32
+ %and = and i32 %v, %ext
+ store i32 %and, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #0
+
+declare i32 @llvm.amdgcn.workitem.id.y() #0
+
+attributes #0 = { nounwind readnone speculatable }
Modified: llvm/trunk/test/CodeGen/AMDGPU/combine-cond-add-sub.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/combine-cond-add-sub.ll?rev=306439&r1=306438&r2=306439&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/combine-cond-add-sub.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/combine-cond-add-sub.ll Tue Jun 27 11:25:26 2017
@@ -150,6 +150,26 @@ bb:
ret void
}
+; GCN-LABEL: {{^}}add_and:
+; GCN: s_and_b64 [[CC:[^,]+]],
+; GCN: v_addc_u32_e{{32|64}} v{{[0-9]+}}, {{[^,]+}}, 0, v{{[0-9]+}}, [[CC]]
+; GCN-NOT: v_cndmask
+
+define amdgpu_kernel void @add_and(i32 addrspace(1)* nocapture %arg) {
+bb:
+ %x = tail call i32 @llvm.amdgcn.workitem.id.x()
+ %y = tail call i32 @llvm.amdgcn.workitem.id.y()
+ %gep = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 %x
+ %v = load i32, i32 addrspace(1)* %gep, align 4
+ %cmp1 = icmp ugt i32 %x, %y
+ %cmp2 = icmp ugt i32 %x, 1
+ %cmp = and i1 %cmp1, %cmp2
+ %ext = zext i1 %cmp to i32
+ %add = add i32 %v, %ext
+ store i32 %add, i32 addrspace(1)* %gep, align 4
+ ret void
+}
+
declare i1 @llvm.amdgcn.class.f32(float, i32) #0
declare i32 @llvm.amdgcn.workitem.id.x() #0
More information about the llvm-commits
mailing list