[llvm] c22c8b1 - [AArch64] Perform first active true vector combine
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 7 09:13:14 PST 2022
Author: zhongyunde
Date: 2022-03-08T01:10:21+08:00
New Revision: c22c8b151b97a70d9e2fa0a401d07cc49d70b999
URL: https://github.com/llvm/llvm-project/commit/c22c8b151b97a70d9e2fa0a401d07cc49d70b999
DIFF: https://github.com/llvm/llvm-project/commit/c22c8b151b97a70d9e2fa0a401d07cc49d70b999.diff
LOG: [AArch64] Perform first active true vector combine
Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
... into: "ptrue p, all" + PTEST
Testing the bit of lane 0 can use the P register directly, and the "ptrue all"
instruction is loop invariant, which is beneficial for SVE once it is hoisted out of the loop.
Reviewed By: david-arm, paulwalker-arm
Differential Revision: https://reviews.llvm.org/D120891
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index dd421970e99f4..fcc217f54c37b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14364,7 +14364,46 @@ static bool hasPairwiseAdd(unsigned Opcode, EVT VT, bool FullFP16) {
}
}
-static SDValue performExtractVectorEltCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue getPTest(SelectionDAG &DAG, EVT VT, SDValue Pg, SDValue Op,
+ AArch64CC::CondCode Cond);
+
+// Materialize : i1 = extract_vector_elt t37, Constant:i64<0>
+// ... into: "ptrue p, all" + PTEST
+static SDValue
+performFirstTrueTestVectorCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ // Make sure PTEST can be legalised with illegal types.
+ if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
+ return SDValue();
+
+ SDValue SetCC = N->getOperand(0);
+ EVT VT = SetCC.getValueType();
+
+ if (!VT.isScalableVector() || VT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ // Restricted the DAG combine to only cases where we're extracting from a
+ // flag-setting operation
+ auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ if (!Idx || !Idx->isZero() || SetCC.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Extracts of lane 0 for SVE can be expressed as PTEST(Op, FIRST) ? 1 : 0
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Pg = getPTrue(DAG, SDLoc(N), VT, AArch64SVEPredPattern::all);
+ return getPTest(DAG, N->getValueType(0), Pg, SetCC, AArch64CC::FIRST_ACTIVE);
+}
+
+static SDValue
+performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
+ return Res;
+
+ SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
ConstantSDNode *ConstantN1 = dyn_cast<ConstantSDNode>(N1);
@@ -18356,7 +18395,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::INSERT_VECTOR_ELT:
return performInsertVectorEltCombine(N, DCI);
case ISD::EXTRACT_VECTOR_ELT:
- return performExtractVectorEltCombine(N, DAG);
+ return performExtractVectorEltCombine(N, DCI, Subtarget);
case ISD::VECREDUCE_ADD:
return performVecReduceAddCombine(N, DCI.DAG, Subtarget);
case AArch64ISD::UADDV:
diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
index c758889f77edb..a4951c41a3256 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -52,3 +52,17 @@ define <vscale x 4 x i1> @not_fcmp_uge_nxv4f32(<vscale x 4 x float> %a, <vscale
%not = xor <vscale x 4 x i1> %icmp, %ones
ret <vscale x 4 x i1> %not
}
+
+define i1 @foo(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: foo:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cset w0, mi
+; CHECK-NEXT: ret
+ %vcond = fcmp oeq <vscale x 4 x float> %a, %b
+ %bit = extractelement <vscale x 4 x i1> %vcond, i64 0
+ ret i1 %bit
+}
+
More information about the llvm-commits
mailing list