[llvm] 3568333 - [AArch64] Perform last active true vector combine
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 14 10:28:33 PDT 2022
Author: zhongyunde
Date: 2022-03-15T01:25:03+08:00
New Revision: 3568333815b30dc565ce041c64e871bc1d4e8e21
URL: https://github.com/llvm/llvm-project/commit/3568333815b30dc565ce041c64e871bc1d4e8e21
DIFF: https://github.com/llvm/llvm-project/commit/3568333815b30dc565ce041c64e871bc1d4e8e21.diff
LOG: [AArch64] Perform last active true vector combine
Testing the bit of lane EC-1 can use the P register directly, e.g.:
Materialize : Idx = (add (mul vscale, NumEls), -1)
i1 = extract_vector_elt t37, Constant:i64<Idx>
... into: "ptrue p, all" + PTEST
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D121180
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0f2d7997c870b..b538e6fff2861 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14410,12 +14410,57 @@ performFirstTrueTestVectorCombine(SDNode *N,
return getPTest(DAG, N->getValueType(0), Pg, SetCC, AArch64CC::FIRST_ACTIVE);
}
+// Fold an extract of the last lane, Idx = (add (mul vscale, NumEls), -1):
+// i1 = extract_vector_elt t37, Constant:i64<Idx>
+// ... into: "ptrue p, all" + PTEST (tested with the LAST_ACTIVE condition)
+static SDValue
+performLastTrueTestVectorCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const AArch64Subtarget *Subtarget) {
+ assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+ // Only combine with SVE and after legalization, so PTEST sees legal types.
+ if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
+ return SDValue();
+
+ SDValue SetCC = N->getOperand(0);
+ EVT OpVT = SetCC.getValueType();
+
+ if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
+ return SDValue();
+
+ // Match Idx == (add (vscale * NumEls), -1), i.e. the index of lane EC-1.
+ SDValue Idx = N->getOperand(1);
+ if (Idx.getOpcode() != ISD::ADD)
+ return SDValue();
+
+ SDValue VS = Idx.getOperand(0);
+ if (VS.getOpcode() != ISD::VSCALE)
+ return SDValue();
+
+ unsigned NumEls = OpVT.getVectorElementCount().getKnownMinValue();
+ if (VS.getConstantOperandVal(0) != NumEls)
+ return SDValue();
+
+ // Restrict the DAG combine to cases where the addend is all-ones (-1) and
+ // we're extracting from a SETCC, i.e. a flag-setting operation.
+ auto *CI = dyn_cast<ConstantSDNode>(Idx.getOperand(1));
+ if (!CI || !CI->isAllOnes() || SetCC.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Pg = getPTrue(DAG, SDLoc(N), OpVT, AArch64SVEPredPattern::all);
+ return getPTest(DAG, N->getValueType(0), Pg, SetCC, AArch64CC::LAST_ACTIVE);
+}
+
static SDValue
performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
const AArch64Subtarget *Subtarget) {
assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
return Res;
+ if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget))
+ return Res;
SelectionDAG &DAG = DCI.DAG;
SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
diff --git a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
index a4951c41a3256..90f88b8bbe02b 100644
--- a/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ b/llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -53,8 +53,8 @@ define <vscale x 4 x i1> @not_fcmp_uge_nxv4f32(<vscale x 4 x float> %a, <vscale
ret <vscale x 4 x i1> %not
}
-define i1 @foo(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
-; CHECK-LABEL: foo:
+define i1 @foo_first(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: foo_first:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
@@ -66,3 +66,21 @@ define i1 @foo(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
ret i1 %bit
}
+define i1 @foo_last(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: foo_last:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ptest p0, p1.b
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: ret
+ %vcond = fcmp oeq <vscale x 4 x float> %a, %b
+ %vscale = call i64 @llvm.vscale.i64()
+ %shl2 = shl nuw nsw i64 %vscale, 2
+ %idx = add nuw nsw i64 %shl2, -1
+ %bit = extractelement <vscale x 4 x i1> %vcond, i64 %idx
+ ret i1 %bit
+}
+
+
+declare i64 @llvm.vscale.i64()
More information about the llvm-commits
mailing list