[PATCH] D121180: [AArch64] Perform last active true vector combine

Mon Mar 14 10:28:39 PDT 2022

This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Allen marked an inline comment as done.
Closed by commit rG3568333815b3: [AArch64] Perform last active true vector combine (authored by Allen).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D121180/new/

https://reviews.llvm.org/D121180

Files:
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/test/CodeGen/AArch64/sve-cmp-folds.ll


Index: llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
===================================================================

--- llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
+++ llvm/test/CodeGen/AArch64/sve-cmp-folds.ll
@@ -53,8 +53,8 @@
   ret <vscale x 4 x i1> %not
 }
 
-define i1 @foo(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
-; CHECK-LABEL: foo:
+define i1 @foo_first(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: foo_first:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    ptrue p0.s
 ; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
@@ -66,3 +66,21 @@
   ret i1 %bit
 }
 
+define i1 @foo_last(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; CHECK-LABEL: foo_last:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT:    ptest p0, p1.b
+; CHECK-NEXT:    cset w0, lo
+; CHECK-NEXT:    ret
+  %vcond = fcmp oeq <vscale x 4 x float> %a, %b
+  %vscale = call i64 @llvm.vscale.i64()
+  %shl2 = shl nuw nsw i64 %vscale, 2
+  %idx = add nuw nsw i64 %shl2, -1
+  %bit = extractelement <vscale x 4 x i1> %vcond, i64 %idx
+  ret i1 %bit
+}
+
+
+declare i64 @llvm.vscale.i64()
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -14410,12 +14410,57 @@
   return getPTest(DAG, N->getValueType(0), Pg, SetCC, AArch64CC::FIRST_ACTIVE);
 }
 
+// Fold : Idx = (add (vscale * NumEls), -1), i.e. the index of the last lane
+//        i1  = extract_vector_elt (setcc ...), Idx
+// into : "ptrue p, all" + PTEST using the LAST_ACTIVE condition.
+static SDValue
+performLastTrueTestVectorCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 const AArch64Subtarget *Subtarget) {
+  assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
+  // Bail out without SVE, or when running before legalization.
+  if (!Subtarget->hasSVE() || DCI.isBeforeLegalize())
+    return SDValue();
+
+  SDValue SetCC = N->getOperand(0);
+  EVT OpVT = SetCC.getValueType();
+
+  if (!OpVT.isScalableVector() || OpVT.getVectorElementType() != MVT::i1)
+    return SDValue();
+
+  // Idx must be an ADD whose first operand is a VSCALE node scaled by NumEls.
+  SDValue Idx = N->getOperand(1);
+  if (Idx.getOpcode() != ISD::ADD)
+    return SDValue();
+
+  SDValue VS = Idx.getOperand(0);
+  if (VS.getOpcode() != ISD::VSCALE)
+    return SDValue();
+
+  unsigned NumEls = OpVT.getVectorElementCount().getKnownMinValue();
+  if (VS.getConstantOperandVal(0) != NumEls)
+    return SDValue();
+
+  // Restrict the DAG combine to cases where the ADD's second operand is the
+  // all-ones constant (-1) and we're extracting from a SETCC operation.
+  auto *CI = dyn_cast<ConstantSDNode>(Idx.getOperand(1));
+  if (!CI || !CI->isAllOnes() || SetCC.getOpcode() != ISD::SETCC)
+    return SDValue();
+
+  // Extracts of lane EC-1 for SVE can be expressed as PTEST(Op, LAST) ? 1 : 0
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue Pg = getPTrue(DAG, SDLoc(N), OpVT, AArch64SVEPredPattern::all);
+  return getPTest(DAG, N->getValueType(0), Pg, SetCC, AArch64CC::LAST_ACTIVE);
+}
+
 static SDValue
 performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                const AArch64Subtarget *Subtarget) {
   assert(N->getOpcode() == ISD::EXTRACT_VECTOR_ELT);
   if (SDValue Res = performFirstTrueTestVectorCombine(N, DCI, Subtarget))
     return Res;
+  if (SDValue Res = performLastTrueTestVectorCombine(N, DCI, Subtarget))
+    return Res;
 
   SelectionDAG &DAG = DCI.DAG;
   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D121180.415151.patch
Type: text/x-patch
Size: 3613 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220314/e5e7221d/attachment.bin>