[llvm] 139f678 - [AArch64] Remove dead tryMLAV64LaneV128 and tryMULLV64LaneV128 code.

Wed Apr 19 06:26:32 PDT 2023

Author: David Green
Date: 2023-04-19T14:26:27+01:00
New Revision: 139f678c7849d90eff715da99d80bbb8b75a79a5

URL: https://github.com/llvm/llvm-project/commit/139f678c7849d90eff715da99d80bbb8b75a79a5
DIFF: https://github.com/llvm/llvm-project/commit/139f678c7849d90eff715da99d80bbb8b75a79a5.diff

LOG: [AArch64] Remove dead tryMLAV64LaneV128 and tryMULLV64LaneV128 code.

As far as I can tell this code is never used, as the pattern recognised by
checkHighLaneIndex (a duplane with insert_subvec and extract_subvec) will not
be generated any more. There are no tests that change from removing it
(including the clang neon tests), and it didn't appear to come up in any
benchmarks I ran. There are already existing tablegen patterns for MLA with
index and s/umull with index.

Removing it also prevents it from causing problems for SVE, as in #62151.

Differential Revision: https://reviews.llvm.org/D148646

Added: 
    llvm/test/CodeGen/AArch64/sve-pr62151.ll

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
index fff47258e131..b9714e52f20b 100644

--- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
@@ -68,8 +68,6 @@ class AArch64DAGToDAGISel : public SelectionDAGISel {
   template <signed Low, signed High, signed Scale>
   bool SelectRDVLImm(SDValue N, SDValue &Imm);
 
-  bool tryMLAV64LaneV128(SDNode *N);
-  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
   bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
@@ -835,135 +833,6 @@ getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
   return AArch64_AM::InvalidShiftExtend;
 }
 
-// Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
-static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
-  if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
-      DL->getOpcode() != AArch64ISD::DUPLANE32)
-    return false;
-
-  SDValue SV = DL->getOperand(0);
-  if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
-    return false;
-
-  SDValue EV = SV.getOperand(1);
-  if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
-    return false;
-
-  ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
-  ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
-  LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
-  LaneOp = EV.getOperand(0);
-
-  return true;
-}
-
-// Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
-// high lane extract.
-static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
-                             SDValue &LaneOp, int &LaneIdx) {
-
-  if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
-    std::swap(Op0, Op1);
-    if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
-      return false;
-  }
-  StdOp = Op1;
-  return true;
-}
-
-/// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
-/// is a lane in the upper half of a 128-bit vector.  Recognize and select this
-/// so that we don't emit unnecessary lane extracts.
-bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
-  SDLoc dl(N);
-  SDValue Op0 = N->getOperand(0);
-  SDValue Op1 = N->getOperand(1);
-  SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
-  SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
-  int LaneIdx = -1; // Will hold the lane index.
-
-  if (Op1.getOpcode() != ISD::MUL ||
-      !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
-                        LaneIdx)) {
-    std::swap(Op0, Op1);
-    if (Op1.getOpcode() != ISD::MUL ||
-        !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
-                          LaneIdx))
-      return false;
-  }
-
-  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
-
-  SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
-
-  unsigned MLAOpc = ~0U;
-
-  switch (N->getSimpleValueType(0).SimpleTy) {
-  default:
-    llvm_unreachable("Unrecognized MLA.");
-  case MVT::v4i16:
-    MLAOpc = AArch64::MLAv4i16_indexed;
-    break;
-  case MVT::v8i16:
-    MLAOpc = AArch64::MLAv8i16_indexed;
-    break;
-  case MVT::v2i32:
-    MLAOpc = AArch64::MLAv2i32_indexed;
-    break;
-  case MVT::v4i32:
-    MLAOpc = AArch64::MLAv4i32_indexed;
-    break;
-  }
-
-  ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
-  return true;
-}
-
-bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
-  SDLoc dl(N);
-  SDValue SMULLOp0;
-  SDValue SMULLOp1;
-  int LaneIdx;
-
-  if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
-                        LaneIdx))
-    return false;
-
-  SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
-
-  SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
-
-  unsigned SMULLOpc = ~0U;
-
-  if (IntNo == Intrinsic::aarch64_neon_smull) {
-    switch (N->getSimpleValueType(0).SimpleTy) {
-    default:
-      llvm_unreachable("Unrecognized SMULL.");
-    case MVT::v4i32:
-      SMULLOpc = AArch64::SMULLv4i16_indexed;
-      break;
-    case MVT::v2i64:
-      SMULLOpc = AArch64::SMULLv2i32_indexed;
-      break;
-    }
-  } else if (IntNo == Intrinsic::aarch64_neon_umull) {
-    switch (N->getSimpleValueType(0).SimpleTy) {
-    default:
-      llvm_unreachable("Unrecognized SMULL.");
-    case MVT::v4i32:
-      SMULLOpc = AArch64::UMULLv4i16_indexed;
-      break;
-    case MVT::v2i64:
-      SMULLOpc = AArch64::UMULLv2i32_indexed;
-      break;
-    }
-  } else
-    llvm_unreachable("Unrecognized intrinsic.");
-
-  ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
-  return true;
-}
-
 /// Instructions that accept extend modifiers like UXTW expect the register
 /// being extended to be a GPR32, but the incoming DAG might be acting on a
 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
@@ -4307,11 +4176,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
       return;
     break;
 
-  case ISD::ADD:
-    if (tryMLAV64LaneV128(Node))
-      return;
-    break;
-
   case ISD::LOAD: {
     // Try to select as an indexed load. Fall through to normal processing
     // if we can't.
@@ -4955,11 +4819,6 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) {
                                            : AArch64::TBXv16i8Four,
                   true);
       return;
-    case Intrinsic::aarch64_neon_smull:
-    case Intrinsic::aarch64_neon_umull:
-      if (tryMULLV64LaneV128(IntNo, Node))
-        return;
-      break;
     case Intrinsic::aarch64_sve_srshl_single_x2:
       if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
               Node->getValueType(0),

diff  --git a/llvm/test/CodeGen/AArch64/sve-pr62151.ll b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
new file mode 100644
index 000000000000..07089dd10211
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
@@ -0,0 +1,23 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -mtriple=aarch64-none-eabi -mattr=+sve  < %s | FileCheck %s
+
+
+define i32 @build_interpolation(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
+; CHECK-LABEL: build_interpolation:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    mul v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ptrue p0.s, vl2
+; CHECK-NEXT:    // kill: def $d2 killed $d2 def $z2
+; CHECK-NEXT:    sdiv z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT:    mla v0.2s, v1.2s, v0.s[1]
+; CHECK-NEXT:    fmov w0, s0
+; CHECK-NEXT:    ret
+entry:
+  %3 = mul nsw <2 x i32> %1, %0
+  %4 = sdiv <2 x i32> %3, %2
+  %shift = shufflevector <2 x i32> %4, <2 x i32> poison, <2 x i32> <i32 1, i32 1>
+  %5 = mul nsw <2 x i32> %shift, %1
+  %6 = add nsw <2 x i32> %5, %4
+  %add = extractelement <2 x i32> %6, i64 0
+  ret i32 %add
+}