[llvm] [AArch64][Codegen]Transform saturating smull to sqdmulh (PR #143671)

Nashe Mncube via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 2 03:10:33 PDT 2025


https://github.com/nasherm updated https://github.com/llvm/llvm-project/pull/143671

>From db2aaabfced58858c9fa98a4617ebbcdfe8cba40 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Tue, 10 Jun 2025 16:20:42 +0100
Subject: [PATCH 1/7] [AArch64][Codegen]Transform saturating smull to sqdmulh

This patch adds a pattern for recognizing saturating vector
smull. Prior to this patch such multiplies were lowered to a
combination of smull+smull2+uzp2+smin like the following

```
        smull2  v5.2d, v1.4s, v2.4s
        smull   v1.2d, v1.2s, v2.2s
        uzp2    v1.4s, v1.4s, v5.4s
        smin    v1.4s, v1.4s, v0.4s
        add     v1.4s, v1.4s, v1.4s
```

which now optimizes to

```
    sqdmulh v0.4s, v1.4s, v0.4s
    sshr    v0.4s, v0.4s, #1
    add v0.4s, v0.4s, v0.4s
```

This only operates on vectors containing Q31 data types.
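
For reference, a minimal scalar sketch of the per-lane Q31 multiply
this pattern arises from (mirroring the IR in the new test; the C++
below and its names are illustrative, not part of the patch):

```
#include <algorithm>
#include <cstdint>

// One lane of the matched pattern: widen, multiply, take the high half,
// clamp the positive overflow case, then renormalize back to Q31.
int32_t mult_q31_lane(int32_t a, int32_t b) {
  int64_t p = (int64_t)a * (int64_t)b;    // 2.62 fixed-point product
  p >>= 32;                               // keep the high half
  p = std::min<int64_t>(p, 0x3FFFFFFF);   // clamp, matching the smin splat
  return (int32_t)(p << 1);               // shift back into Q31
}
```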

Change-Id: Ib7d4d5284d1bd3fdd0907365f9e2f37f4da14671
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 73 +++++++++++++++++++
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  7 ++
 .../CodeGen/AArch64/saturating-vec-smull.ll   | 25 +++++++
 3 files changed, 105 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/saturating-vec-smull.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9f51caef6d228..4abe7af42aba8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -26356,6 +26356,77 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
   return NVCAST;
 }
 
+// A special combine for the vqdmulh family of instructions. This is one of the
+// potential set of patterns that could produce this instruction. The base pattern
+// vshl(smin(uzp(smull, smull2), 1) can be reduced to vshl(vshr(sqdmulh(...),
+// 1), 1) when operating on Q31 data types
+static SDValue performVSHLCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  SelectionDAG &DAG) {
+
+  SDValue Op0 = N->getOperand(0);
+  ConstantSDNode *Splat = isConstOrConstSplat(N->getOperand(1));
+
+  if (Op0.getOpcode() != ISD::SMIN || !Splat || !Splat->isOne())
+    return SDValue();
+
+  auto trySQDMULHCombine = [](SDNode *N, SelectionDAG &DAG) -> SDValue {
+    EVT VT = N->getValueType(0);
+
+    if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
+      return SDValue();
+
+    ConstantSDNode *Clamp;
+
+    if (N->getOpcode() != ISD::SMIN)
+      return SDValue();
+
+    Clamp = isConstOrConstSplat(N->getOperand(1));
+
+    if (!Clamp) {
+      return SDValue();
+    }
+
+    MVT ScalarType;
+    int ShftAmt = 0;
+    // Here we are considering clamped Arm Q format
+    // data types which uses 2 upper bits, one for the
+    // integer part and one for the sign.
+    switch (Clamp->getSExtValue()) {
+    case (1ULL << 30) - 1:
+      ScalarType = MVT::i32;
+      ShftAmt = 32;
+      break;
+    default:
+      return SDValue();
+    }
+
+    SDValue Mulhs = N->getOperand(0);
+    if (Mulhs.getOpcode() != ISD::MULHS)
+      return SDValue();
+
+    SDValue V0 = Mulhs.getOperand(0);
+    SDValue V1 = Mulhs.getOperand(1);
+
+    SDLoc DL(Mulhs);
+    const unsigned LegalLanes = 128 / ShftAmt;
+    EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
+    return DAG.getNode(AArch64ISD::SQDMULH, DL, LegalVecVT, V0, V1);
+  };
+
+  if (SDValue Val = trySQDMULHCombine(Op0.getNode(), DAG)) {
+    SDLoc DL(N);
+    EVT VecVT = N->getOperand(0).getValueType();
+    // Clear lower bits for correctness
+    SDValue RightShift =
+        DAG.getNode(AArch64ISD::VASHR, DL, VecVT, Val, N->getOperand(1));
+    return DAG.getNode(AArch64ISD::VSHL, DL, VecVT, RightShift,
+                       N->getOperand(1));
+  }
+
+  return SDValue();
+}
+
 /// If the operand is a bitwise AND with a constant RHS, and the shift has a
 /// constant RHS and is the only use, we can pull it out of the shift, i.e.
 ///
@@ -26496,6 +26567,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performMaskedGatherScatterCombine(N, DCI, DAG);
   case ISD::FP_EXTEND:
     return performFPExtendCombine(N, DAG, DCI, Subtarget);
+  case AArch64ISD::VSHL:
+    return performVSHLCombine(N, DCI, DAG);
   case AArch64ISD::BRCOND:
     return performBRCONDCombine(N, DCI, DAG);
   case AArch64ISD::TBNZ:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 727831896737d..75910fb47c663 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -992,6 +992,7 @@ def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                              [SDNPCommutative]>;
 def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                              [SDNPCommutative]>;
+def AArch64sqdmulh : SDNode<"AArch64ISD::SQDMULH", SDT_AArch64mull>;
 
 // Reciprocal estimates and steps.
 def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
@@ -1194,6 +1195,7 @@ def AArch64gld1q_index_merge_zero
   : SDNode<"AArch64ISD::GLD1Q_INDEX_MERGE_ZERO", SDTypeProfile<1, 4, []>,
            [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
+
 // Match add node and also treat an 'or' node is as an 'add' if the or'ed operands
 // have no common bits.
 def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
@@ -8262,6 +8264,7 @@ def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
 defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
 
+
 defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                      int_aarch64_neon_sqdmulh_laneq>;
 defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
@@ -9365,6 +9368,10 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
                              (EXTRACT_SUBREG V128:$Rm, dsub)),
            (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
 
+
+def : Pat<(v4i32 (AArch64sqdmulh (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
+          (SQDMULHv4i32 V128:$Rn, V128:$Rm)>;
+
 // Conversions within AdvSIMD types in the same register size are free.
 // But because we need a consistent lane ordering, in big endian many
 // conversions require one or more REV instructions.
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
new file mode 100644
index 0000000000000..c1bb370ac3e89
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -0,0 +1,25 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s
+
+define <4 x i32> @arm_mult_q31(ptr %0, ptr %1){
+; CHECK-LABEL: arm_mult_q31:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ldr q0, [x0]
+; CHECK-NEXT:    ldr q1, [x1]
+; CHECK-NEXT:    sqdmulh v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #1
+; CHECK-NEXT:    add v0.4s, v0.4s, v0.4s
+; CHECK-NEXT:    ret
+  %7 = getelementptr i8, ptr %0, i64 0
+  %9 = getelementptr i8, ptr %1, i64 0
+  %12 = load <4 x i32>, ptr %7, align 4
+  %13 = sext <4 x i32> %12 to <4 x i64>
+  %14 = load <4 x i32>, ptr %9, align 4
+  %15 = sext <4 x i32> %14 to <4 x i64>
+  %16 = mul nsw <4 x i64> %15, %13
+  %17 = lshr <4 x i64> %16, splat (i64 32)
+  %18 = trunc nuw <4 x i64> %17 to <4 x i32>
+  %19 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %18, <4 x i32> splat (i32 1073741823))
+  %20 = shl <4 x i32> %19, splat (i32 1)
+  ret <4 x i32> %20
+}

>From 30ea16bd37c31616fc6b44bfae87b80c6ad49beb Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Thu, 26 Jun 2025 11:27:14 +0100
Subject: [PATCH 2/7] Responding to review comments

Based on the most recent PR comments I've
- refactored the change to work on a reduced pattern
  which is truer to the actual SQDMULH instruction
  (a scalar sketch of the equivalence follows below)
- added pattern matches for Q31, Q15, int32 and int16
  data types
- rewritten and extended the tests
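
A minimal scalar sketch (my illustration, not code from the patch) of
why the reduced pattern and SQDMULH agree on i16 lanes, including the
a == b == INT16_MIN case where both clamp to 0x7FFF:

```
#include <algorithm>
#include <cstdint>

// One i16 lane of the reduced pattern:
//   trunc(smin(sra(mul(sext a, sext b), 15), 32767))
int16_t pattern_lane(int16_t a, int16_t b) {
  int32_t m = (int32_t)a * (int32_t)b;       // mul(sext a, sext b)
  int32_t sh = m >> 15;                      // sra by 15
  return (int16_t)std::min(sh, 0x7FFF);      // smin with 32767, then truncate
}

// One i16 lane of SQDMULH: saturating doubling multiply returning high half.
int16_t sqdmulh_lane(int16_t a, int16_t b) {
  int64_t dbl = 2 * (int64_t)a * (int64_t)b; // widen so the doubling can't overflow
  int64_t hi = dbl >> 16;                    // high half of the doubled product
  return (int16_t)std::min<int64_t>(hi, 0x7FFF); // only a == b == INT16_MIN saturates
}
```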

Change-Id: I18c05e56b3979b8dd757d533e44a65496434937b
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 153 +++++++++---------
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |   3 +
 .../CodeGen/AArch64/saturating-vec-smull.ll   |  69 +++++---
 3 files changed, 134 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 4abe7af42aba8..6bf65700c9c38 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20717,6 +20717,83 @@ static SDValue performBuildVectorCombine(SDNode *N,
   return SDValue();
 }
 
+// A special combine for the vqdmulh family of instructions.
+// truncate( smin( sra ( mul( sext v0, sext v1 ) ), SHIFT_AMOUNT ),
+// SATURATING_VAL ) can be reduced to sqdmulh(...)
+static SDValue trySQDMULHCombine(SDNode *N,
+                                 TargetLowering::DAGCombinerInfo &DCI,
+                                 SelectionDAG &DAG) {
+
+  if (N->getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+
+  if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
+    return SDValue();
+
+  SDValue SMin = N->getOperand(0);
+
+  if (SMin.getOpcode() != ISD::SMIN)
+    return SDValue();
+
+  ConstantSDNode *Clamp = isConstOrConstSplat(SMin.getOperand(1));
+
+  if (!Clamp)
+    return SDValue();
+
+  MVT ScalarType;
+  unsigned ShiftAmt = 0;
+  // Here we are considering clamped Arm Q format
+  // data types which use 2 upper bits, one for the
+  // integer part and one for the sign. We also consider
+  // standard signed integer types
+  switch (Clamp->getSExtValue()) {
+  case (1ULL << 14) - 1: // Q15 saturation
+  case (1ULL << 15) - 1:
+    ScalarType = MVT::i16;
+    ShiftAmt = 16;
+    break;
+  case (1ULL << 30) - 1: // Q31 saturation
+  case (1ULL << 31) - 1:
+    ScalarType = MVT::i32;
+    ShiftAmt = 32;
+    break;
+  default:
+    return SDValue();
+  }
+
+  SDValue Sra = SMin.getOperand(0);
+  if (Sra.getOpcode() != ISD::SRA)
+    return SDValue();
+
+  ConstantSDNode *RightShiftVec = isConstOrConstSplat(Sra.getOperand(1));
+  if (!RightShiftVec)
+    return SDValue();
+  unsigned SExtValue = RightShiftVec->getSExtValue();
+
+  if (SExtValue != ShiftAmt && SExtValue != (ShiftAmt - 1))
+    return SDValue();
+
+  SDValue Mul = Sra.getOperand(0);
+  if (Mul.getOpcode() != ISD::MUL)
+    return SDValue();
+
+  SDValue SExt0 = Mul.getOperand(0);
+  SDValue SExt1 = Mul.getOperand(1);
+
+  if (SExt0.getOpcode() != ISD::SIGN_EXTEND ||
+      SExt1.getOpcode() != ISD::SIGN_EXTEND)
+    return SDValue();
+
+  SDValue V0 = SExt0.getOperand(0);
+  SDValue V1 = SExt1.getOperand(0);
+
+  SDLoc DL(N);
+  EVT VecVT = N->getValueType(0);
+  return DAG.getNode(AArch64ISD::SQDMULH, DL, VecVT, V0, V1);
+}
+
 static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI) {
   SDLoc DL(N);
@@ -20731,6 +20808,9 @@ static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(N0.getOpcode(), DL, VT, Op);
   }
 
+  if (SDValue V = trySQDMULHCombine(N, DCI, DAG))
+    return V;
+
   // Performing the following combine produces a preferable form for ISEL.
   // i32 (trunc (extract Vi64, idx)) -> i32 (extract (nvcast Vi32), idx*2))
   if (DCI.isAfterLegalizeDAG() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -26356,77 +26436,6 @@ performScalarToVectorCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
   return NVCAST;
 }
 
-// A special combine for the vqdmulh family of instructions. This is one of the
-// potential set of patterns that could produce this instruction. The base pattern
-// vshl(smin(uzp(smull, smull2), 1) can be reduced to vshl(vshr(sqdmulh(...),
-// 1), 1) when operating on Q31 data types
-static SDValue performVSHLCombine(SDNode *N,
-                                  TargetLowering::DAGCombinerInfo &DCI,
-                                  SelectionDAG &DAG) {
-
-  SDValue Op0 = N->getOperand(0);
-  ConstantSDNode *Splat = isConstOrConstSplat(N->getOperand(1));
-
-  if (Op0.getOpcode() != ISD::SMIN || !Splat || !Splat->isOne())
-    return SDValue();
-
-  auto trySQDMULHCombine = [](SDNode *N, SelectionDAG &DAG) -> SDValue {
-    EVT VT = N->getValueType(0);
-
-    if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
-      return SDValue();
-
-    ConstantSDNode *Clamp;
-
-    if (N->getOpcode() != ISD::SMIN)
-      return SDValue();
-
-    Clamp = isConstOrConstSplat(N->getOperand(1));
-
-    if (!Clamp) {
-      return SDValue();
-    }
-
-    MVT ScalarType;
-    int ShftAmt = 0;
-    // Here we are considering clamped Arm Q format
-    // data types which uses 2 upper bits, one for the
-    // integer part and one for the sign.
-    switch (Clamp->getSExtValue()) {
-    case (1ULL << 30) - 1:
-      ScalarType = MVT::i32;
-      ShftAmt = 32;
-      break;
-    default:
-      return SDValue();
-    }
-
-    SDValue Mulhs = N->getOperand(0);
-    if (Mulhs.getOpcode() != ISD::MULHS)
-      return SDValue();
-
-    SDValue V0 = Mulhs.getOperand(0);
-    SDValue V1 = Mulhs.getOperand(1);
-
-    SDLoc DL(Mulhs);
-    const unsigned LegalLanes = 128 / ShftAmt;
-    EVT LegalVecVT = MVT::getVectorVT(ScalarType, LegalLanes);
-    return DAG.getNode(AArch64ISD::SQDMULH, DL, LegalVecVT, V0, V1);
-  };
-
-  if (SDValue Val = trySQDMULHCombine(Op0.getNode(), DAG)) {
-    SDLoc DL(N);
-    EVT VecVT = N->getOperand(0).getValueType();
-    // Clear lower bits for correctness
-    SDValue RightShift =
-        DAG.getNode(AArch64ISD::VASHR, DL, VecVT, Val, N->getOperand(1));
-    return DAG.getNode(AArch64ISD::VSHL, DL, VecVT, RightShift,
-                       N->getOperand(1));
-  }
-
-  return SDValue();
-}
-
 /// If the operand is a bitwise AND with a constant RHS, and the shift has a
 /// constant RHS and is the only use, we can pull it out of the shift, i.e.
 ///
@@ -26567,8 +26576,6 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performMaskedGatherScatterCombine(N, DCI, DAG);
   case ISD::FP_EXTEND:
     return performFPExtendCombine(N, DAG, DCI, Subtarget);
-  case AArch64ISD::VSHL:
-    return performVSHLCombine(N, DCI, DAG);
   case AArch64ISD::BRCOND:
     return performBRCONDCombine(N, DCI, DAG);
   case AArch64ISD::TBNZ:
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 75910fb47c663..755c3a22ffa89 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9369,6 +9369,9 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
            (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
 
 
+def : Pat<(v8i16 (AArch64sqdmulh (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
+          (SQDMULHv8i16 V128:$Rn, V128:$Rm)>;
+
 def : Pat<(v4i32 (AArch64sqdmulh (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
           (SQDMULHv4i32 V128:$Rn, V128:$Rm)>;
 
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
index c1bb370ac3e89..2bc1a427a6b99 100644
--- a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -1,25 +1,58 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s
 
-define <4 x i32> @arm_mult_q31(ptr %0, ptr %1){
-; CHECK-LABEL: arm_mult_q31:
+define <8 x i16> @saturating_int16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: saturating_int16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %as = sext <8 x i16> %a to <8 x i32>
+  %bs = sext <8 x i16> %b to <8 x i32>
+  %m = mul <8 x i32> %bs, %as
+  %sh = ashr <8 x i32> %m, splat (i32 15)
+  %ma = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %sh, <8 x i32> splat (i32 32767))
+  %t = trunc <8 x i32> %ma to <8 x i16>
+  ret <8 x i16> %t
+}
+
+define <4 x i32> @saturating_int32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: saturating_int32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh v0.4s, v1.4s, v0.4s
+; CHECK-NEXT:    ret
+  %as = sext <4 x i32> %a to <4 x i64>
+  %bs = sext <4 x i32> %b to <4 x i64>
+  %m = mul <4 x i64> %bs, %as
+  %sh = ashr <4 x i64> %m, splat (i64 31)
+  %ma = tail call <4 x i64> @llvm.smin.v8i32(<4 x i64> %sh, <4 x i64> splat (i64 2147483647))
+  %t = trunc <4 x i64> %ma to <4 x i32>
+  ret <4 x i32> %t
+}
+
+define <8 x i16> @saturating_q15(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: saturating_q15:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    ret
+  %as = sext <8 x i16> %a to <8 x i32>
+  %bs = sext <8 x i16> %b to <8 x i32>
+  %m = mul <8 x i32> %bs, %as
+  %sh = ashr <8 x i32> %m, splat (i32 16)
+  %ma = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %sh, <8 x i32> splat (i32 16383))
+  %t = trunc <8 x i32> %ma to <8 x i16>
+  ret <8 x i16> %t
+}
+
+define <4 x i32> @saturating_q31(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: saturating_q31:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ldr q0, [x0]
-; CHECK-NEXT:    ldr q1, [x1]
 ; CHECK-NEXT:    sqdmulh v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    sshr v0.4s, v0.4s, #1
-; CHECK-NEXT:    add v0.4s, v0.4s, v0.4s
 ; CHECK-NEXT:    ret
-  %7 = getelementptr i8, ptr %0, i64 0
-  %9 = getelementptr i8, ptr %1, i64 0
-  %12 = load <4 x i32>, ptr %7, align 4
-  %13 = sext <4 x i32> %12 to <4 x i64>
-  %14 = load <4 x i32>, ptr %9, align 4
-  %15 = sext <4 x i32> %14 to <4 x i64>
-  %16 = mul nsw <4 x i64> %15, %13
-  %17 = lshr <4 x i64> %16, splat (i64 32)
-  %18 = trunc nuw <4 x i64> %17 to <4 x i32>
-  %19 = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %18, <4 x i32> splat (i32 1073741823))
-  %20 = shl <4 x i32> %19, splat (i32 1)
-  ret <4 x i32> %20
+  %as = sext <4 x i32> %a to <4 x i64>
+  %bs = sext <4 x i32> %b to <4 x i64>
+  %m = mul <4 x i64> %bs, %as
+  %sh = ashr <4 x i64> %m, splat (i64 32)
+  %ma = tail call <4 x i64> @llvm.smin.v8i32(<4 x i64> %sh, <4 x i64> splat (i64 1073741823))
+  %t = trunc <4 x i64> %ma to <4 x i32>
+  ret <4 x i32> %t
 }

>From b37b115118a949528321591fbc3b03e1ff9dcf1b Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Thu, 26 Jun 2025 13:16:10 +0100
Subject: [PATCH 3/7] Arithmetic error for Q types

Spotted and fixed an arithmetic error when working with Q types:
the matched arithmetic shift must be one bit less than the source
element width (15 for Q15, 31 for Q31), matching SQDMULH's doubling
multiply, as illustrated below.
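
A tiny standalone check of the fix (values chosen by me for
illustration, not taken from the patch):

```
#include <cassert>
#include <cstdint>

int main() {
  // 0.5 * 0.5 in Q31. SQDMULH doubles the product before taking the high
  // half, which is the same as shifting the plain product right by 31.
  int32_t a = 0x40000000, b = 0x40000000;
  int64_t p = (int64_t)a * (int64_t)b;
  assert((p >> 31) == 0x20000000); // 0.25 in Q31 -- what sqdmulh produces
  assert((p >> 32) == 0x10000000); // shifting by 32 would be off by 2x
  return 0;
}
```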

Change-Id: I80f8e04bca08d3e6bc2740201bdd4978446a397f
---
 llvm/lib/Target/AArch64/AArch64ISelLowering.cpp   | 2 +-
 llvm/test/CodeGen/AArch64/saturating-vec-smull.ll | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 6bf65700c9c38..3811d51b0c00d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20772,7 +20772,7 @@ static SDValue trySQDMULHCombine(SDNode *N,
     return SDValue();
   unsigned SExtValue = RightShiftVec->getSExtValue();
 
-  if (SExtValue != ShiftAmt && SExtValue != (ShiftAmt - 1))
+  if (SExtValue != (ShiftAmt - 1))
     return SDValue();
 
   SDValue Mul = Sra.getOperand(0);
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
index 2bc1a427a6b99..9d478462feae0 100644
--- a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -37,7 +37,7 @@ define <8 x i16> @saturating_q15(<8 x i16> %a, <8 x i16> %b) {
   %as = sext <8 x i16> %a to <8 x i32>
   %bs = sext <8 x i16> %b to <8 x i32>
   %m = mul <8 x i32> %bs, %as
-  %sh = ashr <8 x i32> %m, splat (i32 16)
+  %sh = ashr <8 x i32> %m, splat (i32 15)
   %ma = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %sh, <8 x i32> splat (i32 16383))
   %t = trunc <8 x i32> %ma to <8 x i16>
   ret <8 x i16> %t
@@ -51,7 +51,7 @@ define <4 x i32> @saturating_q31(<4 x i32> %a, <4 x i32> %b) {
   %as = sext <4 x i32> %a to <4 x i64>
   %bs = sext <4 x i32> %b to <4 x i64>
   %m = mul <4 x i64> %bs, %as
-  %sh = ashr <4 x i64> %m, splat (i64 32)
+  %sh = ashr <4 x i64> %m, splat (i64 31)
   %ma = tail call <4 x i64> @llvm.smin.v8i32(<4 x i64> %sh, <4 x i64> splat (i64 1073741823))
   %t = trunc <4 x i64> %ma to <4 x i32>
   ret <4 x i32> %t

>From ed91299e99bd2c512d2be57b1758d98e235eb3ac Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Fri, 27 Jun 2025 15:29:50 +0100
Subject: [PATCH 4/7] Responding to review comments

- support for v2i32 and v4i16 patterns
- extra type checking on sext
- matching on smin over sext
- cleaning trailing lines

Change-Id: I9f61b8d77a61f3d44ad5073b41555c9ad5653e1a
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 33 ++++----
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  7 +-
 .../CodeGen/AArch64/saturating-vec-smull.ll   | 81 ++++++++++++-------
 3 files changed, 74 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 3811d51b0c00d..1c49b8897185c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -20718,11 +20718,9 @@ static SDValue performBuildVectorCombine(SDNode *N,
 }
 
 // A special combine for the vqdmulh family of instructions.
-// truncate( smin( sra ( mul( sext v0, sext v1 ) ), SHIFT_AMOUNT ),
-// SATURATING_VAL ) can be reduced to sqdmulh(...)
-static SDValue trySQDMULHCombine(SDNode *N,
-                                 TargetLowering::DAGCombinerInfo &DCI,
-                                 SelectionDAG &DAG) {
+// smin( sra ( mul( sext v0, sext v1 ) ), SHIFT_AMOUNT ),
+// SATURATING_VAL ) can be reduced to sext(sqdmulh(...))
+static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
 
   if (N->getOpcode() != ISD::TRUNCATE)
     return SDValue();
@@ -20744,17 +20742,11 @@ static SDValue trySQDMULHCombine(SDNode *N,
 
   MVT ScalarType;
   unsigned ShiftAmt = 0;
-  // Here we are considering clamped Arm Q format
-  // data types which use 2 upper bits, one for the
-  // integer part and one for the sign. We also consider
-  // standard signed integer types
   switch (Clamp->getSExtValue()) {
-  case (1ULL << 14) - 1: // Q15 saturation
   case (1ULL << 15) - 1:
     ScalarType = MVT::i16;
     ShiftAmt = 16;
     break;
-  case (1ULL << 30) - 1: // Q31 saturation
   case (1ULL << 31) - 1:
     ScalarType = MVT::i32;
     ShiftAmt = 32;
@@ -20783,15 +20775,23 @@ static SDValue trySQDMULHCombine(SDNode *N,
   SDValue SExt1 = Mul.getOperand(1);
 
   if (SExt0.getOpcode() != ISD::SIGN_EXTEND ||
-      SExt1.getOpcode() != ISD::SIGN_EXTEND)
+      SExt1.getOpcode() != ISD::SIGN_EXTEND ||
+      SExt0.getValueType() != SExt1.getValueType())
+    return SDValue();
+
+  if ((ShiftAmt == 16 && (SExt0.getValueType() != MVT::v8i32 &&
+                          SExt0.getValueType() != MVT::v4i32)) ||
+      (ShiftAmt == 32 && (SExt0.getValueType() != MVT::v4i64 &&
+                          SExt0.getValueType() != MVT::v2i64)))
     return SDValue();
 
   SDValue V0 = SExt0.getOperand(0);
   SDValue V1 = SExt1.getOperand(0);
 
-  SDLoc DL(N);
+  SDLoc DL(SMin);
   EVT VecVT = N->getValueType(0);
-  return DAG.getNode(AArch64ISD::SQDMULH, DL, VecVT, V0, V1);
+  SDValue SQDMULH = DAG.getNode(AArch64ISD::SQDMULH, DL, VecVT, V0, V1);
+  return DAG.getNode(ISD::SIGN_EXTEND, DL, N->getValueType(0), SQDMULH);
 }
 
 static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
@@ -20808,8 +20808,9 @@ static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(N0.getOpcode(), DL, VT, Op);
   }
 
-  if (SDValue V = trySQDMULHCombine(N, DCI, DAG))
-    return V;
+  if (SDValue V = trySQDMULHCombine(N, DAG)) {
+    return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
+  }
 
   // Performing the following combine produces a preferable form for ISEL.
   // i32 (trunc (extract Vi64, idx)) -> i32 (extract (nvcast Vi32), idx*2))
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 755c3a22ffa89..8a960031ab3c5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -1195,7 +1195,6 @@ def AArch64gld1q_index_merge_zero
   : SDNode<"AArch64ISD::GLD1Q_INDEX_MERGE_ZERO", SDTypeProfile<1, 4, []>,
            [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
 
-
 // Match add node and also treat an 'or' node is as an 'add' if the or'ed operands
 // have no common bits.
 def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
@@ -8264,7 +8263,6 @@ def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
 defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
 defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
 
-
 defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
                                      int_aarch64_neon_sqdmulh_laneq>;
 defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
@@ -9368,6 +9366,11 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
                              (EXTRACT_SUBREG V128:$Rm, dsub)),
            (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
 
+def : Pat<(v4i16 (AArch64sqdmulh (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
+          (SQDMULHv4i16 V64:$Rn, V64:$Rm)>;
+
+def : Pat<(v2i32 (AArch64sqdmulh (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
+          (SQDMULHv2i32 V64:$Rn, V64:$Rm)>;
 
 def : Pat<(v8i16 (AArch64sqdmulh (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
           (SQDMULHv8i16 V128:$Rn, V128:$Rm)>;
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
index 9d478462feae0..7094f6c8aafa7 100644
--- a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -1,8 +1,22 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s
 
-define <8 x i16> @saturating_int16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: saturating_int16:
+define <4 x i16> @saturating_4xi16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: saturating_4xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh v0.4h, v1.4h, v0.4h
+; CHECK-NEXT:    ret
+  %as = sext <4 x i16> %a to <4 x i32>
+  %bs = sext <4 x i16> %b to <4 x i32>
+  %m = mul <4 x i32> %bs, %as
+  %sh = ashr <4 x i32> %m, splat (i32 15)
+  %ma = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %sh, <4 x i32> splat (i32 32767))
+  %t = trunc <4 x i32> %ma to <4 x i16>
+  ret <4 x i16> %t
+}
+
+define <8 x i16> @saturating_8xi16(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: saturating_8xi16:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sqdmulh v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
@@ -15,36 +29,22 @@ define <8 x i16> @saturating_int16(<8 x i16> %a, <8 x i16> %b) {
   ret <8 x i16> %t
 }
 
-define <4 x i32> @saturating_int32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: saturating_int32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    sqdmulh v0.4s, v1.4s, v0.4s
-; CHECK-NEXT:    ret
-  %as = sext <4 x i32> %a to <4 x i64>
-  %bs = sext <4 x i32> %b to <4 x i64>
-  %m = mul <4 x i64> %bs, %as
-  %sh = ashr <4 x i64> %m, splat (i64 31)
-  %ma = tail call <4 x i64> @llvm.smin.v8i32(<4 x i64> %sh, <4 x i64> splat (i64 2147483647))
-  %t = trunc <4 x i64> %ma to <4 x i32>
-  ret <4 x i32> %t
-}
-
-define <8 x i16> @saturating_q15(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: saturating_q15:
+define <2 x i32> @saturating_2xi32(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: saturating_2xi32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sqdmulh v0.8h, v1.8h, v0.8h
+; CHECK-NEXT:    sqdmulh v0.2s, v1.2s, v0.2s
 ; CHECK-NEXT:    ret
-  %as = sext <8 x i16> %a to <8 x i32>
-  %bs = sext <8 x i16> %b to <8 x i32>
-  %m = mul <8 x i32> %bs, %as
-  %sh = ashr <8 x i32> %m, splat (i32 15)
-  %ma = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %sh, <8 x i32> splat (i32 16383))
-  %t = trunc <8 x i32> %ma to <8 x i16>
-  ret <8 x i16> %t
+  %as = sext <2 x i32> %a to <2 x i64>
+  %bs = sext <2 x i32> %b to <2 x i64>
+  %m = mul <2 x i64> %bs, %as
+  %sh = ashr <2 x i64> %m, splat (i64 31)
+  %ma = tail call <2 x i64> @llvm.smin.v8i64(<2 x i64> %sh, <2 x i64> splat (i64 2147483647))
+  %t = trunc <2 x i64> %ma to <2 x i32>
+  ret <2 x i32> %t
 }
 
-define <4 x i32> @saturating_q31(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: saturating_q31:
+define <4 x i32> @saturating_4xi32(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: saturating_4xi32:
 ; CHECK:       // %bb.0:
 ; CHECK-NEXT:    sqdmulh v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
@@ -52,7 +52,30 @@ define <4 x i32> @saturating_q31(<4 x i32> %a, <4 x i32> %b) {
   %bs = sext <4 x i32> %b to <4 x i64>
   %m = mul <4 x i64> %bs, %as
   %sh = ashr <4 x i64> %m, splat (i64 31)
-  %ma = tail call <4 x i64> @llvm.smin.v8i32(<4 x i64> %sh, <4 x i64> splat (i64 1073741823))
+  %ma = tail call <4 x i64> @llvm.smin.v4i64(<4 x i64> %sh, <4 x i64> splat (i64 2147483647))
   %t = trunc <4 x i64> %ma to <4 x i32>
   ret <4 x i32> %t
 }
+
+define <8 x i32> @saturating_8xi32(<8 x i32> %a, <8 x i32> %b) {
+; CHECK-LABEL: saturating_8xi32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
+; CHECK-NEXT:    ext v5.16b, v3.16b, v3.16b, #8
+; CHECK-NEXT:    ext v6.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT:    ext v7.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT:    sqdmulh v1.2s, v3.2s, v1.2s
+; CHECK-NEXT:    sqdmulh v0.2s, v2.2s, v0.2s
+; CHECK-NEXT:    sqdmulh v4.2s, v5.2s, v4.2s
+; CHECK-NEXT:    sqdmulh v3.2s, v7.2s, v6.2s
+; CHECK-NEXT:    mov v1.d[1], v4.d[0]
+; CHECK-NEXT:    mov v0.d[1], v3.d[0]
+; CHECK-NEXT:    ret
+  %as = sext <8 x i32> %a to <8 x i64>
+  %bs = sext <8 x i32> %b to <8 x i64>
+  %m = mul <8 x i64> %bs, %as
+  %sh = ashr <8 x i64> %m, splat (i64 31)
+  %ma = tail call <8 x i64> @llvm.smin.v8i64(<8 x i64> %sh, <8 x i64> splat (i64 2147483647))
+  %t = trunc <8 x i64> %ma to <8 x i32>
+  ret <8 x i32> %t
+}

>From b972638ecc0a07ba0a9fc10cffb443428802ee47 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Mon, 30 Jun 2025 16:05:45 +0100
Subject: [PATCH 5/7] Responding to review comments

- minor cleanup
- allow optimizing concat_vectors(sqdmulh,sqdmulh) -> sqdmulh
- testing EVTs better

Change-Id: I0404fb9900896050baac372b7f7ce3a5b03517b9
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 34 +++++++++++--------
 llvm/lib/Target/AArch64/AArch64InstrInfo.td   |  3 --
 .../CodeGen/AArch64/saturating-vec-smull.ll   | 12 ++-----
 3 files changed, 22 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1c49b8897185c..97816ee84d5db 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -2390,6 +2390,15 @@ static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
   return false;
 }
 
+bool isVectorizedBinOp(unsigned Opcode) {
+  switch (Opcode) {
+  case AArch64ISD::SQDMULH:
+    return true;
+  default:
+    return false;
+  }
+}
+
 // isOpcWithIntImmediate - This method tests to see if the node is a specific
 // opcode and that it has a immediate integer right operand.
 // If so Imm will receive the value.
@@ -19857,8 +19866,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   // size, combine into an binop of two contacts of the source vectors. eg:
   // concat(uhadd(a,b), uhadd(c, d)) -> uhadd(concat(a, c), concat(b, d))
   if (N->getNumOperands() == 2 && N0Opc == N1Opc && VT.is128BitVector() &&
-      DAG.getTargetLoweringInfo().isBinOp(N0Opc) && N0->hasOneUse() &&
-      N1->hasOneUse()) {
+      (DAG.getTargetLoweringInfo().isBinOp(N0Opc) ||
+       isVectorizedBinOp(N0Opc)) &&
+      N0->hasOneUse() && N1->hasOneUse()) {
     SDValue N00 = N0->getOperand(0);
     SDValue N01 = N0->getOperand(1);
     SDValue N10 = N1->getOperand(0);
@@ -20717,7 +20727,7 @@ static SDValue performBuildVectorCombine(SDNode *N,
   return SDValue();
 }
 
-// A special combine for the vqdmulh family of instructions.
+// A special combine for the sqdmulh family of instructions.
 // smin( sra ( mul( sext v0, sext v1 ) ), SHIFT_AMOUNT ),
 // SATURATING_VAL ) can be reduced to sext(sqdmulh(...))
 static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
@@ -20774,24 +20784,20 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
   SDValue SExt0 = Mul.getOperand(0);
   SDValue SExt1 = Mul.getOperand(1);
 
-  if (SExt0.getOpcode() != ISD::SIGN_EXTEND ||
-      SExt1.getOpcode() != ISD::SIGN_EXTEND ||
-      SExt0.getValueType() != SExt1.getValueType())
-    return SDValue();
+  EVT SExt0Type = SExt0.getOperand(0).getValueType();
+  EVT SExt1Type = SExt1.getOperand(0).getValueType();
 
-  if ((ShiftAmt == 16 && (SExt0.getValueType() != MVT::v8i32 &&
-                          SExt0.getValueType() != MVT::v4i32)) ||
-      (ShiftAmt == 32 && (SExt0.getValueType() != MVT::v4i64 &&
-                          SExt0.getValueType() != MVT::v2i64)))
+  if (SExt0.getOpcode() != ISD::SIGN_EXTEND ||
+      SExt1.getOpcode() != ISD::SIGN_EXTEND || SExt0Type != SExt1Type ||
+      SExt0Type.getScalarType() != ScalarType ||
+      SExt0Type.getFixedSizeInBits() > 128)
     return SDValue();
 
   SDValue V0 = SExt0.getOperand(0);
   SDValue V1 = SExt1.getOperand(0);
 
   SDLoc DL(SMin);
-  EVT VecVT = N->getValueType(0);
-  SDValue SQDMULH = DAG.getNode(AArch64ISD::SQDMULH, DL, VecVT, V0, V1);
-  return DAG.getNode(ISD::SIGN_EXTEND, DL, N->getValueType(0), SQDMULH);
+  return DAG.getNode(AArch64ISD::SQDMULH, DL, SExt0Type, V0, V1);
 }
 
 static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index 8a960031ab3c5..0ab6cf0cfe6c9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -9368,13 +9368,10 @@ def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
 
 def : Pat<(v4i16 (AArch64sqdmulh (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
           (SQDMULHv4i16 V64:$Rn, V64:$Rm)>;
-
 def : Pat<(v2i32 (AArch64sqdmulh (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
           (SQDMULHv2i32 V64:$Rn, V64:$Rm)>;
-
 def : Pat<(v8i16 (AArch64sqdmulh (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
           (SQDMULHv8i16 V128:$Rn, V128:$Rm)>;
-
 def : Pat<(v4i32 (AArch64sqdmulh (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
           (SQDMULHv4i32 V128:$Rn, V128:$Rm)>;
 
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
index 7094f6c8aafa7..4bf689f373db3 100644
--- a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -60,16 +60,8 @@ define <4 x i32> @saturating_4xi32(<4 x i32> %a, <4 x i32> %b) {
 define <8 x i32> @saturating_8xi32(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: saturating_8xi32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    ext v4.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT:    ext v5.16b, v3.16b, v3.16b, #8
-; CHECK-NEXT:    ext v6.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT:    ext v7.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT:    sqdmulh v1.2s, v3.2s, v1.2s
-; CHECK-NEXT:    sqdmulh v0.2s, v2.2s, v0.2s
-; CHECK-NEXT:    sqdmulh v4.2s, v5.2s, v4.2s
-; CHECK-NEXT:    sqdmulh v3.2s, v7.2s, v6.2s
-; CHECK-NEXT:    mov v1.d[1], v4.d[0]
-; CHECK-NEXT:    mov v0.d[1], v3.d[0]
+; CHECK-NEXT:    sqdmulh v1.4s, v3.4s, v1.4s
+; CHECK-NEXT:    sqdmulh v0.4s, v2.4s, v0.4s
 ; CHECK-NEXT:    ret
   %as = sext <8 x i32> %a to <8 x i64>
   %bs = sext <8 x i32> %b to <8 x i64>

>From 9b780701b625f426690e451f18bb1d7fca443b22 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 2 Jul 2025 10:58:24 +0100
Subject: [PATCH 6/7] Responding to review comments

- making sure transform only operates on smin nodes
- adding extra tests dealing with interesting edge cases

Change-Id: Ia1114ec9b93c4de3552b867e0d745beccdae69f1
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 44 +++++++-----
 .../CodeGen/AArch64/saturating-vec-smull.ll   | 69 +++++++++++++++++++
 2 files changed, 96 insertions(+), 17 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 729f336021d81..259ca78c64fba 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1137,6 +1137,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
                        ISD::SIGN_EXTEND_INREG, ISD::CONCAT_VECTORS,
                        ISD::EXTRACT_SUBVECTOR, ISD::INSERT_SUBVECTOR,
                        ISD::STORE, ISD::BUILD_VECTOR});
+  setTargetDAGCombine(ISD::SMIN);
   setTargetDAGCombine(ISD::TRUNCATE);
   setTargetDAGCombine(ISD::LOAD);
 
@@ -20933,7 +20934,7 @@ static SDValue performBuildVectorCombine(SDNode *N,
 // SATURATING_VAL ) can be reduced to sext(sqdmulh(...))
 static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
 
-  if (N->getOpcode() != ISD::TRUNCATE)
+  if (N->getOpcode() != ISD::SMIN)
     return SDValue();
 
   EVT VT = N->getValueType(0);
@@ -20941,12 +20942,7 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
   if (!VT.isVector() || VT.getScalarSizeInBits() > 64)
     return SDValue();
 
-  SDValue SMin = N->getOperand(0);
-
-  if (SMin.getOpcode() != ISD::SMIN)
-    return SDValue();
-
-  ConstantSDNode *Clamp = isConstOrConstSplat(SMin.getOperand(1));
+  ConstantSDNode *Clamp = isConstOrConstSplat(N->getOperand(1));
 
   if (!Clamp)
     return SDValue();
@@ -20966,8 +20962,8 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
     return SDValue();
   }
 
-  SDValue Sra = SMin.getOperand(0);
-  if (Sra.getOpcode() != ISD::SRA)
+  SDValue Sra = N->getOperand(0);
+  if (Sra.getOpcode() != ISD::SRA || !Sra.hasOneUse())
     return SDValue();
 
   ConstantSDNode *RightShiftVec = isConstOrConstSplat(Sra.getOperand(1));
@@ -20994,11 +20990,27 @@ static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
       SExt0Type.getFixedSizeInBits() > 128)
     return SDValue();
 
-  SDValue V0 = SExt0.getOperand(0);
-  SDValue V1 = SExt1.getOperand(0);
+  // Source vectors with width < 64 are illegal and will need to be extended
+  unsigned SourceVectorWidth = SExt0Type.getFixedSizeInBits();
+  SDValue V0 = (SourceVectorWidth < 64) ? SExt0 : SExt0.getOperand(0);
+  SDValue V1 = (SourceVectorWidth < 64) ? SExt1 : SExt1.getOperand(0);
+
+  SDLoc DL(N);
+  SDValue SQDMULH =
+      DAG.getNode(AArch64ISD::SQDMULH, DL, V0.getValueType(), V0, V1);
+  EVT DestVT = N->getValueType(0);
+  if (DestVT.getScalarSizeInBits() > SExt0Type.getScalarSizeInBits())
+    return DAG.getNode(ISD::SIGN_EXTEND, DL, DestVT, SQDMULH);
+
+  return SQDMULH;
+}
+
+static SDValue performSMINCombine(SDNode *N, SelectionDAG &DAG) {
+  if (SDValue V = trySQDMULHCombine(N, DAG)) {
+    return V;
+  }
 
-  SDLoc DL(SMin);
-  return DAG.getNode(AArch64ISD::SQDMULH, DL, SExt0Type, V0, V1);
+  return SDValue();
 }
 
 static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
@@ -21015,10 +21027,6 @@ static SDValue performTruncateCombine(SDNode *N, SelectionDAG &DAG,
     return DAG.getNode(N0.getOpcode(), DL, VT, Op);
   }
 
-  if (SDValue V = trySQDMULHCombine(N, DAG)) {
-    return DAG.getNode(ISD::TRUNCATE, DL, VT, V);
-  }
-
   // Performing the following combine produces a preferable form for ISEL.
   // i32 (trunc (extract Vi64, idx)) -> i32 (extract (nvcast Vi32), idx*2))
   if (DCI.isAfterLegalizeDAG() && N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
@@ -26740,6 +26748,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
     return performAddSubCombine(N, DCI);
   case ISD::BUILD_VECTOR:
     return performBuildVectorCombine(N, DCI, DAG);
+  case ISD::SMIN:
+    return performSMINCombine(N, DAG);
   case ISD::TRUNCATE:
     return performTruncateCombine(N, DAG, DCI);
   case AArch64ISD::ANDS:
diff --git a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
index 4bf689f373db3..e9ca1769274b3 100644
--- a/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
+++ b/llvm/test/CodeGen/AArch64/saturating-vec-smull.ll
@@ -1,6 +1,25 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -mtriple=aarch64-none-elf < %s | FileCheck %s
 
+
+define <2 x i16> @saturating_2xi16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: saturating_2xi16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    shl v0.2s, v0.2s, #16
+; CHECK-NEXT:    shl v1.2s, v1.2s, #16
+; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
+; CHECK-NEXT:    sshr v1.2s, v1.2s, #16
+; CHECK-NEXT:    sqdmulh v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    ret
+  %as = sext <2 x i16> %a to <2 x i32>
+  %bs = sext <2 x i16> %b to <2 x i32>
+  %m = mul <2 x i32> %bs, %as
+  %sh = ashr <2 x i32> %m, splat (i32 15)
+  %ma = tail call <2 x i32> @llvm.smin.v4i32(<2 x i32> %sh, <2 x i32> splat (i32 32767))
+  %t = trunc <2 x i32> %ma to <2 x i16>
+  ret <2 x i16> %t
+}
+
 define <4 x i16> @saturating_4xi16(<4 x i16> %a, <4 x i16> %b) {
 ; CHECK-LABEL: saturating_4xi16:
 ; CHECK:       // %bb.0:
@@ -71,3 +90,53 @@ define <8 x i32> @saturating_8xi32(<8 x i32> %a, <8 x i32> %b) {
   %t = trunc <8 x i64> %ma to <8 x i32>
   ret <8 x i32> %t
 }
+
+define <2 x i64> @saturating_2xi32_2xi64(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: saturating_2xi32_2xi64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sqdmulh v0.2s, v1.2s, v0.2s
+; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
+; CHECK-NEXT:    ret
+  %as = sext <2 x i32> %a to <2 x i64>
+  %bs = sext <2 x i32> %b to <2 x i64>
+  %m = mul <2 x i64> %bs, %as
+  %sh = ashr <2 x i64> %m, splat (i64 31)
+  %ma = tail call <2 x i64> @llvm.smin.v8i64(<2 x i64> %sh, <2 x i64> splat (i64 2147483647))
+  ret <2 x i64> %ma
+}
+
+define <4 x i16> @unsupported_saturation_value_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: unsupported_saturation_value_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smull v0.4s, v1.4h, v0.4h
+; CHECK-NEXT:    movi v1.4s, #42
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #15
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %as = sext <4 x i16> %a to <4 x i32>
+  %bs = sext <4 x i16> %b to <4 x i32>
+  %m = mul <4 x i32> %bs, %as
+  %sh = ashr <4 x i32> %m, splat (i32 15)
+  %ma = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %sh, <4 x i32> splat (i32 42))
+  %t = trunc <4 x i32> %ma to <4 x i16>
+  ret <4 x i16> %t
+}
+
+define <4 x i16> @unsupported_shift_value_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: unsupported_shift_value_v4i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    smull v0.4s, v1.4h, v0.4h
+; CHECK-NEXT:    movi v1.4s, #127, msl #8
+; CHECK-NEXT:    sshr v0.4s, v0.4s, #3
+; CHECK-NEXT:    smin v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    xtn v0.4h, v0.4s
+; CHECK-NEXT:    ret
+  %as = sext <4 x i16> %a to <4 x i32>
+  %bs = sext <4 x i16> %b to <4 x i32>
+  %m = mul <4 x i32> %bs, %as
+  %sh = ashr <4 x i32> %m, splat (i32 3)
+  %ma = tail call <4 x i32> @llvm.smin.v4i32(<4 x i32> %sh, <4 x i32> splat (i32 32767))
+  %t = trunc <4 x i32> %ma to <4 x i16>
+  ret <4 x i16> %t
+}

>From 720aedf28445dd2c691134638e7a55b4558030f2 Mon Sep 17 00:00:00 2001
From: nasmnc01 <nashe.mncube at arm.com>
Date: Wed, 2 Jul 2025 11:09:58 +0100
Subject: [PATCH 7/7] Typo in comment

Change-Id: I2fb4cd9827993dbfc51383eabb55dc84ca7eccad
---
 .../Target/AArch64/AArch64ISelLowering.cpp    | 2115 +++++++----------
 1 file changed, 874 insertions(+), 1241 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 259ca78c64fba..328ed4c58bf3c 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -153,13 +153,12 @@ cl::opt<bool> EnableSVEGISel(
     cl::desc("Enable / disable SVE scalable vectors in Global ISel"),
     cl::init(false));
 
-// TODO: This option should be removed once we switch to always using PTRADD in
-// the SelectionDAG.
-static cl::opt<bool> UseFEATCPACodegen(
-    "aarch64-use-featcpa-codegen", cl::Hidden,
-    cl::desc("Generate ISD::PTRADD nodes for pointer arithmetic in "
-             "SelectionDAG for FEAT_CPA"),
-    cl::init(false));
+// FIXME : This is a temporary flag, and is used to help transition to
+// performing lowering the proper way using the new PARTIAL_REDUCE_MLA ISD
+// nodes.
+static cl::opt<bool> EnablePartialReduceNodes(
+    "aarch64-enable-partial-reduce-nodes", cl::init(false), cl::ReallyHidden,
+    cl::desc("Use the new method of lowering partial reductions."));
 
 /// Value type used for condition codes.
 static const MVT MVT_CC = MVT::i32;
@@ -967,6 +966,27 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i16, LibCall);
     setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i32, LibCall);
     setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, LibCall);
+#define LCALLNAMES(A, B, N)                                                    \
+  setLibcallName(A##N##_RELAX, #B #N "_relax");                                \
+  setLibcallName(A##N##_ACQ, #B #N "_acq");                                    \
+  setLibcallName(A##N##_REL, #B #N "_rel");                                    \
+  setLibcallName(A##N##_ACQ_REL, #B #N "_acq_rel");
+#define LCALLNAME4(A, B)                                                       \
+  LCALLNAMES(A, B, 1)                                                          \
+  LCALLNAMES(A, B, 2) LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8)
+#define LCALLNAME5(A, B)                                                       \
+  LCALLNAMES(A, B, 1)                                                          \
+  LCALLNAMES(A, B, 2)                                                          \
+  LCALLNAMES(A, B, 4) LCALLNAMES(A, B, 8) LCALLNAMES(A, B, 16)
+    LCALLNAME5(RTLIB::OUTLINE_ATOMIC_CAS, __aarch64_cas)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_SWP, __aarch64_swp)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDADD, __aarch64_ldadd)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDSET, __aarch64_ldset)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDCLR, __aarch64_ldclr)
+    LCALLNAME4(RTLIB::OUTLINE_ATOMIC_LDEOR, __aarch64_ldeor)
+#undef LCALLNAMES
+#undef LCALLNAME4
+#undef LCALLNAME5
   }
 
   if (Subtarget->outlineAtomics() && !Subtarget->hasLSFE()) {
@@ -1438,23 +1458,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
     for (MVT VT : { MVT::v16f16, MVT::v8f32, MVT::v4f64 })
       setOperationAction(ISD::FADD, VT, Custom);
 
-    if (Subtarget->hasDotProd()) {
+    if (EnablePartialReduceNodes && Subtarget->hasDotProd()) {
       static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
                                         ISD::PARTIAL_REDUCE_UMLA};
 
       setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Legal);
       setPartialReduceMLAAction(MLAOps, MVT::v2i32, MVT::v8i8, Legal);
       setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom);
-
-      if (Subtarget->hasMatMulInt8()) {
-        setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, MVT::v4i32,
-                                  MVT::v16i8, Legal);
-        setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, MVT::v2i64,
-                                  MVT::v16i8, Custom);
-
-        setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, MVT::v2i32,
-                                  MVT::v8i8, Legal);
-      }
     }
 
   } else /* !isNeonAvailable */ {
@@ -1502,10 +1512,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Legal);
     }
 
-    if (Subtarget->hasSVE2p1() ||
-        (Subtarget->hasSME2() && Subtarget->isStreaming()))
-      setOperationAction(ISD::GET_ACTIVE_LANE_MASK, MVT::nxv32i1, Custom);
-
     for (auto VT : {MVT::v16i8, MVT::v8i8, MVT::v4i16, MVT::v2i32})
       setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Custom);
   }
@@ -1771,9 +1777,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
     for (auto Opcode :
          {ISD::FCEIL, ISD::FDIV, ISD::FFLOOR, ISD::FNEARBYINT, ISD::FRINT,
-          ISD::FROUND, ISD::FROUNDEVEN, ISD::FSQRT, ISD::FTRUNC, ISD::SETCC,
-          ISD::VECREDUCE_FADD, ISD::VECREDUCE_FMAX, ISD::VECREDUCE_FMAXIMUM,
-          ISD::VECREDUCE_FMIN, ISD::VECREDUCE_FMINIMUM}) {
+          ISD::FROUND, ISD::FROUNDEVEN, ISD::FSQRT, ISD::FTRUNC, ISD::SETCC}) {
       setOperationPromotedToType(Opcode, MVT::nxv2bf16, MVT::nxv2f32);
       setOperationPromotedToType(Opcode, MVT::nxv4bf16, MVT::nxv4f32);
       setOperationPromotedToType(Opcode, MVT::nxv8bf16, MVT::nxv8f32);
@@ -1878,7 +1882,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   }
 
   // Handle partial reduction operations
-  if (Subtarget->isSVEorStreamingSVEAvailable()) {
+  if (EnablePartialReduceNodes && Subtarget->isSVEorStreamingSVEAvailable()) {
     // Mark known legal pairs as 'Legal' (these will expand to UDOT or SDOT).
     // Other pairs will default to 'Expand'.
     static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
@@ -1888,13 +1892,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 
     setPartialReduceMLAAction(MLAOps, MVT::nxv2i64, MVT::nxv16i8, Custom);
 
-    if (Subtarget->hasMatMulInt8()) {
-      setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, MVT::nxv4i32,
-                                MVT::nxv16i8, Legal);
-      setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, MVT::nxv2i64,
-                                MVT::nxv16i8, Custom);
-    }
-
     // Wide add types
     if (Subtarget->hasSVE2() || Subtarget->hasSME()) {
       setPartialReduceMLAAction(MLAOps, MVT::nxv2i64, MVT::nxv4i32, Legal);
@@ -1940,15 +1937,17 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
       setOperationAction(ISD::EXPERIMENTAL_VECTOR_HISTOGRAM, MVT::nxv2i64,
                          Custom);
 
-      static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
-                                        ISD::PARTIAL_REDUCE_UMLA};
-      // Must be lowered to SVE instructions.
-      setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v4i32, Custom);
-      setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v8i16, Custom);
-      setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom);
-      setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v8i16, Custom);
-      setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Custom);
-      setPartialReduceMLAAction(MLAOps, MVT::v8i16, MVT::v16i8, Custom);
+      if (EnablePartialReduceNodes) {
+        static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
+                                          ISD::PARTIAL_REDUCE_UMLA};
+        // Must be lowered to SVE instructions.
+        setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v4i32, Custom);
+        setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v8i16, Custom);
+        setPartialReduceMLAAction(MLAOps, MVT::v2i64, MVT::v16i8, Custom);
+        setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v8i16, Custom);
+        setPartialReduceMLAAction(MLAOps, MVT::v4i32, MVT::v16i8, Custom);
+        setPartialReduceMLAAction(MLAOps, MVT::v8i16, MVT::v16i8, Custom);
+      }
     }
   }
 
@@ -1984,6 +1983,18 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
   for (ISD::NodeType Op : {ISD::FLDEXP, ISD::STRICT_FLDEXP, ISD::FFREXP})
     if (isOperationExpand(Op, MVT::f16))
       setOperationAction(Op, MVT::f16, Promote);
+
+  if (Subtarget->isWindowsArm64EC()) {
+    // FIXME: are there calls we need to exclude from this?
+#define HANDLE_LIBCALL(code, name)                                             \
+  {                                                                            \
+    const char *libcallName = getLibcallName(RTLIB::code);                     \
+    if (libcallName && libcallName[0] != '#')                                  \
+      setLibcallName(RTLIB::code, "#" #name);                                  \
+  }
+#include "llvm/IR/RuntimeLibcalls.def"
+#undef HANDLE_LIBCALL
+  }
 }
 
 void AArch64TargetLowering::addTypeForNEON(MVT VT) {
@@ -2098,18 +2109,12 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
   setOperationAction(ISD::STRICT_FSETCC, VT, Expand);
   setOperationAction(ISD::STRICT_FSETCCS, VT, Expand);
 
-  // When little-endian we can use ordinary d and q register loads/stores for
-  // vector types, but when big-endian we need to use structure load/store which
-  // only allow post-index addressing.
   if (Subtarget->isLittleEndian()) {
     for (unsigned im = (unsigned)ISD::PRE_INC;
          im != (unsigned)ISD::LAST_INDEXED_MODE; ++im) {
       setIndexedLoadAction(im, VT, Legal);
       setIndexedStoreAction(im, VT, Legal);
     }
-  } else {
-    setIndexedLoadAction(ISD::POST_INC, VT, Legal);
-    setIndexedStoreAction(ISD::POST_INC, VT, Legal);
   }
 
   if (Subtarget->hasD128()) {
@@ -2130,8 +2135,7 @@ void AArch64TargetLowering::addTypeForNEON(MVT VT) {
 bool AArch64TargetLowering::shouldExpandGetActiveLaneMask(EVT ResVT,
                                                           EVT OpVT) const {
   // Only SVE has a 1:1 mapping from intrinsic -> instruction (whilelo).
-  if (!Subtarget->isSVEorStreamingSVEAvailable() ||
-      ResVT.getVectorElementType() != MVT::i1)
+  if (!Subtarget->hasSVE() || ResVT.getVectorElementType() != MVT::i1)
     return true;
 
   // Only support illegal types if the result is scalable and min elements > 1.
@@ -2153,6 +2157,16 @@ bool AArch64TargetLowering::shouldExpandPartialReductionIntrinsic(
   assert(I->getIntrinsicID() ==
              Intrinsic::experimental_vector_partial_reduce_add &&
          "Unexpected intrinsic!");
+  if (EnablePartialReduceNodes)
+    return true;
+
+  EVT VT = EVT::getEVT(I->getType());
+  auto Op1 = I->getOperand(1);
+  EVT Op1VT = EVT::getEVT(Op1->getType());
+  if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
+      (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount() ||
+       VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()))
+    return false;
   return true;
 }
 
@@ -2230,32 +2244,26 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   bool PreferNEON = VT.is64BitVector() || VT.is128BitVector();
   bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
 
-  static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
-                                    ISD::PARTIAL_REDUCE_UMLA};
-  unsigned NumElts = VT.getVectorNumElements();
-  if (VT.getVectorElementType() == MVT::i64) {
-    setPartialReduceMLAAction(MLAOps, VT,
-                              MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
-    setPartialReduceMLAAction(MLAOps, VT,
-                              MVT::getVectorVT(MVT::i16, NumElts * 4), Custom);
-    setPartialReduceMLAAction(MLAOps, VT,
-                              MVT::getVectorVT(MVT::i32, NumElts * 2), Custom);
-  } else if (VT.getVectorElementType() == MVT::i32) {
-    setPartialReduceMLAAction(MLAOps, VT,
-                              MVT::getVectorVT(MVT::i8, NumElts * 4), Custom);
-    setPartialReduceMLAAction(MLAOps, VT,
-                              MVT::getVectorVT(MVT::i16, NumElts * 2), Custom);
-  } else if (VT.getVectorElementType() == MVT::i16) {
-    setPartialReduceMLAAction(MLAOps, VT,
-                              MVT::getVectorVT(MVT::i8, NumElts * 2), Custom);
-  }
-  if (Subtarget->hasMatMulInt8()) {
-    if (VT.getVectorElementType() == MVT::i32)
-      setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, VT,
-                                MVT::getVectorVT(MVT::i8, NumElts * 4), Custom);
-    else if (VT.getVectorElementType() == MVT::i64)
-      setPartialReduceMLAAction(ISD::PARTIAL_REDUCE_SUMLA, VT,
+  if (EnablePartialReduceNodes) {
+    static const unsigned MLAOps[] = {ISD::PARTIAL_REDUCE_SMLA,
+                                      ISD::PARTIAL_REDUCE_UMLA};
+    unsigned NumElts = VT.getVectorNumElements();
+    if (VT.getVectorElementType() == MVT::i64) {
+      setPartialReduceMLAAction(MLAOps, VT,
                                 MVT::getVectorVT(MVT::i8, NumElts * 8), Custom);
+      setPartialReduceMLAAction(
+          MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 4), Custom);
+      setPartialReduceMLAAction(
+          MLAOps, VT, MVT::getVectorVT(MVT::i32, NumElts * 2), Custom);
+    } else if (VT.getVectorElementType() == MVT::i32) {
+      setPartialReduceMLAAction(MLAOps, VT,
+                                MVT::getVectorVT(MVT::i8, NumElts * 4), Custom);
+      setPartialReduceMLAAction(
+          MLAOps, VT, MVT::getVectorVT(MVT::i16, NumElts * 2), Custom);
+    } else if (VT.getVectorElementType() == MVT::i16) {
+      setPartialReduceMLAAction(MLAOps, VT,
+                                MVT::getVectorVT(MVT::i8, NumElts * 2), Custom);
+    }
   }
 
   // Lower fixed length vector operations to scalable equivalents.
@@ -2301,7 +2309,6 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
   setOperationAction(ISD::FSQRT, VT, Default);
   setOperationAction(ISD::FSUB, VT, Default);
   setOperationAction(ISD::FTRUNC, VT, Default);
-  setOperationAction(ISD::GET_ACTIVE_LANE_MASK, VT, Default);
   setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Default);
   setOperationAction(ISD::LOAD, VT, PreferNEON ? Legal : Default);
   setOperationAction(ISD::MGATHER, VT, PreferSVE ? Default : Expand);
@@ -3386,19 +3393,8 @@ bool isLegalCmpImmed(APInt C) {
   return isLegalArithImmed(C.abs().getZExtValue());
 }
 
-static bool isSafeSignedCMN(SDValue Op, SelectionDAG &DAG) {
-  // 0 - INT_MIN sign wraps, so no signed wrap means cmn is safe.
-  if (Op->getFlags().hasNoSignedWrap())
-    return true;
-
-  // We can still figure out if the second operand is safe to use
-  // in a CMN instruction by checking if it is known to be not the minimum
-  // signed value. If it is not, then we can safely use CMN.
-  // Note: We can eventually remove this check and simply rely on
-  // Op->getFlags().hasNoSignedWrap() once SelectionDAG/ISelLowering
-  // consistently sets them appropriately when making said nodes.
-
-  KnownBits KnownSrc = DAG.computeKnownBits(Op.getOperand(1));
+static bool cannotBeIntMin(SDValue CheckedVal, SelectionDAG &DAG) {
+  KnownBits KnownSrc = DAG.computeKnownBits(CheckedVal);
   return !KnownSrc.getSignedMinValue().isMinSignedValue();
 }
 
@@ -3407,7 +3403,7 @@ static bool isSafeSignedCMN(SDValue Op, SelectionDAG &DAG) {
 // can be set differently by this operation. It comes down to whether
 // "SInt(~op2)+1 == SInt(~op2+1)" (and the same for UInt). If they are then
 // everything is fine. If not then the optimization is wrong. Thus general
-// comparisons are only valid if op2 != 0 and op2 != INT_MIN.
+// comparisons are only valid if op2 != 0.
 //
 // So, finally, the only LLVM-native comparisons that don't mention C or V
 // are the ones that aren't unsigned comparisons. They're the only ones we can
@@ -3416,10 +3412,10 @@ static bool isCMN(SDValue Op, ISD::CondCode CC, SelectionDAG &DAG) {
   return Op.getOpcode() == ISD::SUB && isNullConstant(Op.getOperand(0)) &&
          (isIntEqualitySetCC(CC) ||
           (isUnsignedIntSetCC(CC) && DAG.isKnownNeverZero(Op.getOperand(1))) ||
-          (isSignedIntSetCC(CC) && isSafeSignedCMN(Op, DAG)));
+          (isSignedIntSetCC(CC) && cannotBeIntMin(Op.getOperand(1), DAG)));
 }
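To make the INT_MIN restriction concrete: CMN a, b sets flags for a + b, which only matches a signed compare of a against -b when negating b does not wrap. A purely illustrative snippet (plain C++, not part of the patch):

```
#include <cstdint>
#include <cstdio>

int main() {
  // Negating INT32_MIN overflows a 32-bit signed value, so flags computed
  // by CMN (a + b) cannot stand in for a signed compare of a against -b.
  int64_t B = INT32_MIN;
  int64_t Negated = -B; // 2147483648, one past INT32_MAX
  std::printf("-(INT32_MIN) = %lld > INT32_MAX = %d\n",
              static_cast<long long>(Negated), INT32_MAX);
}
```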
 
-static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
+static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &dl,
                                       SelectionDAG &DAG, SDValue Chain,
                                       bool IsSignaling) {
   EVT VT = LHS.getValueType();
@@ -3428,29 +3424,29 @@ static SDValue emitStrictFPComparison(SDValue LHS, SDValue RHS, const SDLoc &DL,
   const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
 
   if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
-    LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
+    LHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
                       {Chain, LHS});
-    RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
+    RHS = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
                       {LHS.getValue(1), RHS});
     Chain = RHS.getValue(1);
   }
   unsigned Opcode =
       IsSignaling ? AArch64ISD::STRICT_FCMPE : AArch64ISD::STRICT_FCMP;
-  return DAG.getNode(Opcode, DL, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
+  return DAG.getNode(Opcode, dl, {MVT::i32, MVT::Other}, {Chain, LHS, RHS});
 }
 
 static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                              const SDLoc &DL, SelectionDAG &DAG) {
+                              const SDLoc &dl, SelectionDAG &DAG) {
   EVT VT = LHS.getValueType();
   const bool FullFP16 = DAG.getSubtarget<AArch64Subtarget>().hasFullFP16();
 
   if (VT.isFloatingPoint()) {
     assert(VT != MVT::f128);
     if ((VT == MVT::f16 && !FullFP16) || VT == MVT::bf16) {
-      LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
-      RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
+      LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
+      RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
     }
-    return DAG.getNode(AArch64ISD::FCMP, DL, MVT::i32, LHS, RHS);
+    return DAG.getNode(AArch64ISD::FCMP, dl, MVT::i32, LHS, RHS);
   }
 
   // The CMP instruction is just an alias for SUBS, and representing it as
@@ -3475,7 +3471,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
       // (a.k.a. ANDS) except that the flags are only guaranteed to work for one
       // of the signed comparisons.
       const SDValue ANDSNode =
-          DAG.getNode(AArch64ISD::ANDS, DL, DAG.getVTList(VT, MVT_CC),
+          DAG.getNode(AArch64ISD::ANDS, dl, DAG.getVTList(VT, MVT_CC),
                       LHS.getOperand(0), LHS.getOperand(1));
       // Replace all users of (and X, Y) with newly generated (ands X, Y)
       DAG.ReplaceAllUsesWith(LHS, ANDSNode);
@@ -3486,7 +3482,7 @@ static SDValue emitComparison(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     }
   }
 
-  return DAG.getNode(Opcode, DL, DAG.getVTList(VT, MVT_CC), LHS, RHS)
+  return DAG.getNode(Opcode, dl, DAG.getVTList(VT, MVT_CC), LHS, RHS)
       .getValue(1);
 }
 
@@ -3844,7 +3840,7 @@ static bool shouldBeAdjustedToZero(SDValue LHS, APInt C, ISD::CondCode &CC) {
 
 static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
                              SDValue &AArch64cc, SelectionDAG &DAG,
-                             const SDLoc &DL) {
+                             const SDLoc &dl) {
   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
     EVT VT = RHS.getValueType();
     APInt C = RHSC->getAPIntValue();
@@ -3853,7 +3849,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     if (shouldBeAdjustedToZero(LHS, C, CC)) {
       // Adjust the constant to zero.
       // CC has already been adjusted.
-      RHS = DAG.getConstant(0, DL, VT);
+      RHS = DAG.getConstant(0, dl, VT);
     } else if (!isLegalCmpImmed(C)) {
       // Constant does not fit, try adjusting it by one?
       switch (CC) {
@@ -3865,7 +3861,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
           APInt CMinusOne = C - 1;
           if (isLegalCmpImmed(CMinusOne)) {
             CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
-            RHS = DAG.getConstant(CMinusOne, DL, VT);
+            RHS = DAG.getConstant(CMinusOne, dl, VT);
           }
         }
         break;
@@ -3875,7 +3871,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
           APInt CMinusOne = C - 1;
           if (isLegalCmpImmed(CMinusOne)) {
             CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
-            RHS = DAG.getConstant(CMinusOne, DL, VT);
+            RHS = DAG.getConstant(CMinusOne, dl, VT);
           }
         }
         break;
@@ -3885,7 +3881,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
           APInt CPlusOne = C + 1;
           if (isLegalCmpImmed(CPlusOne)) {
             CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
-            RHS = DAG.getConstant(CPlusOne, DL, VT);
+            RHS = DAG.getConstant(CPlusOne, dl, VT);
           }
         }
         break;
@@ -3895,7 +3891,7 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
           APInt CPlusOne = C + 1;
           if (isLegalCmpImmed(CPlusOne)) {
             CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
-            RHS = DAG.getConstant(CPlusOne, DL, VT);
+            RHS = DAG.getConstant(CPlusOne, dl, VT);
           }
         }
         break;
@@ -3954,11 +3950,11 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
       int16_t ValueofRHS = RHS->getAsZExtVal();
       if (ValueofRHS < 0 && isLegalArithImmed(-ValueofRHS)) {
         SDValue SExt =
-            DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, LHS.getValueType(), LHS,
+            DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, LHS.getValueType(), LHS,
                         DAG.getValueType(MVT::i16));
         Cmp = emitComparison(
-            SExt, DAG.getSignedConstant(ValueofRHS, DL, RHS.getValueType()), CC,
-            DL, DAG);
+            SExt, DAG.getSignedConstant(ValueofRHS, dl, RHS.getValueType()), CC,
+            dl, DAG);
         AArch64CC = changeIntCCToAArch64CC(CC);
       }
     }
@@ -3972,10 +3968,10 @@ static SDValue getAArch64Cmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
   }
 
   if (!Cmp) {
-    Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
     AArch64CC = changeIntCCToAArch64CC(CC);
   }
-  AArch64cc = DAG.getConstant(AArch64CC, DL, MVT_CC);
+  AArch64cc = DAG.getConstant(AArch64CC, dl, MVT_CC);
   return Cmp;
 }
 
@@ -4076,7 +4072,7 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
 
   SDValue Sel = Op.getOperand(0);
   SDValue Other = Op.getOperand(1);
-  SDLoc DL(Sel);
+  SDLoc dl(Sel);
 
   // If the operand is an overflow checking operation, invert the condition
   // code and kill the Not operation. I.e., transform:
@@ -4090,13 +4086,13 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
     if (!DAG.getTargetLoweringInfo().isTypeLegal(Sel->getValueType(0)))
       return SDValue();
 
-    SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
-    SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
+    SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+    SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
     AArch64CC::CondCode CC;
     SDValue Value, Overflow;
     std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Sel.getValue(0), DAG);
-    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32);
-    return DAG.getNode(AArch64ISD::CSEL, DL, Op.getValueType(), TVal, FVal,
+    SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
+    return DAG.getNode(AArch64ISD::CSEL, dl, Op.getValueType(), TVal, FVal,
                        CCVal, Overflow);
   }
   // If neither operand is a SELECT_CC, give up.
@@ -4140,13 +4136,13 @@ SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
   // If the constants line up, perform the transform!
   if (CTVal->isZero() && CFVal->isAllOnes()) {
     SDValue CCVal;
-    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, DL);
+    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
 
     FVal = Other;
-    TVal = DAG.getNode(ISD::XOR, DL, Other.getValueType(), Other,
-                       DAG.getAllOnesConstant(DL, Other.getValueType()));
+    TVal = DAG.getNode(ISD::XOR, dl, Other.getValueType(), Other,
+                       DAG.getAllOnesConstant(dl, Other.getValueType()));
 
-    return DAG.getNode(AArch64ISD::CSEL, DL, Sel.getValueType(), FVal, TVal,
+    return DAG.getNode(AArch64ISD::CSEL, dl, Sel.getValueType(), FVal, TVal,
                        CCVal, Cmp);
   }
 
@@ -4222,25 +4218,25 @@ static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
   if (!DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()))
     return SDValue();
 
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   AArch64CC::CondCode CC;
   // The actual operation that sets the overflow or carry flag.
   SDValue Value, Overflow;
   std::tie(Value, Overflow) = getAArch64XALUOOp(CC, Op, DAG);
 
   // We use 0 and 1 as false and true values.
-  SDValue TVal = DAG.getConstant(1, DL, MVT::i32);
-  SDValue FVal = DAG.getConstant(0, DL, MVT::i32);
+  SDValue TVal = DAG.getConstant(1, dl, MVT::i32);
+  SDValue FVal = DAG.getConstant(0, dl, MVT::i32);
 
   // We use an inverted condition, because the conditional select is inverted
   // too. This will allow it to be selected to a single instruction:
   // CSINC Wd, WZR, WZR, invert(cond).
-  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), DL, MVT::i32);
+  SDValue CCVal = DAG.getConstant(getInvertedCondCode(CC), dl, MVT::i32);
   Overflow =
-      DAG.getNode(AArch64ISD::CSEL, DL, MVT::i32, FVal, TVal, CCVal, Overflow);
+      DAG.getNode(AArch64ISD::CSEL, dl, MVT::i32, FVal, TVal, CCVal, Overflow);
 
   SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
-  return DAG.getNode(ISD::MERGE_VALUES, DL, VTs, Value, Overflow);
+  return DAG.getNode(ISD::MERGE_VALUES, dl, VTs, Value, Overflow);
 }
 
 // Prefetch operands are:
@@ -4279,7 +4275,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG) {
 // a power of 2. This is then lowered to ANDS X (Y & ~(Z - 1)) instead of SUBS
 // (AND X Y) Z which produces a better opt with EmitComparison
 static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
-                                SelectionDAG &DAG, const SDLoc DL) {
+                                SelectionDAG &DAG, const SDLoc dl) {
   if (CC == ISD::SETULT && LHS.getOpcode() == ISD::AND && LHS->hasOneUse()) {
     ConstantSDNode *LHSConstOp = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
     ConstantSDNode *RHSConst = dyn_cast<ConstantSDNode>(RHS);
@@ -4289,9 +4285,9 @@ static void simplifySetCCIntoEq(ISD::CondCode &CC, SDValue &LHS, SDValue &RHS,
       if (isPowerOf2_64(RHSConstant)) {
         uint64_t NewMaskValue = LHSConstValue & ~(RHSConstant - 1);
         LHS =
-            DAG.getNode(ISD::AND, DL, LHS.getValueType(), LHS.getOperand(0),
-                        DAG.getConstant(NewMaskValue, DL, LHS.getValueType()));
-        RHS = DAG.getConstant(0, DL, RHS.getValueType());
+            DAG.getNode(ISD::AND, dl, LHS.getValueType(), LHS.getOperand(0),
+                        DAG.getConstant(NewMaskValue, dl, LHS.getValueType()));
+        RHS = DAG.getConstant(0, dl, RHS.getValueType());
         CC = ISD::SETEQ;
       }
     }
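The rewrite above rests on a small bit identity: for a power-of-two Z, (X & Y) u< Z holds exactly when (X & (Y & ~(Z - 1))) == 0, so the AND can feed the flags directly. A quick standalone check of the equivalence (illustrative only):

```
#include <cassert>
#include <cstdint>

int main() {
  // Exhaustively verify the identity for one mask and one power-of-two bound.
  const uint64_t Y = 0xFF; // AND mask
  const uint64_t Z = 16;   // power-of-two comparison constant
  for (uint64_t X = 0; X < 1024; ++X)
    assert(((X & Y) < Z) == ((X & (Y & ~(Z - 1))) == 0));
}
```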
@@ -4422,55 +4418,55 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
   if (VT.getScalarType() == MVT::bf16 &&
       !((Subtarget->hasNEON() || Subtarget->hasSME()) &&
         Subtarget->hasBF16())) {
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     SDValue Narrow = SrcVal;
     SDValue NaN;
     EVT I32 = SrcVT.changeElementType(MVT::i32);
     EVT F32 = SrcVT.changeElementType(MVT::f32);
     if (SrcVT.getScalarType() == MVT::f32) {
       bool NeverSNaN = DAG.isKnownNeverSNaN(Narrow);
-      Narrow = DAG.getNode(ISD::BITCAST, DL, I32, Narrow);
+      Narrow = DAG.getNode(ISD::BITCAST, dl, I32, Narrow);
       if (!NeverSNaN) {
         // Set the quiet bit.
-        NaN = DAG.getNode(ISD::OR, DL, I32, Narrow,
-                          DAG.getConstant(0x400000, DL, I32));
+        NaN = DAG.getNode(ISD::OR, dl, I32, Narrow,
+                          DAG.getConstant(0x400000, dl, I32));
       }
     } else if (SrcVT.getScalarType() == MVT::f64) {
-      Narrow = DAG.getNode(AArch64ISD::FCVTXN, DL, F32, Narrow);
-      Narrow = DAG.getNode(ISD::BITCAST, DL, I32, Narrow);
+      Narrow = DAG.getNode(AArch64ISD::FCVTXN, dl, F32, Narrow);
+      Narrow = DAG.getNode(ISD::BITCAST, dl, I32, Narrow);
     } else {
       return SDValue();
     }
     if (!Trunc) {
-      SDValue One = DAG.getConstant(1, DL, I32);
-      SDValue Lsb = DAG.getNode(ISD::SRL, DL, I32, Narrow,
-                                DAG.getShiftAmountConstant(16, I32, DL));
-      Lsb = DAG.getNode(ISD::AND, DL, I32, Lsb, One);
+      SDValue One = DAG.getConstant(1, dl, I32);
+      SDValue Lsb = DAG.getNode(ISD::SRL, dl, I32, Narrow,
+                                DAG.getShiftAmountConstant(16, I32, dl));
+      Lsb = DAG.getNode(ISD::AND, dl, I32, Lsb, One);
       SDValue RoundingBias =
-          DAG.getNode(ISD::ADD, DL, I32, DAG.getConstant(0x7fff, DL, I32), Lsb);
-      Narrow = DAG.getNode(ISD::ADD, DL, I32, Narrow, RoundingBias);
+          DAG.getNode(ISD::ADD, dl, I32, DAG.getConstant(0x7fff, dl, I32), Lsb);
+      Narrow = DAG.getNode(ISD::ADD, dl, I32, Narrow, RoundingBias);
     }
 
     // Don't round if we had a NaN, we don't want to turn 0x7fffffff into
     // 0x80000000.
     if (NaN) {
       SDValue IsNaN = DAG.getSetCC(
-          DL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT),
+          dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT),
           SrcVal, SrcVal, ISD::SETUO);
-      Narrow = DAG.getSelect(DL, I32, IsNaN, NaN, Narrow);
+      Narrow = DAG.getSelect(dl, I32, IsNaN, NaN, Narrow);
     }
 
     // Now that we have rounded, shift the bits into position.
-    Narrow = DAG.getNode(ISD::SRL, DL, I32, Narrow,
-                         DAG.getShiftAmountConstant(16, I32, DL));
+    Narrow = DAG.getNode(ISD::SRL, dl, I32, Narrow,
+                         DAG.getShiftAmountConstant(16, I32, dl));
     if (VT.isVector()) {
       EVT I16 = I32.changeVectorElementType(MVT::i16);
-      Narrow = DAG.getNode(ISD::TRUNCATE, DL, I16, Narrow);
-      return DAG.getNode(ISD::BITCAST, DL, VT, Narrow);
+      Narrow = DAG.getNode(ISD::TRUNCATE, dl, I16, Narrow);
+      return DAG.getNode(ISD::BITCAST, dl, VT, Narrow);
     }
-    Narrow = DAG.getNode(ISD::BITCAST, DL, F32, Narrow);
-    SDValue Result = DAG.getTargetExtractSubreg(AArch64::hsub, DL, VT, Narrow);
-    return IsStrict ? DAG.getMergeValues({Result, Op.getOperand(0)}, DL)
+    Narrow = DAG.getNode(ISD::BITCAST, dl, F32, Narrow);
+    SDValue Result = DAG.getTargetExtractSubreg(AArch64::hsub, dl, VT, Narrow);
+    return IsStrict ? DAG.getMergeValues({Result, Op.getOperand(0)}, dl)
                     : Result;
   }
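The rounding arithmetic in this bf16 path can be modelled in scalar code: add 0x7FFF plus the low bit of the would-be result to the f32 bits so that ties round to even, then keep the top 16 bits. A minimal sketch under that assumption (NaN quieting omitted, helper name hypothetical):

```
#include <cstdint>
#include <cstdio>
#include <cstring>

// Scalar model of round-to-nearest-even f32 -> bf16, mirroring the bias trick.
static uint16_t ToBF16(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof(Bits));
  uint32_t Lsb = (Bits >> 16) & 1; // low bit of the truncated result
  Bits += 0x7FFF + Lsb;            // rounding bias; ties go to even
  return static_cast<uint16_t>(Bits >> 16);
}

int main() {
  std::printf("bf16(1.0f)  = 0x%04x\n", ToBF16(1.0f));  // 0x3f80
  std::printf("bf16(3.14f) = 0x%04x\n", ToBF16(3.14f)); // 0x4049
}
```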
 
@@ -4502,16 +4498,16 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
   if ((InVT.getVectorElementType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
       InVT.getVectorElementType() == MVT::bf16) {
     EVT NewVT = VT.changeElementType(MVT::f32);
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     if (IsStrict) {
-      SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {NewVT, MVT::Other},
+      SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {NewVT, MVT::Other},
                                 {Op.getOperand(0), Op.getOperand(1)});
-      return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
+      return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
                          {Ext.getValue(1), Ext.getValue(0)});
     }
     return DAG.getNode(
-        Op.getOpcode(), DL, Op.getValueType(),
-        DAG.getNode(ISD::FP_EXTEND, DL, NewVT, Op.getOperand(0)));
+        Op.getOpcode(), dl, Op.getValueType(),
+        DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0)));
   }
 
   if (VT.isScalableVector()) {
@@ -4540,47 +4536,47 @@ SDValue AArch64TargetLowering::LowerVectorFP_TO_INT(SDValue Op,
   uint64_t VTSize = VT.getFixedSizeInBits();
   uint64_t InVTSize = InVT.getFixedSizeInBits();
   if (VTSize < InVTSize) {
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     if (IsStrict) {
       InVT = InVT.changeVectorElementTypeToInteger();
-      SDValue Cv = DAG.getNode(Op.getOpcode(), DL, {InVT, MVT::Other},
+      SDValue Cv = DAG.getNode(Op.getOpcode(), dl, {InVT, MVT::Other},
                                {Op.getOperand(0), Op.getOperand(1)});
-      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, VT, Cv);
-      return DAG.getMergeValues({Trunc, Cv.getValue(1)}, DL);
+      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
+      return DAG.getMergeValues({Trunc, Cv.getValue(1)}, dl);
     }
     SDValue Cv =
-        DAG.getNode(Op.getOpcode(), DL, InVT.changeVectorElementTypeToInteger(),
+        DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(),
                     Op.getOperand(0));
-    return DAG.getNode(ISD::TRUNCATE, DL, VT, Cv);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv);
   }
 
   if (VTSize > InVTSize) {
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     MVT ExtVT =
         MVT::getVectorVT(MVT::getFloatingPointVT(VT.getScalarSizeInBits()),
                          VT.getVectorNumElements());
     if (IsStrict) {
-      SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {ExtVT, MVT::Other},
+      SDValue Ext = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {ExtVT, MVT::Other},
                                 {Op.getOperand(0), Op.getOperand(1)});
-      return DAG.getNode(Op.getOpcode(), DL, {VT, MVT::Other},
+      return DAG.getNode(Op.getOpcode(), dl, {VT, MVT::Other},
                          {Ext.getValue(1), Ext.getValue(0)});
     }
-    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, DL, ExtVT, Op.getOperand(0));
-    return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
+    SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, ExtVT, Op.getOperand(0));
+    return DAG.getNode(Op.getOpcode(), dl, VT, Ext);
   }
 
   // Use a scalar operation for conversions between single-element vectors of
   // the same size.
   if (InVT.getVectorNumElements() == 1) {
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     SDValue Extract = DAG.getNode(
-        ISD::EXTRACT_VECTOR_ELT, DL, InVT.getScalarType(),
-        Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, DL, MVT::i64));
+        ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(),
+        Op.getOperand(IsStrict ? 1 : 0), DAG.getConstant(0, dl, MVT::i64));
     EVT ScalarVT = VT.getScalarType();
     if (IsStrict)
-      return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
+      return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
                          {Op.getOperand(0), Extract});
-    return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);
+    return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
   }
 
   // Type changing conversions are illegal.
@@ -4598,16 +4594,16 @@ SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op,
   // f16 conversions are promoted to f32 when full fp16 is not supported.
   if ((SrcVal.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
       SrcVal.getValueType() == MVT::bf16) {
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     if (IsStrict) {
       SDValue Ext =
-          DAG.getNode(ISD::STRICT_FP_EXTEND, DL, {MVT::f32, MVT::Other},
+          DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {MVT::f32, MVT::Other},
                       {Op.getOperand(0), SrcVal});
-      return DAG.getNode(Op.getOpcode(), DL, {Op.getValueType(), MVT::Other},
+      return DAG.getNode(Op.getOpcode(), dl, {Op.getValueType(), MVT::Other},
                          {Ext.getValue(1), Ext.getValue(0)});
     }
-    return DAG.getNode(Op.getOpcode(), DL, Op.getValueType(),
-                       DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, SrcVal));
+    return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(),
+                       DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, SrcVal));
   }
 
   if (SrcVal.getValueType() != MVT::f128) {
@@ -4807,7 +4803,7 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
   // in the cost tables.
   bool IsStrict = Op->isStrictFPOpcode();
   EVT VT = Op.getValueType();
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   SDValue In = Op.getOperand(IsStrict ? 1 : 0);
   EVT InVT = In.getValueType();
   unsigned Opc = Op.getOpcode();
@@ -4818,26 +4814,26 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
 
   // NOTE: i1->bf16 does not require promotion to f32.
   if (VT.isScalableVector() && InVT.getVectorElementType() == MVT::i1) {
-    SDValue FalseVal = DAG.getConstantFP(0.0, DL, VT);
-    SDValue TrueVal = IsSigned ? DAG.getConstantFP(-1.0, DL, VT)
-                               : DAG.getConstantFP(1.0, DL, VT);
-    return DAG.getNode(ISD::VSELECT, DL, VT, In, TrueVal, FalseVal);
+    SDValue FalseVal = DAG.getConstantFP(0.0, dl, VT);
+    SDValue TrueVal = IsSigned ? DAG.getConstantFP(-1.0, dl, VT)
+                               : DAG.getConstantFP(1.0, dl, VT);
+    return DAG.getNode(ISD::VSELECT, dl, VT, In, TrueVal, FalseVal);
   }
 
   // Promote bf16 conversions to f32.
   if (VT.getVectorElementType() == MVT::bf16) {
     EVT F32 = VT.changeElementType(MVT::f32);
     if (IsStrict) {
-      SDValue Val = DAG.getNode(Op.getOpcode(), DL, {F32, MVT::Other},
+      SDValue Val = DAG.getNode(Op.getOpcode(), dl, {F32, MVT::Other},
                                 {Op.getOperand(0), In});
-      return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
+      return DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                          {Op.getValueType(), MVT::Other},
                          {Val.getValue(1), Val.getValue(0),
-                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
+                          DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)});
     }
-    return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(),
-                       DAG.getNode(Op.getOpcode(), DL, F32, In),
-                       DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+    return DAG.getNode(ISD::FP_ROUND, dl, Op.getValueType(),
+                       DAG.getNode(Op.getOpcode(), dl, F32, In),
+                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
   }
 
   if (VT.isScalableVector()) {
@@ -4884,36 +4880,36 @@ SDValue AArch64TargetLowering::LowerVectorINT_TO_FP(SDValue Op,
         MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()),
                          InVT.getVectorNumElements());
     if (IsStrict) {
-      In = DAG.getNode(Opc, DL, {CastVT, MVT::Other}, {Op.getOperand(0), In});
-      return DAG.getNode(ISD::STRICT_FP_ROUND, DL, {VT, MVT::Other},
+      In = DAG.getNode(Opc, dl, {CastVT, MVT::Other}, {Op.getOperand(0), In});
+      return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {VT, MVT::Other},
                          {In.getValue(1), In.getValue(0),
-                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
+                          DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)});
     }
-    In = DAG.getNode(Opc, DL, CastVT, In);
-    return DAG.getNode(ISD::FP_ROUND, DL, VT, In,
-                       DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+    In = DAG.getNode(Opc, dl, CastVT, In);
+    return DAG.getNode(ISD::FP_ROUND, dl, VT, In,
+                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
   }
 
   if (VTSize > InVTSize) {
     unsigned CastOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
     EVT CastVT = VT.changeVectorElementTypeToInteger();
-    In = DAG.getNode(CastOpc, DL, CastVT, In);
+    In = DAG.getNode(CastOpc, dl, CastVT, In);
     if (IsStrict)
-      return DAG.getNode(Opc, DL, {VT, MVT::Other}, {Op.getOperand(0), In});
-    return DAG.getNode(Opc, DL, VT, In);
+      return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op.getOperand(0), In});
+    return DAG.getNode(Opc, dl, VT, In);
   }
 
   // Use a scalar operation for conversions between single-element vectors of
   // the same size.
   if (VT.getVectorNumElements() == 1) {
     SDValue Extract =
-        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InVT.getScalarType(), In,
-                    DAG.getConstant(0, DL, MVT::i64));
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getScalarType(), In,
+                    DAG.getConstant(0, dl, MVT::i64));
     EVT ScalarVT = VT.getScalarType();
     if (IsStrict)
-      return DAG.getNode(Op.getOpcode(), DL, {ScalarVT, MVT::Other},
+      return DAG.getNode(Op.getOpcode(), dl, {ScalarVT, MVT::Other},
                          {Op.getOperand(0), Extract});
-    return DAG.getNode(Op.getOpcode(), DL, ScalarVT, Extract);
+    return DAG.getNode(Op.getOpcode(), dl, ScalarVT, Extract);
   }
 
   return Op;
@@ -4931,18 +4927,18 @@ SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op,
                   Op->getOpcode() == ISD::SINT_TO_FP;
 
   auto IntToFpViaPromotion = [&](EVT PromoteVT) {
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     if (IsStrict) {
-      SDValue Val = DAG.getNode(Op.getOpcode(), DL, {PromoteVT, MVT::Other},
+      SDValue Val = DAG.getNode(Op.getOpcode(), dl, {PromoteVT, MVT::Other},
                                 {Op.getOperand(0), SrcVal});
-      return DAG.getNode(ISD::STRICT_FP_ROUND, DL,
+      return DAG.getNode(ISD::STRICT_FP_ROUND, dl,
                          {Op.getValueType(), MVT::Other},
                          {Val.getValue(1), Val.getValue(0),
-                          DAG.getIntPtrConstant(0, DL, /*isTarget=*/true)});
+                          DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)});
     }
-    return DAG.getNode(ISD::FP_ROUND, DL, Op.getValueType(),
-                       DAG.getNode(Op.getOpcode(), DL, PromoteVT, SrcVal),
-                       DAG.getIntPtrConstant(0, DL, /*isTarget=*/true));
+    return DAG.getNode(ISD::FP_ROUND, dl, Op.getValueType(),
+                       DAG.getNode(Op.getOpcode(), dl, PromoteVT, SrcVal),
+                       DAG.getIntPtrConstant(0, dl, /*isTarget=*/true));
   };
 
   if (Op.getValueType() == MVT::bf16) {
@@ -5064,7 +5060,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
                                             SelectionDAG &DAG) const {
   // For iOS, we want to call an alternative entry point: __sincos_stret,
   // which returns the values in two S / D registers.
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   SDValue Arg = Op.getOperand(0);
   EVT ArgVT = Arg.getValueType();
   Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
@@ -5086,10 +5082,9 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op,
 
   StructType *RetTy = StructType::get(ArgTy, ArgTy);
   TargetLowering::CallLoweringInfo CLI(DAG);
-  CallingConv::ID CC = getLibcallCallingConv(LC);
-  CLI.setDebugLoc(DL)
+  CLI.setDebugLoc(dl)
       .setChain(DAG.getEntryNode())
-      .setLibCallee(CC, RetTy, Callee, std::move(Args));
+      .setLibCallee(CallingConv::Fast, RetTy, Callee, std::move(Args));
 
   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
   return CallResult.first;
@@ -5245,21 +5240,21 @@ SDValue AArch64TargetLowering::LowerGET_ROUNDING(SDValue Op,
   // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0
   // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3)
   // so that the shift + and get folded into a bitfield extract.
-  SDLoc DL(Op);
+  SDLoc dl(Op);
 
   SDValue Chain = Op.getOperand(0);
   SDValue FPCR_64 = DAG.getNode(
-      ISD::INTRINSIC_W_CHAIN, DL, {MVT::i64, MVT::Other},
-      {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, DL, MVT::i64)});
+      ISD::INTRINSIC_W_CHAIN, dl, {MVT::i64, MVT::Other},
+      {Chain, DAG.getConstant(Intrinsic::aarch64_get_fpcr, dl, MVT::i64)});
   Chain = FPCR_64.getValue(1);
-  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPCR_64);
-  SDValue FltRounds = DAG.getNode(ISD::ADD, DL, MVT::i32, FPCR_32,
-                                  DAG.getConstant(1U << 22, DL, MVT::i32));
-  SDValue RMODE = DAG.getNode(ISD::SRL, DL, MVT::i32, FltRounds,
-                              DAG.getConstant(22, DL, MVT::i32));
-  SDValue AND = DAG.getNode(ISD::AND, DL, MVT::i32, RMODE,
-                            DAG.getConstant(3, DL, MVT::i32));
-  return DAG.getMergeValues({AND, Chain}, DL);
+  SDValue FPCR_32 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, FPCR_64);
+  SDValue FltRounds = DAG.getNode(ISD::ADD, dl, MVT::i32, FPCR_32,
+                                  DAG.getConstant(1U << 22, dl, MVT::i32));
+  SDValue RMODE = DAG.getNode(ISD::SRL, dl, MVT::i32, FltRounds,
+                              DAG.getConstant(22, dl, MVT::i32));
+  SDValue AND = DAG.getNode(ISD::AND, dl, MVT::i32, RMODE,
+                            DAG.getConstant(3, dl, MVT::i32));
+  return DAG.getMergeValues({AND, Chain}, dl);
 }
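The mapping in the comment above is easy to sanity-check: adding 1 to the two rounding-mode bits and masking with 3 sends the AArch64 modes 0, 1, 2, 3 to the FLT_ROUNDS values 1, 2, 3, 0. A small standalone check (illustrative, not part of the patch):

```
#include <cassert>
#include <cstdint>

// FLT_ROUNDS value derived from the rounding-mode bits, matching the lowering above.
static unsigned FltRounds(uint32_t FPCR) {
  return ((FPCR + (1u << 22)) >> 22) & 3;
}

int main() {
  assert(FltRounds(0u << 22) == 1); // round to nearest  -> 1
  assert(FltRounds(1u << 22) == 2); // round toward +inf -> 2
  assert(FltRounds(2u << 22) == 3); // round toward -inf -> 3
  assert(FltRounds(3u << 22) == 0); // round toward zero -> 0
}
```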
 
 SDValue AArch64TargetLowering::LowerSET_ROUNDING(SDValue Op,
@@ -5387,7 +5382,7 @@ static unsigned selectUmullSmull(SDValue &N0, SDValue &N1, SelectionDAG &DAG,
       return AArch64ISD::UMULL;
   } else if (VT == MVT::v2i64 && DAG.MaskedValueIsZero(N0, Mask) &&
              DAG.MaskedValueIsZero(N1, Mask)) {
-    // For v2i64 we look more aggressively at both operands being zero, to avoid
+    // For v2i64 we look more aggressively at both operands being zero, to avoid
     // scalarization.
     return AArch64ISD::UMULL;
   }
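The v2i64 case above is the usual widening-multiply identity: if both 64-bit operands have their upper 32 bits known zero, the full 64x64 product equals the 32x32 to 64 widening product that UMULL computes per lane. A one-line check (illustrative only):

```
#include <cassert>
#include <cstdint>

int main() {
  // Both values fit in 32 bits, so the 64-bit product equals the widening
  // 32x32 -> 64 multiply that UMULL would perform per lane.
  uint64_t A = 0x12345678u, B = 0x9ABCDEF0u;
  assert(A * B == static_cast<uint64_t>(static_cast<uint32_t>(A)) *
                      static_cast<uint32_t>(B));
}
```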
@@ -5516,14 +5511,14 @@ static SDValue optimizeIncrementingWhile(SDNode *N, SelectionDAG &DAG,
   if (!isa<ConstantSDNode>(N->getOperand(Op1)))
     return SDValue();
 
-  SDLoc DL(N);
+  SDLoc dl(N);
   APInt Y = N->getConstantOperandAPInt(Op1);
 
   // When the second operand is the maximum value, comparisons that include
   // equality can never fail and thus we can return an all active predicate.
   if (IsEqual)
     if (IsSigned ? Y.isMaxSignedValue() : Y.isMaxValue())
-      return DAG.getConstant(1, DL, N->getValueType(0));
+      return DAG.getConstant(1, dl, N->getValueType(0));
 
   if (!isa<ConstantSDNode>(N->getOperand(Op0)))
     return SDValue();
@@ -5552,7 +5547,7 @@ static SDValue optimizeIncrementingWhile(SDNode *N, SelectionDAG &DAG,
   unsigned ElementSize = 128 / N->getValueType(0).getVectorMinNumElements();
   if (PredPattern != std::nullopt &&
       NumActiveElems.getZExtValue() <= (MinSVEVectorSize / ElementSize))
-    return getPTrue(DAG, DL, N->getValueType(0), *PredPattern);
+    return getPTrue(DAG, dl, N->getValueType(0), *PredPattern);
 
   return SDValue();
 }
@@ -5706,9 +5701,9 @@ SDValue LowerSMELdrStr(SDValue N, SelectionDAG &DAG, bool IsLoad) {
 }
 
 SDValue LowerVectorMatch(SDValue Op, SelectionDAG &DAG) {
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   SDValue ID =
-      DAG.getTargetConstant(Intrinsic::aarch64_sve_match, DL, MVT::i64);
+      DAG.getTargetConstant(Intrinsic::aarch64_sve_match, dl, MVT::i64);
 
   auto Op1 = Op.getOperand(1);
   auto Op2 = Op.getOperand(2);
@@ -5734,41 +5729,41 @@ SDValue LowerVectorMatch(SDValue Op, SelectionDAG &DAG) {
     Op2 = convertToScalableVector(DAG, OpContainerVT, Op2);
     // Further, if the result is scalable, broadcast Op2 to a full SVE register.
     if (ResVT.isScalableVector())
-      Op2 = DAG.getNode(AArch64ISD::DUPLANE128, DL, OpContainerVT, Op2,
-                        DAG.getTargetConstant(0, DL, MVT::i64));
+      Op2 = DAG.getNode(AArch64ISD::DUPLANE128, dl, OpContainerVT, Op2,
+                        DAG.getTargetConstant(0, dl, MVT::i64));
   } else {
     // If Op2 is not a full 128-bit vector, we always need to broadcast it.
     unsigned Op2BitWidth = Op2VT.getFixedSizeInBits();
     MVT Op2IntVT = MVT::getIntegerVT(Op2BitWidth);
     EVT Op2PromotedVT = getPackedSVEVectorVT(Op2IntVT);
     Op2 = DAG.getBitcast(MVT::getVectorVT(Op2IntVT, 1), Op2);
-    Op2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op2IntVT, Op2,
-                      DAG.getConstant(0, DL, MVT::i64));
-    Op2 = DAG.getSplatVector(Op2PromotedVT, DL, Op2);
+    Op2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op2IntVT, Op2,
+                      DAG.getConstant(0, dl, MVT::i64));
+    Op2 = DAG.getSplatVector(Op2PromotedVT, dl, Op2);
     Op2 = DAG.getBitcast(OpContainerVT, Op2);
   }
 
   // If the result is scalable, we just need to carry out the MATCH.
   if (ResVT.isScalableVector())
-    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ResVT, ID, Mask, Op1, Op2);
+    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, ResVT, ID, Mask, Op1, Op2);
 
   // If the result is fixed, we can still use MATCH but we need to wrap the
   // first operand and the mask in scalable vectors before doing so.
 
   // Wrap the operands.
   Op1 = convertToScalableVector(DAG, OpContainerVT, Op1);
-  Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, Op1VT, Mask);
+  Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, Op1VT, Mask);
   Mask = convertFixedMaskToScalableVector(Mask, DAG);
 
   // Carry out the match.
-  SDValue Match = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Mask.getValueType(),
+  SDValue Match = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, Mask.getValueType(),
                               ID, Mask, Op1, Op2);
 
   // Extract and promote the match result (nxv16i1/nxv8i1) to ResVT
   // (v16i8/v8i8).
-  Match = DAG.getNode(ISD::SIGN_EXTEND, DL, OpContainerVT, Match);
+  Match = DAG.getNode(ISD::SIGN_EXTEND, dl, OpContainerVT, Match);
   Match = convertFromScalableVector(DAG, Op1VT, Match);
-  return DAG.getNode(ISD::TRUNCATE, DL, ResVT, Match);
+  return DAG.getNode(ISD::TRUNCATE, dl, ResVT, Match);
 }
 
 SDValue AArch64TargetLowering::LowerINTRINSIC_VOID(SDValue Op,
@@ -5847,24 +5842,24 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
 SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                      SelectionDAG &DAG) const {
   unsigned IntNo = Op.getConstantOperandVal(0);
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   switch (IntNo) {
   default: return SDValue();    // Don't custom lower most intrinsics.
   case Intrinsic::thread_pointer: {
     EVT PtrVT = getPointerTy(DAG.getDataLayout());
-    return DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT);
+    return DAG.getNode(AArch64ISD::THREAD_POINTER, dl, PtrVT);
   }
   case Intrinsic::aarch64_neon_abs: {
     EVT Ty = Op.getValueType();
     if (Ty == MVT::i64) {
       SDValue Result =
-          DAG.getNode(ISD::BITCAST, DL, MVT::v1i64, Op.getOperand(1));
-      Result = DAG.getNode(ISD::ABS, DL, MVT::v1i64, Result);
-      return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Result);
+          DAG.getNode(ISD::BITCAST, dl, MVT::v1i64, Op.getOperand(1));
+      Result = DAG.getNode(ISD::ABS, dl, MVT::v1i64, Result);
+      return DAG.getNode(ISD::BITCAST, dl, MVT::i64, Result);
     } else if (Ty.isVector() && Ty.isInteger() && isTypeLegal(Ty)) {
-      return DAG.getNode(ISD::ABS, DL, Ty, Op.getOperand(1));
+      return DAG.getNode(ISD::ABS, dl, Ty, Op.getOperand(1));
     } else {
-      report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
+      report_fatal_error("Unexpected type for AArch64 NEON intrinsic");
     }
   }
   case Intrinsic::aarch64_neon_pmull64: {
@@ -5885,139 +5880,139 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     // registers as opposed to a GPR load followed by a fmov.
     auto TryVectorizeOperand = [](SDValue N, std::optional<uint64_t> NLane,
                                   std::optional<uint64_t> OtherLane,
-                                  const SDLoc &DL,
+                                  const SDLoc &dl,
                                   SelectionDAG &DAG) -> SDValue {
       // If the operand is an higher half itself, rewrite it to
       // extract_high_v2i64; this way aarch64_neon_pmull64 could
       // re-use the dag-combiner function with aarch64_neon_{pmull,smull,umull}.
-      if (NLane == 1)
-        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v1i64,
-                           N.getOperand(0), DAG.getConstant(1, DL, MVT::i64));
+      if (NLane && *NLane == 1)
+        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i64,
+                           N.getOperand(0), DAG.getConstant(1, dl, MVT::i64));
 
       // Operand N is not a higher half but the other operand is.
-      if (OtherLane == 1) {
+      if (OtherLane && *OtherLane == 1) {
         // If this operand is a lower half, rewrite it to
         // extract_high_v2i64(duplane(<2 x Ty>, 0)). This saves a roundtrip to
         // align lanes of two operands. A roundtrip sequence (to move from lane
         // 1 to lane 0) is like this:
         //   mov x8, v0.d[1]
         //   fmov d0, x8
-        if (NLane == 0)
-          return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v1i64,
-                             DAG.getNode(AArch64ISD::DUPLANE64, DL, MVT::v2i64,
+        if (NLane && *NLane == 0)
+          return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i64,
+                             DAG.getNode(AArch64ISD::DUPLANE64, dl, MVT::v2i64,
                                          N.getOperand(0),
-                                         DAG.getConstant(0, DL, MVT::i64)),
-                             DAG.getConstant(1, DL, MVT::i64));
+                                         DAG.getConstant(0, dl, MVT::i64)),
+                             DAG.getConstant(1, dl, MVT::i64));
 
         // Otherwise just dup from main to all lanes.
-        return DAG.getNode(AArch64ISD::DUP, DL, MVT::v1i64, N);
+        return DAG.getNode(AArch64ISD::DUP, dl, MVT::v1i64, N);
       }
 
       // Neither operand is an extract of higher half, so codegen may just use
       // the non-high version of PMULL instruction. Use v1i64 to represent i64.
       assert(N.getValueType() == MVT::i64 &&
              "Intrinsic aarch64_neon_pmull64 requires i64 parameters");
-      return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i64, N);
+      return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, N);
     };
 
-    LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, DL, DAG);
-    RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, DL, DAG);
+    LHS = TryVectorizeOperand(LHS, LHSLane, RHSLane, dl, DAG);
+    RHS = TryVectorizeOperand(RHS, RHSLane, LHSLane, dl, DAG);
 
-    return DAG.getNode(AArch64ISD::PMULL, DL, Op.getValueType(), LHS, RHS);
+    return DAG.getNode(AArch64ISD::PMULL, dl, Op.getValueType(), LHS, RHS);
   }
   case Intrinsic::aarch64_neon_smax:
-    return DAG.getNode(ISD::SMAX, DL, Op.getValueType(), Op.getOperand(1),
+    return DAG.getNode(ISD::SMAX, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2));
   case Intrinsic::aarch64_neon_umax:
-    return DAG.getNode(ISD::UMAX, DL, Op.getValueType(), Op.getOperand(1),
+    return DAG.getNode(ISD::UMAX, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2));
   case Intrinsic::aarch64_neon_smin:
-    return DAG.getNode(ISD::SMIN, DL, Op.getValueType(), Op.getOperand(1),
+    return DAG.getNode(ISD::SMIN, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2));
   case Intrinsic::aarch64_neon_umin:
-    return DAG.getNode(ISD::UMIN, DL, Op.getValueType(), Op.getOperand(1),
+    return DAG.getNode(ISD::UMIN, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2));
   case Intrinsic::aarch64_neon_scalar_sqxtn:
   case Intrinsic::aarch64_neon_scalar_sqxtun:
   case Intrinsic::aarch64_neon_scalar_uqxtn: {
     assert(Op.getValueType() == MVT::i32 || Op.getValueType() == MVT::f32);
     if (Op.getValueType() == MVT::i32)
-      return DAG.getNode(ISD::BITCAST, DL, MVT::i32,
-                         DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::f32,
+      return DAG.getNode(ISD::BITCAST, dl, MVT::i32,
+                         DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::f32,
                                      Op.getOperand(0),
-                                     DAG.getNode(ISD::BITCAST, DL, MVT::f64,
+                                     DAG.getNode(ISD::BITCAST, dl, MVT::f64,
                                                  Op.getOperand(1))));
     return SDValue();
   }
   case Intrinsic::aarch64_neon_sqxtn:
-    return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
+    return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_neon_sqxtun:
-    return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
+    return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_neon_uqxtn:
-    return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
+    return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_neon_sqshrn:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
-                         DAG.getNode(AArch64ISD::VASHR, DL,
+      return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VASHR, dl,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
     return SDValue();
   case Intrinsic::aarch64_neon_sqshrun:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
-                         DAG.getNode(AArch64ISD::VASHR, DL,
+      return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VASHR, dl,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
     return SDValue();
   case Intrinsic::aarch64_neon_uqshrn:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
-                         DAG.getNode(AArch64ISD::VLSHR, DL,
+      return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::VLSHR, dl,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
     return SDValue();
   case Intrinsic::aarch64_neon_sqrshrn:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::TRUNCATE_SSAT_S, DL, Op.getValueType(),
-                         DAG.getNode(AArch64ISD::SRSHR_I, DL,
+      return DAG.getNode(ISD::TRUNCATE_SSAT_S, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::SRSHR_I, dl,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
     return SDValue();
   case Intrinsic::aarch64_neon_sqrshrun:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::TRUNCATE_SSAT_U, DL, Op.getValueType(),
-                         DAG.getNode(AArch64ISD::SRSHR_I, DL,
+      return DAG.getNode(ISD::TRUNCATE_SSAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::SRSHR_I, dl,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
     return SDValue();
   case Intrinsic::aarch64_neon_uqrshrn:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::TRUNCATE_USAT_U, DL, Op.getValueType(),
-                         DAG.getNode(AArch64ISD::URSHR_I, DL,
+      return DAG.getNode(ISD::TRUNCATE_USAT_U, dl, Op.getValueType(),
+                         DAG.getNode(AArch64ISD::URSHR_I, dl,
                                      Op.getOperand(1).getValueType(),
                                      Op.getOperand(1), Op.getOperand(2)));
     return SDValue();
   case Intrinsic::aarch64_neon_sqadd:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::SADDSAT, DL, Op.getValueType(), Op.getOperand(1),
+      return DAG.getNode(ISD::SADDSAT, dl, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
     return SDValue();
   case Intrinsic::aarch64_neon_sqsub:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::SSUBSAT, DL, Op.getValueType(), Op.getOperand(1),
+      return DAG.getNode(ISD::SSUBSAT, dl, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
     return SDValue();
   case Intrinsic::aarch64_neon_uqadd:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::UADDSAT, DL, Op.getValueType(), Op.getOperand(1),
+      return DAG.getNode(ISD::UADDSAT, dl, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
     return SDValue();
   case Intrinsic::aarch64_neon_uqsub:
     if (Op.getValueType().isVector())
-      return DAG.getNode(ISD::USUBSAT, DL, Op.getValueType(), Op.getOperand(1),
+      return DAG.getNode(ISD::USUBSAT, dl, Op.getValueType(), Op.getOperand(1),
                          Op.getOperand(2));
     return SDValue();
   case Intrinsic::aarch64_sve_whilelt:
@@ -6030,224 +6025,224 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     return optimizeIncrementingWhile(Op.getNode(), DAG, /*IsSigned=*/true,
                                      /*IsEqual=*/true);
   case Intrinsic::aarch64_sve_sunpkhi:
-    return DAG.getNode(AArch64ISD::SUNPKHI, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::SUNPKHI, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_sunpklo:
-    return DAG.getNode(AArch64ISD::SUNPKLO, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::SUNPKLO, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_uunpkhi:
-    return DAG.getNode(AArch64ISD::UUNPKHI, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::UUNPKHI, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_uunpklo:
-    return DAG.getNode(AArch64ISD::UUNPKLO, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_clasta_n:
-    return DAG.getNode(AArch64ISD::CLASTA_N, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::CLASTA_N, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
   case Intrinsic::aarch64_sve_clastb_n:
-    return DAG.getNode(AArch64ISD::CLASTB_N, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::CLASTB_N, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
   case Intrinsic::aarch64_sve_lasta:
-    return DAG.getNode(AArch64ISD::LASTA, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::LASTA, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_lastb:
-    return DAG.getNode(AArch64ISD::LASTB, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::LASTB, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_rev:
-    return DAG.getNode(ISD::VECTOR_REVERSE, DL, Op.getValueType(),
+    return DAG.getNode(ISD::VECTOR_REVERSE, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_tbl:
-    return DAG.getNode(AArch64ISD::TBL, DL, Op.getValueType(), Op.getOperand(1),
+    return DAG.getNode(AArch64ISD::TBL, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2));
   case Intrinsic::aarch64_sve_trn1:
-    return DAG.getNode(AArch64ISD::TRN1, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::TRN1, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_trn2:
-    return DAG.getNode(AArch64ISD::TRN2, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::TRN2, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_uzp1:
-    return DAG.getNode(AArch64ISD::UZP1, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::UZP1, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_uzp2:
-    return DAG.getNode(AArch64ISD::UZP2, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::UZP2, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_zip1:
-    return DAG.getNode(AArch64ISD::ZIP1, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::ZIP1, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_zip2:
-    return DAG.getNode(AArch64ISD::ZIP2, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::ZIP2, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_splice:
-    return DAG.getNode(AArch64ISD::SPLICE, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::SPLICE, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
   case Intrinsic::aarch64_sve_ptrue:
-    return getPTrue(DAG, DL, Op.getValueType(), Op.getConstantOperandVal(1));
+    return getPTrue(DAG, dl, Op.getValueType(), Op.getConstantOperandVal(1));
   case Intrinsic::aarch64_sve_clz:
-    return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::CTLZ_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sme_cntsb:
-    return DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
-                       DAG.getConstant(1, DL, MVT::i32));
+    return DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
+                       DAG.getConstant(1, dl, MVT::i32));
   case Intrinsic::aarch64_sme_cntsh: {
-    SDValue One = DAG.getConstant(1, DL, MVT::i32);
-    SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(), One);
-    return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes, One);
+    SDValue One = DAG.getConstant(1, dl, MVT::i32);
+    SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(), One);
+    return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes, One);
   }
   case Intrinsic::aarch64_sme_cntsw: {
-    SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
-                                DAG.getConstant(1, DL, MVT::i32));
-    return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes,
-                       DAG.getConstant(2, DL, MVT::i32));
+    SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
+                                DAG.getConstant(1, dl, MVT::i32));
+    return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
+                       DAG.getConstant(2, dl, MVT::i32));
   }
   case Intrinsic::aarch64_sme_cntsd: {
-    SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, DL, Op.getValueType(),
-                                DAG.getConstant(1, DL, MVT::i32));
-    return DAG.getNode(ISD::SRL, DL, Op.getValueType(), Bytes,
-                       DAG.getConstant(3, DL, MVT::i32));
+    SDValue Bytes = DAG.getNode(AArch64ISD::RDSVL, dl, Op.getValueType(),
+                                DAG.getConstant(1, dl, MVT::i32));
+    return DAG.getNode(ISD::SRL, dl, Op.getValueType(), Bytes,
+                       DAG.getConstant(3, dl, MVT::i32));
   }
   case Intrinsic::aarch64_sve_cnt: {
     SDValue Data = Op.getOperand(3);
     // CTPOP only supports integer operands.
     if (Data.getValueType().isFloatingPoint())
-      Data = DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Data);
-    return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, DL, Op.getValueType(),
+      Data = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Data);
+    return DAG.getNode(AArch64ISD::CTPOP_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Data, Op.getOperand(1));
   }
   case Intrinsic::aarch64_sve_dupq_lane:
     return LowerDUPQLane(Op, DAG);
   case Intrinsic::aarch64_sve_convert_from_svbool:
     if (Op.getValueType() == MVT::aarch64svcount)
-      return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Op.getOperand(1));
+      return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Op.getOperand(1));
     return getSVEPredicateBitCast(Op.getValueType(), Op.getOperand(1), DAG);
   case Intrinsic::aarch64_sve_convert_to_svbool:
     if (Op.getOperand(1).getValueType() == MVT::aarch64svcount)
-      return DAG.getNode(ISD::BITCAST, DL, MVT::nxv16i1, Op.getOperand(1));
+      return DAG.getNode(ISD::BITCAST, dl, MVT::nxv16i1, Op.getOperand(1));
     return getSVEPredicateBitCast(MVT::nxv16i1, Op.getOperand(1), DAG);
   case Intrinsic::aarch64_sve_fneg:
-    return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_frintp:
-    return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_frintm:
-    return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FFLOOR_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_frinti:
-    return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, DL,
+    return DAG.getNode(AArch64ISD::FNEARBYINT_MERGE_PASSTHRU, dl,
                        Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_frintx:
-    return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FRINT_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_frinta:
-    return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FROUND_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_frintn:
-    return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, DL,
+    return DAG.getNode(AArch64ISD::FROUNDEVEN_MERGE_PASSTHRU, dl,
                        Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_frintz:
-    return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FTRUNC_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_ucvtf:
-    return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, DL,
+    return DAG.getNode(AArch64ISD::UINT_TO_FP_MERGE_PASSTHRU, dl,
                        Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_scvtf:
-    return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, DL,
+    return DAG.getNode(AArch64ISD::SINT_TO_FP_MERGE_PASSTHRU, dl,
                        Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_fcvtzu:
-    return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FCVTZU_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_fcvtzs:
-    return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FCVTZS_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_fsqrt:
-    return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FSQRT_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_frecpx:
-    return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FRECPX_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_frecpe_x:
-    return DAG.getNode(AArch64ISD::FRECPE, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FRECPE, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_frecps_x:
-    return DAG.getNode(AArch64ISD::FRECPS, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FRECPS, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_frsqrte_x:
-    return DAG.getNode(AArch64ISD::FRSQRTE, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FRSQRTE, dl, Op.getValueType(),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_frsqrts_x:
-    return DAG.getNode(AArch64ISD::FRSQRTS, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FRSQRTS, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2));
   case Intrinsic::aarch64_sve_fabs:
-    return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::FABS_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_abs:
-    return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::ABS_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_neg:
-    return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::NEG_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_insr: {
     SDValue Scalar = Op.getOperand(2);
     EVT ScalarTy = Scalar.getValueType();
     if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
-      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Scalar);
+      Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
 
-    return DAG.getNode(AArch64ISD::INSR, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
                        Op.getOperand(1), Scalar);
   }
   case Intrinsic::aarch64_sve_rbit:
-    return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, DL,
+    return DAG.getNode(AArch64ISD::BITREVERSE_MERGE_PASSTHRU, dl,
                        Op.getValueType(), Op.getOperand(2), Op.getOperand(3),
                        Op.getOperand(1));
   case Intrinsic::aarch64_sve_revb:
-    return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::BSWAP_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_revh:
-    return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::REVH_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_revw:
-    return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::REVW_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_revd:
-    return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::REVD_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_sxtb:
     return DAG.getNode(
-        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
+        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
         Op.getOperand(2), Op.getOperand(3),
         DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
         Op.getOperand(1));
   case Intrinsic::aarch64_sve_sxth:
     return DAG.getNode(
-        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
+        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
         Op.getOperand(2), Op.getOperand(3),
         DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
         Op.getOperand(1));
   case Intrinsic::aarch64_sve_sxtw:
     return DAG.getNode(
-        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
+        AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
         Op.getOperand(2), Op.getOperand(3),
         DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
         Op.getOperand(1));
   case Intrinsic::aarch64_sve_uxtb:
     return DAG.getNode(
-        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
+        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
         Op.getOperand(2), Op.getOperand(3),
         DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i8)),
         Op.getOperand(1));
   case Intrinsic::aarch64_sve_uxth:
     return DAG.getNode(
-        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
+        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
         Op.getOperand(2), Op.getOperand(3),
         DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i16)),
         Op.getOperand(1));
   case Intrinsic::aarch64_sve_uxtw:
     return DAG.getNode(
-        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, DL, Op.getValueType(),
+        AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU, dl, Op.getValueType(),
         Op.getOperand(2), Op.getOperand(3),
         DAG.getValueType(Op.getValueType().changeVectorElementType(MVT::i32)),
         Op.getOperand(1));
@@ -6255,7 +6250,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     const auto &MF = DAG.getMachineFunction();
     const auto *RegInfo = Subtarget->getRegisterInfo();
     unsigned Reg = RegInfo->getLocalAddressRegister(MF);
-    return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg,
+    return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg,
                               Op.getSimpleValueType());
   }
 
@@ -6287,7 +6282,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri ||
                         IntNo == Intrinsic::aarch64_sve_sri;
     unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI;
-    return DAG.getNode(Opcode, DL, Ty, Op.getOperand(1), Op.getOperand(2),
+    return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2),
                        Op.getOperand(3));
   }
 
@@ -6302,7 +6297,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     unsigned Opcode = IsSignedAdd
                           ? (IsRoundingAdd ? ISD::AVGCEILS : ISD::AVGFLOORS)
                           : (IsRoundingAdd ? ISD::AVGCEILU : ISD::AVGFLOORU);
-    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
+    return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2));
   }
   case Intrinsic::aarch64_neon_saddlp:
@@ -6310,7 +6305,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     unsigned Opcode = IntNo == Intrinsic::aarch64_neon_uaddlp
                           ? AArch64ISD::UADDLP
                           : AArch64ISD::SADDLP;
-    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1));
+    return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1));
   }
   case Intrinsic::aarch64_neon_sdot:
   case Intrinsic::aarch64_neon_udot:
@@ -6320,12 +6315,12 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                        IntNo == Intrinsic::aarch64_sve_udot)
                           ? AArch64ISD::UDOT
                           : AArch64ISD::SDOT;
-    return DAG.getNode(Opcode, DL, Op.getValueType(), Op.getOperand(1),
+    return DAG.getNode(Opcode, dl, Op.getValueType(), Op.getOperand(1),
                        Op.getOperand(2), Op.getOperand(3));
   }
   case Intrinsic::aarch64_neon_usdot:
   case Intrinsic::aarch64_sve_usdot: {
-    return DAG.getNode(AArch64ISD::USDOT, DL, Op.getValueType(),
+    return DAG.getNode(AArch64ISD::USDOT, dl, Op.getValueType(),
                        Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
   }
   case Intrinsic::aarch64_neon_saddlv:
@@ -6342,10 +6337,10 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
     SDValue ADDLV = DAG.getNode(
         IntNo == Intrinsic::aarch64_neon_uaddlv ? AArch64ISD::UADDLV
                                                 : AArch64ISD::SADDLV,
-        DL, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
+        dl, ResVT == MVT::i32 ? MVT::v4i32 : MVT::v2i64, Op.getOperand(1));
     SDValue EXTRACT_VEC_ELT = DAG.getNode(
-        ISD::EXTRACT_VECTOR_ELT, DL, ResVT == MVT::i32 ? MVT::i32 : MVT::i64,
-        ADDLV, DAG.getConstant(0, DL, MVT::i64));
+        ISD::EXTRACT_VECTOR_ELT, dl, ResVT == MVT::i32 ? MVT::i32 : MVT::i64,
+        ADDLV, DAG.getConstant(0, dl, MVT::i64));
     return EXTRACT_VEC_ELT;
   }
   case Intrinsic::experimental_cttz_elts: {
@@ -6357,13 +6352,13 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
       // We can use SVE instructions to lower this intrinsic by first creating
       // an SVE predicate register mask from the fixed-width vector.
       EVT NewVT = getTypeToTransformTo(*DAG.getContext(), VT);
-      SDValue Mask = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, CttzOp);
+      SDValue Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, NewVT, CttzOp);
       CttzOp = convertFixedMaskToScalableVector(Mask, DAG);
     }
 
     SDValue NewCttzElts =
-        DAG.getNode(AArch64ISD::CTTZ_ELTS, DL, MVT::i64, CttzOp);
-    return DAG.getZExtOrTrunc(NewCttzElts, DL, Op.getValueType());
+        DAG.getNode(AArch64ISD::CTTZ_ELTS, dl, MVT::i64, CttzOp);
+    return DAG.getZExtOrTrunc(NewCttzElts, dl, Op.getValueType());
   }
   case Intrinsic::experimental_vector_match: {
     return LowerVectorMatch(Op, DAG);
@@ -6710,7 +6705,7 @@ static SDValue LowerTruncateVectorStore(SDLoc DL, StoreSDNode *ST,
 }
 
 static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   SDValue Src = Op.getOperand(0);
   MVT DestVT = Op.getSimpleValueType();
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -6726,15 +6721,15 @@ static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) {
   (void)TLI;
 
   if (SrcAS == ARM64AS::PTR32_SPTR) {
-    return DAG.getNode(ISD::SIGN_EXTEND, DL, DestVT, Src,
-                       DAG.getTargetConstant(0, DL, DestVT));
+    return DAG.getNode(ISD::SIGN_EXTEND, dl, DestVT, Src,
+                       DAG.getTargetConstant(0, dl, DestVT));
   } else if (SrcAS == ARM64AS::PTR32_UPTR) {
-    return DAG.getNode(ISD::ZERO_EXTEND, DL, DestVT, Src,
-                       DAG.getTargetConstant(0, DL, DestVT));
+    return DAG.getNode(ISD::ZERO_EXTEND, dl, DestVT, Src,
+                       DAG.getTargetConstant(0, dl, DestVT));
   } else if ((DestAS == ARM64AS::PTR32_SPTR) ||
              (DestAS == ARM64AS::PTR32_UPTR)) {
-    SDValue Ext = DAG.getAnyExtOrTrunc(Src, DL, DestVT);
-    SDValue Trunc = DAG.getZeroExtendInReg(Ext, DL, DestVT);
+    SDValue Ext = DAG.getAnyExtOrTrunc(Src, dl, DestVT);
+    SDValue Trunc = DAG.getZeroExtendInReg(Ext, dl, DestVT);
     return Trunc;
   } else {
     return Src;
@@ -7034,9 +7029,9 @@ static SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
 
   AArch64CC::CondCode CC;
   if (SDValue Cmp = emitConjunction(DAG, Cond, CC)) {
-    SDLoc DL(Op);
-    SDValue CCVal = DAG.getConstant(CC, DL, MVT::i32);
-    return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CCVal,
+    SDLoc dl(Op);
+    SDValue CCVal = DAG.getConstant(CC, dl, MVT::i32);
+    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                        Cmp);
   }
 
@@ -7127,80 +7122,59 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
 
 SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
                                                       SelectionDAG &DAG) const {
+  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+    report_fatal_error(
+        "ADJUST_TRAMPOLINE operation is only supported on Linux.");
+
   return Op.getOperand(0);
 }
 
 SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                     SelectionDAG &DAG) const {
+
+  // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
+  if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
+    report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
+
   SDValue Chain = Op.getOperand(0);
-  SDValue Trmp = Op.getOperand(1); // trampoline, >=32 bytes
+  SDValue Trmp = Op.getOperand(1); // trampoline
   SDValue FPtr = Op.getOperand(2); // nested function
   SDValue Nest = Op.getOperand(3); // 'nest' parameter value
+  SDLoc dl(Op);
 
-  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
-
-  // ldr NestReg, .+16
-  // ldr x17, .+20
-  // br x17
-  // .word 0
-  // .nest: .qword nest
-  // .fptr: .qword fptr
-  SDValue OutChains[5];
-
-  const Function *Func =
-      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
-  CallingConv::ID CC = Func->getCallingConv();
-  unsigned NestReg;
-
-  switch (CC) {
-  default:
-    NestReg = 0x0f; // X15
-    break;
-  case CallingConv::ARM64EC_Thunk_X64:
-    // Must be kept in sync with AArch64CallingConv.td
-    NestReg = 0x04; // X4
-    break;
-  }
-
-  const char FptrReg = 0x11; // X17
-
-  SDValue Addr = Trmp;
-
-  SDLoc DL(Op);
-  OutChains[0] = DAG.getStore(
-      Chain, DL, DAG.getConstant(0x58000080u | NestReg, DL, MVT::i32), Addr,
-      MachinePointerInfo(TrmpAddr));
-
-  Addr = DAG.getNode(ISD::ADD, DL, MVT::i64, Trmp,
-                     DAG.getConstant(4, DL, MVT::i64));
-  OutChains[1] = DAG.getStore(
-      Chain, DL, DAG.getConstant(0x580000b0u | FptrReg, DL, MVT::i32), Addr,
-      MachinePointerInfo(TrmpAddr, 4));
+  EVT PtrVT = getPointerTy(DAG.getDataLayout());
+  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
 
-  Addr = DAG.getNode(ISD::ADD, DL, MVT::i64, Trmp,
-                     DAG.getConstant(8, DL, MVT::i64));
-  OutChains[2] =
-      DAG.getStore(Chain, DL, DAG.getConstant(0xd61f0220u, DL, MVT::i32), Addr,
-                   MachinePointerInfo(TrmpAddr, 8));
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
 
-  Addr = DAG.getNode(ISD::ADD, DL, MVT::i64, Trmp,
-                     DAG.getConstant(16, DL, MVT::i64));
-  OutChains[3] =
-      DAG.getStore(Chain, DL, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
+  Entry.Ty = IntPtrTy;
+  Entry.Node = Trmp;
+  Args.push_back(Entry);
 
-  Addr = DAG.getNode(ISD::ADD, DL, MVT::i64, Trmp,
-                     DAG.getConstant(24, DL, MVT::i64));
-  OutChains[4] =
-      DAG.getStore(Chain, DL, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
+  if (auto *FI = dyn_cast<FrameIndexSDNode>(Trmp.getNode())) {
+    MachineFunction &MF = DAG.getMachineFunction();
+    MachineFrameInfo &MFI = MF.getFrameInfo();
+    Entry.Node =
+        DAG.getConstant(MFI.getObjectSize(FI->getIndex()), dl, MVT::i64);
+  } else
+    Entry.Node = DAG.getConstant(36, dl, MVT::i64);
 
-  SDValue StoreToken = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
+  Args.push_back(Entry);
+  Entry.Node = FPtr;
+  Args.push_back(Entry);
+  Entry.Node = Nest;
+  Args.push_back(Entry);
 
-  SDValue EndOfTrmp = DAG.getNode(ISD::ADD, DL, MVT::i64, Trmp,
-                                  DAG.getConstant(12, DL, MVT::i64));
+  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
+      CallingConv::C, Type::getVoidTy(*DAG.getContext()),
+      DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
 
-  // Call clear cache on the trampoline instructions.
-  return DAG.getNode(ISD::CLEAR_CACHE, DL, MVT::Other, StoreToken, Trmp,
-                     EndOfTrmp);
+  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
+  return CallResult.second;
 }
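
For context, the replacement lowering above no longer writes the trampoline instructions inline; it emits a call to `__trampoline_setup` and reports a fatal error on Windows and Darwin. A minimal LLVM IR sketch of the kind of input that reaches LowerINIT_TRAMPOLINE/LowerADJUST_TRAMPOLINE is shown below. It is illustrative only and not taken from this patch, and the trampoline buffer size and alignment are assumptions, since the real requirements are target specific.

```llvm
; Nested-function style input: @callee receives its static chain through the
; 'nest' parameter, and the caller materialises a trampoline for it.
declare void @llvm.init.trampoline(ptr, ptr, ptr)
declare ptr @llvm.adjust.trampoline(ptr)

define internal i32 @callee(ptr nest %chain, i32 %x) {
  %c = load i32, ptr %chain
  %r = add i32 %c, %x
  ret i32 %r
}

define i32 @caller(i32 %x) {
  %chain = alloca i32
  store i32 42, ptr %chain
  ; Buffer size and alignment here are assumptions; the requirements are per target.
  %tramp = alloca [64 x i8], align 16
  call void @llvm.init.trampoline(ptr %tramp, ptr @callee, ptr %chain)
  %fn = call ptr @llvm.adjust.trampoline(ptr %tramp)
  ; The adjusted pointer is called without the 'nest' argument.
  %r = call i32 %fn(i32 %x)
  ret i32 %r
}
```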
 
 SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
@@ -7555,7 +7529,6 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerVECTOR_HISTOGRAM(Op, DAG);
   case ISD::PARTIAL_REDUCE_SMLA:
   case ISD::PARTIAL_REDUCE_UMLA:
-  case ISD::PARTIAL_REDUCE_SUMLA:
     return LowerPARTIAL_REDUCE_MLA(Op, DAG);
   }
 }
@@ -7649,7 +7622,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                      bool IsVarArg) const {
   switch (CC) {
   default:
-    reportFatalUsageError("unsupported calling convention");
+    report_fatal_error("Unsupported calling convention.");
   case CallingConv::GHC:
     return CC_AArch64_GHC;
   case CallingConv::PreserveNone:
@@ -7758,12 +7731,6 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
   unsigned NumArgs = Ins.size();
   Function::const_arg_iterator CurOrigArg = F.arg_begin();
   unsigned CurArgIdx = 0;
-  bool UseVarArgCC = false;
-  if (IsWin64)
-    UseVarArgCC = isVarArg;
-
-  CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
-
   for (unsigned i = 0; i != NumArgs; ++i) {
     MVT ValVT = Ins[i].VT;
     if (Ins[i].isOrigArg()) {
@@ -7780,6 +7747,10 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
       else if (ActualMVT == MVT::i16)
         ValVT = MVT::i16;
     }
+    bool UseVarArgCC = false;
+    if (IsWin64)
+      UseVarArgCC = isVarArg;
+    CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC);
     bool Res =
         AssignFn(i, ValVT, ValVT, CCValAssign::Full, Ins[i].Flags, CCInfo);
     assert(!Res && "Call operand has unhandled type");
@@ -8448,8 +8419,6 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
         ArgVT = MVT::i16;
     }
 
-    // FIXME: CCAssignFnForCall should be called once, for the call and not per
-    // argument. This logic should exactly mirror LowerFormalArguments.
     CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC);
     bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo);
     assert(!Res && "Call operand has unhandled type");
@@ -8674,9 +8643,9 @@ static bool checkZExtBool(SDValue Arg, const SelectionDAG &DAG) {
   if (SizeInBits < 8)
     return false;
 
-  APInt RequiredZero(SizeInBits, 0xFE);
+    APInt RequiredZero(SizeInBits, 0xFE);
   KnownBits Bits = DAG.computeKnownBits(Arg, 4);
-  bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
+    bool ZExtBool = (Bits.Zero & RequiredZero) == RequiredZero;
   return ZExtBool;
 }
 
@@ -8797,45 +8766,6 @@ getSMToggleCondition(const SMECallAttrs &CallAttrs) {
   llvm_unreachable("Unsupported attributes");
 }
 
-/// Check whether a stack argument requires lowering in a tail call.
-static bool shouldLowerTailCallStackArg(const MachineFunction &MF,
-                                        const CCValAssign &VA, SDValue Arg,
-                                        ISD::ArgFlagsTy Flags, int CallOffset) {
-  // FIXME: We should be able to handle this case, but it's not clear how to.
-  if (Flags.isZExt() || Flags.isSExt())
-    return true;
-
-  for (;;) {
-    // Look through nodes that don't alter the bits of the incoming value.
-    unsigned Op = Arg.getOpcode();
-    if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
-        Arg->isAssert() || Op == AArch64ISD::ASSERT_ZEXT_BOOL) {
-      Arg = Arg.getOperand(0);
-      continue;
-    }
-    break;
-  }
-
-  // If the argument is a load from the same immutable stack slot, we can reuse
-  // it.
-  if (auto *LoadNode = dyn_cast<LoadSDNode>(Arg)) {
-    if (auto *FINode = dyn_cast<FrameIndexSDNode>(LoadNode->getBasePtr())) {
-      const MachineFrameInfo &MFI = MF.getFrameInfo();
-      int FI = FINode->getIndex();
-      if (!MFI.isImmutableObjectIndex(FI))
-        return true;
-      if (CallOffset != MFI.getObjectOffset(FI))
-        return true;
-      uint64_t SizeInBits = LoadNode->getMemoryVT().getFixedSizeInBits();
-      if (SizeInBits / 8 != MFI.getObjectSize(FI))
-        return true;
-      return false;
-    }
-  }
-
-  return true;
-}
-
 /// LowerCall - Lower a call to a callseq_start + CALL + callseq_end chain,
 /// and add input and output parameter nodes.
 SDValue
@@ -9258,13 +9188,10 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
       }
       unsigned LocMemOffset = VA.getLocMemOffset();
       int32_t Offset = LocMemOffset + BEAlign;
+      SDValue PtrOff = DAG.getIntPtrConstant(Offset, DL);
+      PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
 
       if (IsTailCall) {
-        // When the frame pointer is perfectly aligned for the tail call and the
-        // same stack argument is passed down intact, we can reuse it.
-        if (!FPDiff && !shouldLowerTailCallStackArg(MF, VA, Arg, Flags, Offset))
-          continue;
-
         Offset = Offset + FPDiff;
         int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
 
@@ -10431,7 +10358,7 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   SDValue LHS = Op.getOperand(2);
   SDValue RHS = Op.getOperand(3);
   SDValue Dest = Op.getOperand(4);
-  SDLoc DL(Op);
+  SDLoc dl(Op);
 
   MachineFunction &MF = DAG.getMachineFunction();
   // Speculation tracking/SLH assumes that optimized TB(N)Z/CB(N)Z instructions
@@ -10444,12 +10371,12 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
   // value of a libcall against zero, which is just what the rest of LowerBR_CC
   // is expecting to deal with.
   if (LHS.getValueType() == MVT::f128) {
-    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, DL, LHS, RHS);
+    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
 
     // If softenSetCCOperands returned a scalar, we need to compare the result
     // against zero to select between true and false values.
     if (!RHS.getNode()) {
-      RHS = DAG.getConstant(0, DL, LHS.getValueType());
+      RHS = DAG.getConstant(0, dl, LHS.getValueType());
       CC = ISD::SETNE;
     }
   }
@@ -10469,9 +10396,9 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 
     if (CC == ISD::SETNE)
       OFCC = getInvertedCondCode(OFCC);
-    SDValue CCVal = DAG.getConstant(OFCC, DL, MVT::i32);
+    SDValue CCVal = DAG.getConstant(OFCC, dl, MVT::i32);
 
-    return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CCVal,
+    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                        Overflow);
   }
 
@@ -10493,12 +10420,12 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
             isPowerOf2_64(LHS.getConstantOperandVal(1))) {
           SDValue Test = LHS.getOperand(0);
           uint64_t Mask = LHS.getConstantOperandVal(1);
-          return DAG.getNode(AArch64ISD::TBZ, DL, MVT::Other, Chain, Test,
-                             DAG.getConstant(Log2_64(Mask), DL, MVT::i64),
+          return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, Test,
+                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                              Dest);
         }
 
-        return DAG.getNode(AArch64ISD::CBZ, DL, MVT::Other, Chain, LHS, Dest);
+        return DAG.getNode(AArch64ISD::CBZ, dl, MVT::Other, Chain, LHS, Dest);
       } else if (CC == ISD::SETNE) {
         // See if we can use a TBZ to fold in an AND as well.
         // TBZ has a smaller branch displacement than CBZ.  If the offset is
@@ -10509,20 +10436,20 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
             isPowerOf2_64(LHS.getConstantOperandVal(1))) {
           SDValue Test = LHS.getOperand(0);
           uint64_t Mask = LHS.getConstantOperandVal(1);
-          return DAG.getNode(AArch64ISD::TBNZ, DL, MVT::Other, Chain, Test,
-                             DAG.getConstant(Log2_64(Mask), DL, MVT::i64),
+          return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, Test,
+                             DAG.getConstant(Log2_64(Mask), dl, MVT::i64),
                              Dest);
         }
 
-        return DAG.getNode(AArch64ISD::CBNZ, DL, MVT::Other, Chain, LHS, Dest);
+        return DAG.getNode(AArch64ISD::CBNZ, dl, MVT::Other, Chain, LHS, Dest);
       } else if (CC == ISD::SETLT && LHS.getOpcode() != ISD::AND) {
         // Don't combine AND since emitComparison converts the AND to an ANDS
         // (a.k.a. TST) and the test in the test bit and branch instruction
         // becomes redundant.  This would also increase register pressure.
         uint64_t SignBitPos;
         std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
-        return DAG.getNode(AArch64ISD::TBNZ, DL, MVT::Other, Chain, LHS,
-                           DAG.getConstant(SignBitPos, DL, MVT::i64), Dest);
+        return DAG.getNode(AArch64ISD::TBNZ, dl, MVT::Other, Chain, LHS,
+                           DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
       }
     }
     if (RHSC && RHSC->getSExtValue() == -1 && CC == ISD::SETGT &&
@@ -10532,8 +10459,8 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
       // becomes redundant.  This would also increase register pressure.
       uint64_t SignBitPos;
       std::tie(LHS, SignBitPos) = lookThroughSignExtension(LHS);
-      return DAG.getNode(AArch64ISD::TBZ, DL, MVT::Other, Chain, LHS,
-                         DAG.getConstant(SignBitPos, DL, MVT::i64), Dest);
+      return DAG.getNode(AArch64ISD::TBZ, dl, MVT::Other, Chain, LHS,
+                         DAG.getConstant(SignBitPos, dl, MVT::i64), Dest);
     }
 
     // Try to emit Armv9.6 CB instructions. We prefer tb{n}z/cb{n}z due to their
@@ -10542,14 +10469,14 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
         AArch64CC::isValidCBCond(changeIntCCToAArch64CC(CC)) &&
         ProduceNonFlagSettingCondBr) {
       SDValue Cond =
-          DAG.getTargetConstant(changeIntCCToAArch64CC(CC), DL, MVT::i32);
-      return DAG.getNode(AArch64ISD::CB, DL, MVT::Other, Chain, Cond, LHS, RHS,
+          DAG.getTargetConstant(changeIntCCToAArch64CC(CC), dl, MVT::i32);
+      return DAG.getNode(AArch64ISD::CB, dl, MVT::Other, Chain, Cond, LHS, RHS,
                          Dest);
     }
 
     SDValue CCVal;
-    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, DL);
-    return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CCVal,
+    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
+    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                        Cmp);
   }
 
@@ -10558,15 +10485,15 @@ SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
 
   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
   // clean.  Some of them require two branches to implement.
-  SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
   AArch64CC::CondCode CC1, CC2;
   changeFPCCToAArch64CC(CC, CC1, CC2);
-  SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
+  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
   SDValue BR1 =
-      DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, Chain, Dest, CC1Val, Cmp);
+      DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, Chain, Dest, CC1Val, Cmp);
   if (CC2 != AArch64CC::AL) {
-    SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32);
-    return DAG.getNode(AArch64ISD::BRCOND, DL, MVT::Other, BR1, Dest, CC2Val,
+    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
+    return DAG.getNode(AArch64ISD::BRCOND, dl, MVT::Other, BR1, Dest, CC2Val,
                        Cmp);
   }
 
@@ -10693,9 +10620,12 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
 
   EVT VT = Op.getValueType();
   if (VT.isScalableVector() ||
-      useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
+      useSVEForFixedLengthVectorVT(VT, !Subtarget->isNeonAvailable()))
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::CTPOP_MERGE_PASSTHRU);
 
+  if (!Subtarget->isNeonAvailable())
+    return SDValue();
+
   bool IsParity = Op.getOpcode() == ISD::PARITY;
   SDValue Val = Op.getOperand(0);
   SDLoc DL(Op);
@@ -10705,36 +10635,6 @@ SDValue AArch64TargetLowering::LowerCTPOP_PARITY(SDValue Op,
   if (VT == MVT::i32 && IsParity)
     return SDValue();
 
-  if (Subtarget->isSVEorStreamingSVEAvailable()) {
-    if (VT == MVT::i32 || VT == MVT::i64) {
-      EVT ContainerVT = VT == MVT::i32 ? MVT::nxv4i32 : MVT::nxv2i64;
-      Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ContainerVT,
-                        DAG.getUNDEF(ContainerVT), Val,
-                        DAG.getVectorIdxConstant(0, DL));
-      Val = DAG.getNode(ISD::CTPOP, DL, ContainerVT, Val);
-      Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Val,
-                        DAG.getVectorIdxConstant(0, DL));
-      if (IsParity)
-        Val = DAG.getNode(ISD::AND, DL, VT, Val, DAG.getConstant(1, DL, VT));
-      return Val;
-    }
-
-    if (VT == MVT::i128) {
-      Val = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Val);
-      Val = convertToScalableVector(DAG, MVT::nxv2i64, Val);
-      Val = DAG.getNode(ISD::CTPOP, DL, MVT::nxv2i64, Val);
-      Val = convertFromScalableVector(DAG, MVT::v2i64, Val);
-      Val = DAG.getNode(ISD::VECREDUCE_ADD, DL, MVT::i64, Val);
-      Val = DAG.getZExtOrTrunc(Val, DL, VT);
-      if (IsParity)
-        Val = DAG.getNode(ISD::AND, DL, VT, Val, DAG.getConstant(1, DL, VT));
-      return Val;
-    }
-  }
-
-  if (!Subtarget->isNeonAvailable())
-    return SDValue();
-
   // If there is no CNT instruction available, GPR popcount can
   // be more efficiently lowered to the following sequence that uses
   // AdvSIMD registers/instructions as long as the copies to/from
@@ -11000,40 +10900,40 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   SDValue LHS = Op.getOperand(OpNo + 0);
   SDValue RHS = Op.getOperand(OpNo + 1);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(OpNo + 2))->get();
-  SDLoc DL(Op);
+  SDLoc dl(Op);
 
   // We chose ZeroOrOneBooleanContents, so use zero and one.
   EVT VT = Op.getValueType();
-  SDValue TVal = DAG.getConstant(1, DL, VT);
-  SDValue FVal = DAG.getConstant(0, DL, VT);
+  SDValue TVal = DAG.getConstant(1, dl, VT);
+  SDValue FVal = DAG.getConstant(0, dl, VT);
 
   // Handle f128 first, since one possible outcome is a normal integer
   // comparison which gets picked up by the next if statement.
   if (LHS.getValueType() == MVT::f128) {
-    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, DL, LHS, RHS, Chain,
+    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS, Chain,
                         IsSignaling);
 
     // If softenSetCCOperands returned a scalar, use it.
     if (!RHS.getNode()) {
       assert(LHS.getValueType() == Op.getValueType() &&
              "Unexpected setcc expansion!");
-      return IsStrict ? DAG.getMergeValues({LHS, Chain}, DL) : LHS;
+      return IsStrict ? DAG.getMergeValues({LHS, Chain}, dl) : LHS;
     }
   }
 
   if (LHS.getValueType().isInteger()) {
 
-    simplifySetCCIntoEq(CC, LHS, RHS, DAG, DL);
+    simplifySetCCIntoEq(CC, LHS, RHS, DAG, dl);
 
     SDValue CCVal;
     SDValue Cmp = getAArch64Cmp(
-        LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, DL);
+        LHS, RHS, ISD::getSetCCInverse(CC, LHS.getValueType()), CCVal, DAG, dl);
 
     // Note that we inverted the condition above, so we reverse the order of
     // the true and false operands here.  This will allow the setcc to be
     // matched to a single CSINC instruction.
-    SDValue Res = DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal, CCVal, Cmp);
-    return IsStrict ? DAG.getMergeValues({Res, Chain}, DL) : Res;
+    SDValue Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CCVal, Cmp);
+    return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
   }
 
   // Now we know we're dealing with FP values.
@@ -11044,9 +10944,9 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   // and do the comparison.
   SDValue Cmp;
   if (IsStrict)
-    Cmp = emitStrictFPComparison(LHS, RHS, DL, DAG, Chain, IsSignaling);
+    Cmp = emitStrictFPComparison(LHS, RHS, dl, DAG, Chain, IsSignaling);
   else
-    Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+    Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
 
   AArch64CC::CondCode CC1, CC2;
   changeFPCCToAArch64CC(CC, CC1, CC2);
@@ -11054,12 +10954,12 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   if (CC2 == AArch64CC::AL) {
     changeFPCCToAArch64CC(ISD::getSetCCInverse(CC, LHS.getValueType()), CC1,
                           CC2);
-    SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
+    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
 
     // Note that we inverted the condition above, so we reverse the order of
     // the true and false operands here.  This will allow the setcc to be
     // matched to a single CSINC instruction.
-    Res = DAG.getNode(AArch64ISD::CSEL, DL, VT, FVal, TVal, CC1Val, Cmp);
+    Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, FVal, TVal, CC1Val, Cmp);
   } else {
     // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't
     // totally clean.  Some of them require two CSELs to implement.  As is in
@@ -11067,14 +10967,14 @@ SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
     // of the first as the RHS.  We're effectively OR'ing the two CC's together.
 
     // FIXME: It would be nice if we could match the two CSELs to two CSINCs.
-    SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
+    SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
     SDValue CS1 =
-        DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, CC1Val, Cmp);
+        DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
 
-    SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32);
-    Res = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, CS1, CC2Val, Cmp);
+    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
+    Res = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
   }
-  return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, DL) : Res;
+  return IsStrict ? DAG.getMergeValues({Res, Cmp.getValue(1)}, dl) : Res;
 }
 
 SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op,
@@ -11107,134 +11007,19 @@ SDValue AArch64TargetLowering::LowerSETCCCARRY(SDValue Op,
                      Cmp.getValue(1));
 }
 
-/// Emit vector comparison for floating-point values, producing a mask.
-static SDValue emitVectorComparison(SDValue LHS, SDValue RHS,
-                                    AArch64CC::CondCode CC, bool NoNans, EVT VT,
-                                    const SDLoc &DL, SelectionDAG &DAG) {
-  assert(VT.getSizeInBits() == LHS.getValueType().getSizeInBits() &&
-         "function only supposed to emit natural comparisons");
-
-  switch (CC) {
-  default:
-    return SDValue();
-  case AArch64CC::NE: {
-    SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, DL, VT, LHS, RHS);
-    // Use vector semantics for the inversion to potentially save a copy between
-    // SIMD and regular registers.
-    if (!LHS.getValueType().isVector()) {
-      EVT VecVT =
-          EVT::getVectorVT(*DAG.getContext(), VT, 128 / VT.getSizeInBits());
-      SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
-      SDValue MaskVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT,
-                                    DAG.getUNDEF(VecVT), Fcmeq, Zero);
-      SDValue InvertedMask = DAG.getNOT(DL, MaskVec, VecVT);
-      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, InvertedMask, Zero);
-    }
-    return DAG.getNOT(DL, Fcmeq, VT);
-  }
-  case AArch64CC::EQ:
-    return DAG.getNode(AArch64ISD::FCMEQ, DL, VT, LHS, RHS);
-  case AArch64CC::GE:
-    return DAG.getNode(AArch64ISD::FCMGE, DL, VT, LHS, RHS);
-  case AArch64CC::GT:
-    return DAG.getNode(AArch64ISD::FCMGT, DL, VT, LHS, RHS);
-  case AArch64CC::LE:
-    if (!NoNans)
-      return SDValue();
-    // If we ignore NaNs then we can use to the LS implementation.
-    [[fallthrough]];
-  case AArch64CC::LS:
-    return DAG.getNode(AArch64ISD::FCMGE, DL, VT, RHS, LHS);
-  case AArch64CC::LT:
-    if (!NoNans)
-      return SDValue();
-    // If we ignore NaNs then we can use to the MI implementation.
-    [[fallthrough]];
-  case AArch64CC::MI:
-    return DAG.getNode(AArch64ISD::FCMGT, DL, VT, RHS, LHS);
-  }
-}
-
-/// For SELECT_CC, when the true/false values are (-1, 0) and the compared
-/// values are scalars, try to emit a mask generating vector instruction.
-static SDValue emitFloatCompareMask(SDValue LHS, SDValue RHS, SDValue TVal,
-                                    SDValue FVal, ISD::CondCode CC, bool NoNaNs,
-                                    const SDLoc &DL, SelectionDAG &DAG) {
-  assert(!LHS.getValueType().isVector());
-  assert(!RHS.getValueType().isVector());
-
-  auto *CTVal = dyn_cast<ConstantSDNode>(TVal);
-  auto *CFVal = dyn_cast<ConstantSDNode>(FVal);
-  if (!CTVal || !CFVal)
-    return {};
-  if (!(CTVal->isAllOnes() && CFVal->isZero()) &&
-      !(CTVal->isZero() && CFVal->isAllOnes()))
-    return {};
-
-  if (CTVal->isZero())
-    CC = ISD::getSetCCInverse(CC, LHS.getValueType());
-
-  EVT VT = TVal.getValueType();
-  if (VT.getSizeInBits() != LHS.getValueType().getSizeInBits())
-    return {};
-
-  if (!NoNaNs && (CC == ISD::SETUO || CC == ISD::SETO)) {
-    bool OneNaN = false;
-    if (LHS == RHS) {
-      OneNaN = true;
-    } else if (DAG.isKnownNeverNaN(RHS)) {
-      OneNaN = true;
-      RHS = LHS;
-    } else if (DAG.isKnownNeverNaN(LHS)) {
-      OneNaN = true;
-      LHS = RHS;
-    }
-    if (OneNaN)
-      CC = (CC == ISD::SETUO) ? ISD::SETUNE : ISD::SETOEQ;
-  }
-
-  AArch64CC::CondCode CC1;
-  AArch64CC::CondCode CC2;
-  bool ShouldInvert = false;
-  changeVectorFPCCToAArch64CC(CC, CC1, CC2, ShouldInvert);
-  SDValue Cmp = emitVectorComparison(LHS, RHS, CC1, NoNaNs, VT, DL, DAG);
-  SDValue Cmp2;
-  if (CC2 != AArch64CC::AL) {
-    Cmp2 = emitVectorComparison(LHS, RHS, CC2, NoNaNs, VT, DL, DAG);
-    if (!Cmp2)
-      return {};
-  }
-  if (!Cmp2 && !ShouldInvert)
-    return Cmp;
-
-  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), VT, 128 / VT.getSizeInBits());
-  SDValue Zero = DAG.getConstant(0, DL, MVT::i64);
-  Cmp = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, DAG.getUNDEF(VecVT), Cmp,
-                    Zero);
-  if (Cmp2) {
-    Cmp2 = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VecVT, DAG.getUNDEF(VecVT),
-                       Cmp2, Zero);
-    Cmp = DAG.getNode(ISD::OR, DL, VecVT, Cmp, Cmp2);
-  }
-  if (ShouldInvert)
-    Cmp = DAG.getNOT(DL, Cmp, VecVT);
-  Cmp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Cmp, Zero);
-  return Cmp;
-}
-
-SDValue AArch64TargetLowering::LowerSELECT_CC(
-    ISD::CondCode CC, SDValue LHS, SDValue RHS, SDValue TVal, SDValue FVal,
-    iterator_range<SDNode::user_iterator> Users, bool HasNoNaNs,
-    const SDLoc &DL, SelectionDAG &DAG) const {
+SDValue AArch64TargetLowering::LowerSELECT_CC(ISD::CondCode CC, SDValue LHS,
+                                              SDValue RHS, SDValue TVal,
+                                              SDValue FVal, const SDLoc &dl,
+                                              SelectionDAG &DAG) const {
   // Handle f128 first, because it will result in a comparison of some RTLIB
   // call result against zero.
   if (LHS.getValueType() == MVT::f128) {
-    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, DL, LHS, RHS);
+    softenSetCCOperands(DAG, MVT::f128, LHS, RHS, CC, dl, LHS, RHS);
 
     // If softenSetCCOperands returned a scalar, we need to compare the result
     // against zero to select between true and false values.
     if (!RHS.getNode()) {
-      RHS = DAG.getConstant(0, DL, LHS.getValueType());
+      RHS = DAG.getConstant(0, dl, LHS.getValueType());
       CC = ISD::SETNE;
     }
   }
@@ -11242,8 +11027,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
   // Also handle f16, for which we need to do a f32 comparison.
   if ((LHS.getValueType() == MVT::f16 && !Subtarget->hasFullFP16()) ||
       LHS.getValueType() == MVT::bf16) {
-    LHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, LHS);
-    RHS = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, RHS);
+    LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, LHS);
+    RHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f32, RHS);
   }
 
   // Next, handle integers.
@@ -11262,9 +11047,9 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
         LHS.getValueType() == TVal.getValueType()) {
       EVT VT = LHS.getValueType();
       SDValue Shift =
-          DAG.getNode(ISD::SRA, DL, VT, LHS,
-                      DAG.getConstant(VT.getSizeInBits() - 1, DL, VT));
-      return DAG.getNode(ISD::OR, DL, VT, Shift, DAG.getConstant(1, DL, VT));
+          DAG.getNode(ISD::SRA, dl, VT, LHS,
+                      DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
+      return DAG.getNode(ISD::OR, dl, VT, Shift, DAG.getConstant(1, dl, VT));
     }
 
     // Check for SMAX(lhs, 0) and SMIN(lhs, 0) patterns.
@@ -11276,13 +11061,13 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
         LHS.getValueType() == RHS.getValueType()) {
       EVT VT = LHS.getValueType();
       SDValue Shift =
-          DAG.getNode(ISD::SRA, DL, VT, LHS,
-                      DAG.getConstant(VT.getSizeInBits() - 1, DL, VT));
+          DAG.getNode(ISD::SRA, dl, VT, LHS,
+                      DAG.getConstant(VT.getSizeInBits() - 1, dl, VT));
 
       if (CC == ISD::SETGT)
-        Shift = DAG.getNOT(DL, Shift, VT);
+        Shift = DAG.getNOT(dl, Shift, VT);
 
-      return DAG.getNode(ISD::AND, DL, VT, LHS, Shift);
+      return DAG.getNode(ISD::AND, dl, VT, LHS, Shift);
     }
 
     unsigned Opcode = AArch64ISD::CSEL;
@@ -11393,14 +11178,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
       if (CTVal == RHSVal && AArch64CC == AArch64CC::EQ) {
         Opcode = AArch64ISD::CSINV;
         TVal = LHS;
-        FVal = DAG.getConstant(0, DL, FVal.getValueType());
+        FVal = DAG.getConstant(0, dl, FVal.getValueType());
       }
     }
 
     SDValue CCVal;
-    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, DL);
+    SDValue Cmp = getAArch64Cmp(LHS, RHS, CC, CCVal, DAG, dl);
     EVT VT = TVal.getValueType();
-    return DAG.getNode(Opcode, DL, VT, TVal, FVal, CCVal, Cmp);
+    return DAG.getNode(Opcode, dl, VT, TVal, FVal, CCVal, Cmp);
   }
 
   // Now we know we're dealing with FP values.
@@ -11408,28 +11193,7 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
          LHS.getValueType() == MVT::f64);
   assert(LHS.getValueType() == RHS.getValueType());
   EVT VT = TVal.getValueType();
-
-  // If the purpose of the comparison is to select between all ones
-  // or all zeros, try to use a vector comparison because the operands are
-  // already stored in SIMD registers.
-  if (Subtarget->isNeonAvailable() && all_of(Users, [](const SDNode *U) {
-        switch (U->getOpcode()) {
-        default:
-          return false;
-        case ISD::INSERT_VECTOR_ELT:
-        case ISD::SCALAR_TO_VECTOR:
-        case AArch64ISD::DUP:
-          return true;
-        }
-      })) {
-    bool NoNaNs = getTargetMachine().Options.NoNaNsFPMath || HasNoNaNs;
-    SDValue VectorCmp =
-        emitFloatCompareMask(LHS, RHS, TVal, FVal, CC, NoNaNs, DL, DAG);
-    if (VectorCmp)
-      return VectorCmp;
-  }
-
-  SDValue Cmp = emitComparison(LHS, RHS, CC, DL, DAG);
+  SDValue Cmp = emitComparison(LHS, RHS, CC, dl, DAG);
 
   // Unfortunately, the mapping of LLVM FP CC's onto AArch64 CC's isn't totally
   // clean.  Some of them require two CSELs to implement.
@@ -11455,14 +11219,14 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(
   }
 
   // Emit first, and possibly only, CSEL.
-  SDValue CC1Val = DAG.getConstant(CC1, DL, MVT::i32);
-  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, FVal, CC1Val, Cmp);
+  SDValue CC1Val = DAG.getConstant(CC1, dl, MVT::i32);
+  SDValue CS1 = DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, FVal, CC1Val, Cmp);
 
   // If we need a second CSEL, emit it, using the output of the first as the
   // RHS.  We're effectively OR'ing the two CC's together.
   if (CC2 != AArch64CC::AL) {
-    SDValue CC2Val = DAG.getConstant(CC2, DL, MVT::i32);
-    return DAG.getNode(AArch64ISD::CSEL, DL, VT, TVal, CS1, CC2Val, Cmp);
+    SDValue CC2Val = DAG.getConstant(CC2, dl, MVT::i32);
+    return DAG.getNode(AArch64ISD::CSEL, dl, VT, TVal, CS1, CC2Val, Cmp);
   }
 
   // Otherwise, return the output of the first CSEL.
@@ -11515,10 +11279,8 @@ SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op,
   SDValue RHS = Op.getOperand(1);
   SDValue TVal = Op.getOperand(2);
   SDValue FVal = Op.getOperand(3);
-  bool HasNoNans = Op->getFlags().hasNoNaNs();
   SDLoc DL(Op);
-  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL,
-                        DAG);
+  return LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
 }
 
 SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
@@ -11526,7 +11288,6 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
   SDValue CCVal = Op->getOperand(0);
   SDValue TVal = Op->getOperand(1);
   SDValue FVal = Op->getOperand(2);
-  bool HasNoNans = Op->getFlags().hasNoNaNs();
   SDLoc DL(Op);
 
   EVT Ty = Op.getValueType();
@@ -11593,8 +11354,7 @@ SDValue AArch64TargetLowering::LowerSELECT(SDValue Op,
                                      DAG.getUNDEF(MVT::f32), FVal);
   }
 
-  SDValue Res =
-      LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, Op->users(), HasNoNans, DL, DAG);
+  SDValue Res = LowerSELECT_CC(CC, LHS, RHS, TVal, FVal, DL, DAG);
 
   if ((Ty == MVT::f16 || Ty == MVT::bf16) && !Subtarget->hasFullFP16()) {
     return DAG.getTargetExtractSubreg(AArch64::hsub, DL, Ty, Res);
@@ -12011,9 +11771,12 @@ getRegisterByName(const char* RegName, LLT VT, const MachineFunction &MF) const
     unsigned DwarfRegNum = MRI->getDwarfRegNum(Reg, false);
     if (!Subtarget->isXRegisterReserved(DwarfRegNum) &&
         !MRI->isReservedReg(MF, Reg))
-      Reg = Register();
+      Reg = 0;
   }
-  return Reg;
+  if (Reg)
+    return Reg;
+  report_fatal_error(
+      Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
 }
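
The diagnostic added above is reached when llvm.read_register or llvm.write_register names a register that is not reserved on the target. A small illustrative IR example (not part of this patch) that goes through getRegisterByName successfully, assuming x18 has been made reserved (for instance via the reserve-x18 subtarget feature):

```llvm
; Reads the named register x18. If x18 were not reserved, the lowering above
; would now report "Invalid register name" instead of returning a null register.
declare i64 @llvm.read_register.i64(metadata)

define i64 @read_x18() {
  %v = call i64 @llvm.read_register.i64(metadata !0)
  ret i64 %v
}

!0 = !{!"x18"}
```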
 
 SDValue AArch64TargetLowering::LowerADDROFRETURNADDR(SDValue Op,
@@ -12298,14 +12061,13 @@ enum class PredicateConstraint { Uph, Upl, Upa };
 // not what we want. The code here pre-empts this by matching the register
 // explicitly.
 static std::optional<std::pair<unsigned, const TargetRegisterClass *>>
-parseSVERegAsConstraint(StringRef Constraint) {
+parsePredicateRegAsConstraint(StringRef Constraint) {
   if (!Constraint.starts_with('{') || !Constraint.ends_with('}') ||
-      (Constraint[1] != 'p' && Constraint[1] != 'z'))
+      Constraint[1] != 'p')
     return std::nullopt;
 
-  bool IsPredicate = Constraint[1] == 'p';
   Constraint = Constraint.substr(2, Constraint.size() - 3);
-  bool IsPredicateAsCount = IsPredicate && Constraint.starts_with("n");
+  bool IsPredicateAsCount = Constraint.starts_with("n");
   if (IsPredicateAsCount)
     Constraint = Constraint.drop_front(1);
 
@@ -12315,9 +12077,8 @@ parseSVERegAsConstraint(StringRef Constraint) {
 
   if (IsPredicateAsCount)
     return std::make_pair(AArch64::PN0 + V, &AArch64::PNRRegClass);
-  if (IsPredicate)
+  else
     return std::make_pair(AArch64::P0 + V, &AArch64::PPRRegClass);
-  return std::make_pair(AArch64::Z0 + V, &AArch64::ZPRRegClass);
 }
 
 static std::optional<PredicateConstraint>
@@ -12567,16 +12328,8 @@ AArch64TargetLowering::getRegForInlineAsmConstraint(
       break;
     }
   } else {
-    if (const auto P = parseSVERegAsConstraint(Constraint)) {
-      // SME functions that are not in streaming mode, should
-      // still observe clobbers of Z-registers by clobbering
-      // the lower 128bits of those registers.
-      if (AArch64::ZPRRegClass.hasSubClassEq(P->second) &&
-          !Subtarget->isSVEorStreamingSVEAvailable())
-        return std::make_pair(TRI->getSubReg(P->first, AArch64::zsub),
-                              &AArch64::FPR128RegClass);
+    if (const auto P = parsePredicateRegAsConstraint(Constraint))
       return *P;
-    }
     if (const auto PC = parsePredicateConstraint(Constraint))
       if (const auto *RegClass = getPredicateRegisterClass(*PC, VT))
         return std::make_pair(0U, RegClass);
@@ -12813,7 +12566,7 @@ static unsigned getExtFactor(SDValue &V) {
 // within range. This can be reconstructed using AND and NEON's TBL1.
 SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG) {
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   EVT VT = Op.getValueType();
   assert(!VT.isScalableVector() &&
          "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
@@ -12886,17 +12639,17 @@ SDValue ReconstructShuffleWithRuntimeMask(SDValue Op, SelectionDAG &DAG) {
   // insert, we know that the index in the mask must be smaller than the number
   // of elements in the source, or we would have an out-of-bounds access.
   if (NumElts == 8)
-    SourceVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, SourceVec,
+    SourceVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, SourceVec,
                             DAG.getUNDEF(VT));
 
   // Preconditions met, so we can use a vector (AND +) TBL to build this vector.
   if (!AndMaskConstants.empty())
-    MaskSourceVec = DAG.getNode(ISD::AND, DL, VT, MaskSourceVec,
-                                DAG.getBuildVector(VT, DL, AndMaskConstants));
+    MaskSourceVec = DAG.getNode(ISD::AND, dl, VT, MaskSourceVec,
+                                DAG.getBuildVector(VT, dl, AndMaskConstants));
 
   return DAG.getNode(
-      ISD::INTRINSIC_WO_CHAIN, DL, VT,
-      DAG.getConstant(Intrinsic::aarch64_neon_tbl1, DL, MVT::i32), SourceVec,
+      ISD::INTRINSIC_WO_CHAIN, dl, VT,
+      DAG.getConstant(Intrinsic::aarch64_neon_tbl1, dl, MVT::i32), SourceVec,
       MaskSourceVec);
 }
 
@@ -12906,7 +12659,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
                                                   SelectionDAG &DAG) const {
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
   LLVM_DEBUG(dbgs() << "AArch64TargetLowering::ReconstructShuffle\n");
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   EVT VT = Op.getValueType();
   assert(!VT.isScalableVector() &&
          "Scalable vectors cannot be used with ISD::BUILD_VECTOR");
@@ -12999,7 +12752,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     TBLOperands.push_back(DAG.getConstant(Sources.size() == 3
                                               ? Intrinsic::aarch64_neon_tbl3
                                               : Intrinsic::aarch64_neon_tbl4,
-                                          DL, MVT::i32));
+                                          dl, MVT::i32));
     for (unsigned i = 0; i < Sources.size(); i++) {
       SDValue Src = Sources[i].Vec;
       EVT SrcVT = Src.getValueType();
@@ -13007,21 +12760,21 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
       assert((SrcVT.is64BitVector() || SrcVT.is128BitVector()) &&
              "Expected a legally typed vector");
       if (SrcVT.is64BitVector())
-        Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Src,
+        Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, Src,
                           DAG.getUNDEF(MVT::v8i8));
       TBLOperands.push_back(Src);
     }
 
     SmallVector<SDValue, 16> TBLMask;
     for (unsigned i = 0; i < Mask.size(); i++)
-      TBLMask.push_back(DAG.getConstant(Mask[i], DL, MVT::i32));
+      TBLMask.push_back(DAG.getConstant(Mask[i], dl, MVT::i32));
     assert((Mask.size() == 8 || Mask.size() == 16) &&
            "Expected a v8i8 or v16i8 Mask");
     TBLOperands.push_back(DAG.getBuildVector(
-        Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, DL, TBLMask));
+        Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, dl, TBLMask));
 
     SDValue Shuffle =
-        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL,
+        DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl,
                     Mask.size() == 8 ? MVT::v8i8 : MVT::v16i8, TBLOperands);
     return DAG.getBitcast(VT, Shuffle);
   }
@@ -13069,7 +12822,7 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
       // We can pad out the smaller vector for free, so if it's part of a
       // shuffle...
       Src.ShuffleVec =
-          DAG.getNode(ISD::CONCAT_VECTORS, DL, DestVT, Src.ShuffleVec,
+          DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, Src.ShuffleVec,
                       DAG.getUNDEF(Src.ShuffleVec.getValueType()));
       continue;
     }
@@ -13089,22 +12842,22 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     if (Src.MinElt >= NumSrcElts) {
       // The extraction can just take the second half
       Src.ShuffleVec =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DestVT, Src.ShuffleVec,
-                      DAG.getConstant(NumSrcElts, DL, MVT::i64));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(NumSrcElts, dl, MVT::i64));
       Src.WindowBase = -NumSrcElts;
     } else if (Src.MaxElt < NumSrcElts) {
       // The extraction can just take the first half
       Src.ShuffleVec =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DestVT, Src.ShuffleVec,
-                      DAG.getConstant(0, DL, MVT::i64));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, dl, MVT::i64));
     } else {
       // An actual VEXT is needed
       SDValue VEXTSrc1 =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DestVT, Src.ShuffleVec,
-                      DAG.getConstant(0, DL, MVT::i64));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(0, dl, MVT::i64));
       SDValue VEXTSrc2 =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, DestVT, Src.ShuffleVec,
-                      DAG.getConstant(NumSrcElts, DL, MVT::i64));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, Src.ShuffleVec,
+                      DAG.getConstant(NumSrcElts, dl, MVT::i64));
       unsigned Imm = Src.MinElt * getExtFactor(VEXTSrc1);
 
       if (!SrcVT.is64BitVector()) {
@@ -13115,8 +12868,8 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
       }
 
       Src.ShuffleVec =
-          DAG.getNode(AArch64ISD::EXT, DL, DestVT, VEXTSrc1, VEXTSrc2,
-                      DAG.getConstant(Imm, DL, MVT::i32));
+          DAG.getNode(AArch64ISD::EXT, dl, DestVT, VEXTSrc1, VEXTSrc2,
+                      DAG.getConstant(Imm, dl, MVT::i32));
       Src.WindowBase = -Src.MinElt;
     }
   }
@@ -13131,9 +12884,9 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     assert(ShuffleVT.getVectorElementType() == SmallestEltTy);
     if (DAG.getDataLayout().isBigEndian()) {
       Src.ShuffleVec =
-          DAG.getNode(AArch64ISD::NVCAST, DL, ShuffleVT, Src.ShuffleVec);
+          DAG.getNode(AArch64ISD::NVCAST, dl, ShuffleVT, Src.ShuffleVec);
     } else {
-      Src.ShuffleVec = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Src.ShuffleVec);
+      Src.ShuffleVec = DAG.getNode(ISD::BITCAST, dl, ShuffleVT, Src.ShuffleVec);
     }
     Src.WindowScale =
         SrcEltTy.getFixedSizeInBits() / SmallestEltTy.getFixedSizeInBits();
@@ -13186,12 +12939,12 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op,
     ShuffleOps[i] = Sources[i].ShuffleVec;
 
   SDValue Shuffle =
-      DAG.getVectorShuffle(ShuffleVT, DL, ShuffleOps[0], ShuffleOps[1], Mask);
+      DAG.getVectorShuffle(ShuffleVT, dl, ShuffleOps[0], ShuffleOps[1], Mask);
   SDValue V;
   if (DAG.getDataLayout().isBigEndian()) {
-    V = DAG.getNode(AArch64ISD::NVCAST, DL, VT, Shuffle);
+    V = DAG.getNode(AArch64ISD::NVCAST, dl, VT, Shuffle);
   } else {
-    V = DAG.getNode(ISD::BITCAST, DL, VT, Shuffle);
+    V = DAG.getNode(ISD::BITCAST, dl, VT, Shuffle);
   }
 
   LLVM_DEBUG(dbgs() << "Reshuffle, creating node: "; Shuffle.dump();
@@ -13547,7 +13300,7 @@ static SDValue tryFormConcatFromShuffle(SDValue Op, SelectionDAG &DAG) {
 static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
                                       unsigned PFEntry, SDValue LHS,
                                       SDValue RHS, SelectionDAG &DAG,
-                                      const SDLoc &DL) {
+                                      const SDLoc &dl) {
   unsigned OpNum = (PFEntry >> 26) & 0x0F;
   unsigned LHSID = (PFEntry >> 13) & ((1 << 13) - 1);
   unsigned RHSID = (PFEntry >> 0) & ((1 << 13) - 1);
@@ -13594,7 +13347,7 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
     // get the lane to move from the PFID, which is always from the
     // original vectors (V1 or V2).
     SDValue OpLHS = GeneratePerfectShuffle(
-        LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, DL);
+        LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
     EVT VT = OpLHS.getValueType();
     assert(RHSID < 8 && "Expected a lane index for RHSID!");
     unsigned ExtLane = 0;
@@ -13614,7 +13367,7 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
         OpLHS = DAG.getBitcast(MVT::v2f32, OpLHS);
       } else {
         assert(VT.getScalarSizeInBits() == 32 &&
-               "Expected 16 or 32 bit shuffle elements");
+               "Expected 16 or 32 bit shuffle elements");
         Input = DAG.getBitcast(MVT::v2f64, Input);
         OpLHS = DAG.getBitcast(MVT::v2f64, OpLHS);
       }
@@ -13629,20 +13382,20 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
         OpLHS = DAG.getBitcast(MVT::v4f16, OpLHS);
       }
     }
-    SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+    SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                               Input.getValueType().getVectorElementType(),
-                              Input, DAG.getVectorIdxConstant(ExtLane, DL));
+                              Input, DAG.getVectorIdxConstant(ExtLane, dl));
     SDValue Ins =
-        DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Input.getValueType(), OpLHS,
-                    Ext, DAG.getVectorIdxConstant(RHSID & 0x3, DL));
+        DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Input.getValueType(), OpLHS,
+                    Ext, DAG.getVectorIdxConstant(RHSID & 0x3, dl));
     return DAG.getBitcast(VT, Ins);
   }
 
   SDValue OpLHS, OpRHS;
   OpLHS = GeneratePerfectShuffle(LHSID, V1, V2, PerfectShuffleTable[LHSID], LHS,
-                                 RHS, DAG, DL);
+                                 RHS, DAG, dl);
   OpRHS = GeneratePerfectShuffle(RHSID, V1, V2, PerfectShuffleTable[RHSID], LHS,
-                                 RHS, DAG, DL);
+                                 RHS, DAG, dl);
   EVT VT = OpLHS.getValueType();
 
   switch (OpNum) {
@@ -13652,15 +13405,15 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
     // VREV divides the vector in half and swaps within the half.
     if (VT.getVectorElementType() == MVT::i32 ||
         VT.getVectorElementType() == MVT::f32)
-      return DAG.getNode(AArch64ISD::REV64, DL, VT, OpLHS);
+      return DAG.getNode(AArch64ISD::REV64, dl, VT, OpLHS);
     // vrev <4 x i16> -> REV32
     if (VT.getVectorElementType() == MVT::i16 ||
         VT.getVectorElementType() == MVT::f16 ||
         VT.getVectorElementType() == MVT::bf16)
-      return DAG.getNode(AArch64ISD::REV32, DL, VT, OpLHS);
+      return DAG.getNode(AArch64ISD::REV32, dl, VT, OpLHS);
     // vrev <4 x i8> -> REV16
     assert(VT.getVectorElementType() == MVT::i8);
-    return DAG.getNode(AArch64ISD::REV16, DL, VT, OpLHS);
+    return DAG.getNode(AArch64ISD::REV16, dl, VT, OpLHS);
   case OP_VDUP0:
   case OP_VDUP1:
   case OP_VDUP2:
@@ -13680,28 +13433,28 @@ static SDValue GeneratePerfectShuffle(unsigned ID, SDValue V1, SDValue V2,
 
     if (VT.getSizeInBits() == 64)
       OpLHS = WidenVector(OpLHS, DAG);
-    SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, DL, MVT::i64);
-    return DAG.getNode(Opcode, DL, VT, OpLHS, Lane);
+    SDValue Lane = DAG.getConstant(OpNum - OP_VDUP0, dl, MVT::i64);
+    return DAG.getNode(Opcode, dl, VT, OpLHS, Lane);
   }
   case OP_VEXT1:
   case OP_VEXT2:
   case OP_VEXT3: {
     unsigned Imm = (OpNum - OP_VEXT1 + 1) * getExtFactor(OpLHS);
-    return DAG.getNode(AArch64ISD::EXT, DL, VT, OpLHS, OpRHS,
-                       DAG.getConstant(Imm, DL, MVT::i32));
+    return DAG.getNode(AArch64ISD::EXT, dl, VT, OpLHS, OpRHS,
+                       DAG.getConstant(Imm, dl, MVT::i32));
   }
   case OP_VUZPL:
-    return DAG.getNode(AArch64ISD::UZP1, DL, VT, OpLHS, OpRHS);
+    return DAG.getNode(AArch64ISD::UZP1, dl, VT, OpLHS, OpRHS);
   case OP_VUZPR:
-    return DAG.getNode(AArch64ISD::UZP2, DL, VT, OpLHS, OpRHS);
+    return DAG.getNode(AArch64ISD::UZP2, dl, VT, OpLHS, OpRHS);
   case OP_VZIPL:
-    return DAG.getNode(AArch64ISD::ZIP1, DL, VT, OpLHS, OpRHS);
+    return DAG.getNode(AArch64ISD::ZIP1, dl, VT, OpLHS, OpRHS);
   case OP_VZIPR:
-    return DAG.getNode(AArch64ISD::ZIP2, DL, VT, OpLHS, OpRHS);
+    return DAG.getNode(AArch64ISD::ZIP2, dl, VT, OpLHS, OpRHS);
   case OP_VTRNL:
-    return DAG.getNode(AArch64ISD::TRN1, DL, VT, OpLHS, OpRHS);
+    return DAG.getNode(AArch64ISD::TRN1, dl, VT, OpLHS, OpRHS);
   case OP_VTRNR:
-    return DAG.getNode(AArch64ISD::TRN2, DL, VT, OpLHS, OpRHS);
+    return DAG.getNode(AArch64ISD::TRN2, dl, VT, OpLHS, OpRHS);
   }
 }
 
@@ -13792,7 +13545,7 @@ static unsigned getDUPLANEOp(EVT EltType) {
   llvm_unreachable("Invalid vector element type?");
 }
 
-static SDValue constructDup(SDValue V, int Lane, SDLoc DL, EVT VT,
+static SDValue constructDup(SDValue V, int Lane, SDLoc dl, EVT VT,
                             unsigned Opcode, SelectionDAG &DAG) {
   // Try to eliminate a bitcasted extract subvector before a DUPLANE.
   auto getScaledOffsetDup = [](SDValue BitCast, int &LaneC, MVT &CastVT) {
@@ -13848,7 +13601,7 @@ static SDValue constructDup(SDValue V, int Lane, SDLoc DL, EVT VT,
     // Widen the operand to 128-bit register with undef.
     V = WidenVector(V, DAG);
   }
-  return DAG.getNode(Opcode, DL, VT, V, DAG.getConstant(Lane, DL, MVT::i64));
+  return DAG.getNode(Opcode, dl, VT, V, DAG.getConstant(Lane, dl, MVT::i64));
 }
 
 // Try to widen element type to get a new mask value for a better permutation
@@ -13899,9 +13652,9 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
                                                SelectionDAG &DAG) {
   SDValue Tbl1 = Op->getOperand(0);
   SDValue Tbl2 = Op->getOperand(1);
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   SDValue Tbl2ID =
-      DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, DL, MVT::i64);
+      DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl2, dl, MVT::i64);
 
   EVT VT = Op.getValueType();
   if (Tbl1.getOpcode() != ISD::INTRINSIC_WO_CHAIN ||
@@ -13927,15 +13680,15 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
       auto *C = dyn_cast<ConstantSDNode>(Mask2.getOperand(ShuffleMask[I] - 16));
       if (!C)
         return SDValue();
-      TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, DL, MVT::i32);
+      TBLMaskParts[I] = DAG.getConstant(C->getSExtValue() + 32, dl, MVT::i32);
     }
   }
 
-  SDValue TBLMask = DAG.getBuildVector(VT, DL, TBLMaskParts);
+  SDValue TBLMask = DAG.getBuildVector(VT, dl, TBLMaskParts);
   SDValue ID =
-      DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl4, DL, MVT::i64);
+      DAG.getTargetConstant(Intrinsic::aarch64_neon_tbl4, dl, MVT::i64);
 
-  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::v16i8,
+  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::v16i8,
                      {ID, Tbl1->getOperand(1), Tbl1->getOperand(2),
                       Tbl2->getOperand(1), Tbl2->getOperand(2), TBLMask});
 }
@@ -13946,7 +13699,7 @@ static SDValue tryToConvertShuffleOfTbl2ToTbl4(SDValue Op,
 SDValue
 AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
                                                      SelectionDAG &DAG) const {
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   EVT VT = Op.getValueType();
   SDValue SrcOp = Op.getOperand(0);
   EVT SrcVT = SrcOp.getValueType();
@@ -13956,14 +13709,14 @@ AArch64TargetLowering::LowerZERO_EXTEND_VECTOR_INREG(SDValue Op,
   // FIXME: support multi-step zipping?
   if (Scale != 2)
     return SDValue();
-  SDValue Zeros = DAG.getConstant(0, DL, SrcVT);
+  SDValue Zeros = DAG.getConstant(0, dl, SrcVT);
   return DAG.getBitcast(VT,
-                        DAG.getNode(AArch64ISD::ZIP1, DL, SrcVT, SrcOp, Zeros));
+                        DAG.getNode(AArch64ISD::ZIP1, dl, SrcVT, SrcOp, Zeros));
 }
 
 SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                                                    SelectionDAG &DAG) const {
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   EVT VT = Op.getValueType();
 
   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
@@ -13994,17 +13747,17 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       Lane = 0;
 
     if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR)
-      return DAG.getNode(AArch64ISD::DUP, DL, V1.getValueType(),
+      return DAG.getNode(AArch64ISD::DUP, dl, V1.getValueType(),
                          V1.getOperand(0));
     // Test if V1 is a BUILD_VECTOR and the lane being referenced is a non-
     // constant. If so, we can just reference the lane's definition directly.
     if (V1.getOpcode() == ISD::BUILD_VECTOR &&
         !isa<ConstantSDNode>(V1.getOperand(Lane)))
-      return DAG.getNode(AArch64ISD::DUP, DL, VT, V1.getOperand(Lane));
+      return DAG.getNode(AArch64ISD::DUP, dl, VT, V1.getOperand(Lane));
 
     // Otherwise, duplicate from the lane of the input vector.
     unsigned Opcode = getDUPLANEOp(V1.getValueType().getVectorElementType());
-    return constructDup(V1, Lane, DL, VT, Opcode, DAG);
+    return constructDup(V1, Lane, dl, VT, Opcode, DAG);
   }
 
   // Check if the mask matches a DUP for a wider element
@@ -14019,8 +13772,8 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       unsigned NewEltCount = VT.getSizeInBits() / LaneSize;
       MVT NewVecTy = MVT::getVectorVT(NewEltTy, NewEltCount);
       V1 = DAG.getBitcast(NewVecTy, V1);
-      // Construct the DUP instruction
-      V1 = constructDup(V1, Lane, DL, NewVecTy, Opcode, DAG);
+      // Construct the DUP instruction
+      V1 = constructDup(V1, Lane, dl, NewVecTy, Opcode, DAG);
       // Cast back to the original type
       return DAG.getBitcast(VT, V1);
     }
@@ -14029,17 +13782,17 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   unsigned NumElts = VT.getVectorNumElements();
   unsigned EltSize = VT.getScalarSizeInBits();
   if (isREVMask(ShuffleMask, EltSize, NumElts, 64))
-    return DAG.getNode(AArch64ISD::REV64, DL, V1.getValueType(), V1);
+    return DAG.getNode(AArch64ISD::REV64, dl, V1.getValueType(), V1);
   if (isREVMask(ShuffleMask, EltSize, NumElts, 32))
-    return DAG.getNode(AArch64ISD::REV32, DL, V1.getValueType(), V1);
+    return DAG.getNode(AArch64ISD::REV32, dl, V1.getValueType(), V1);
   if (isREVMask(ShuffleMask, EltSize, NumElts, 16))
-    return DAG.getNode(AArch64ISD::REV16, DL, V1.getValueType(), V1);
+    return DAG.getNode(AArch64ISD::REV16, dl, V1.getValueType(), V1);
 
   if (((NumElts == 8 && EltSize == 16) || (NumElts == 16 && EltSize == 8)) &&
       ShuffleVectorInst::isReverseMask(ShuffleMask, ShuffleMask.size())) {
-    SDValue Rev = DAG.getNode(AArch64ISD::REV64, DL, VT, V1);
-    return DAG.getNode(AArch64ISD::EXT, DL, VT, Rev, Rev,
-                       DAG.getConstant(8, DL, MVT::i32));
+    SDValue Rev = DAG.getNode(AArch64ISD::REV64, dl, VT, V1);
+    return DAG.getNode(AArch64ISD::EXT, dl, VT, Rev, Rev,
+                       DAG.getConstant(8, dl, MVT::i32));
   }
 
   bool ReverseEXT = false;
@@ -14048,39 +13801,39 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
     if (ReverseEXT)
       std::swap(V1, V2);
     Imm *= getExtFactor(V1);
-    return DAG.getNode(AArch64ISD::EXT, DL, V1.getValueType(), V1, V2,
-                       DAG.getConstant(Imm, DL, MVT::i32));
+    return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V2,
+                       DAG.getConstant(Imm, dl, MVT::i32));
   } else if (V2->isUndef() && isSingletonEXTMask(ShuffleMask, VT, Imm)) {
     Imm *= getExtFactor(V1);
-    return DAG.getNode(AArch64ISD::EXT, DL, V1.getValueType(), V1, V1,
-                       DAG.getConstant(Imm, DL, MVT::i32));
+    return DAG.getNode(AArch64ISD::EXT, dl, V1.getValueType(), V1, V1,
+                       DAG.getConstant(Imm, dl, MVT::i32));
   }
 
   unsigned WhichResult;
   if (isZIPMask(ShuffleMask, NumElts, WhichResult)) {
     unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
-    return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
   }
   if (isUZPMask(ShuffleMask, NumElts, WhichResult)) {
     unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
-    return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
   }
   if (isTRNMask(ShuffleMask, NumElts, WhichResult)) {
     unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
-    return DAG.getNode(Opc, DL, V1.getValueType(), V1, V2);
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V2);
   }
 
   if (isZIP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
     unsigned Opc = (WhichResult == 0) ? AArch64ISD::ZIP1 : AArch64ISD::ZIP2;
-    return DAG.getNode(Opc, DL, V1.getValueType(), V1, V1);
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
   }
   if (isUZP_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
     unsigned Opc = (WhichResult == 0) ? AArch64ISD::UZP1 : AArch64ISD::UZP2;
-    return DAG.getNode(Opc, DL, V1.getValueType(), V1, V1);
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
   }
   if (isTRN_v_undef_Mask(ShuffleMask, VT, WhichResult)) {
     unsigned Opc = (WhichResult == 0) ? AArch64ISD::TRN1 : AArch64ISD::TRN2;
-    return DAG.getNode(Opc, DL, V1.getValueType(), V1, V1);
+    return DAG.getNode(Opc, dl, V1.getValueType(), V1, V1);
   }
 
   if (SDValue Concat = tryFormConcatFromShuffle(Op, DAG))
@@ -14091,7 +13844,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
   int NumInputElements = V1.getValueType().getVectorNumElements();
   if (isINSMask(ShuffleMask, NumInputElements, DstIsLeft, Anomaly)) {
     SDValue DstVec = DstIsLeft ? V1 : V2;
-    SDValue DstLaneV = DAG.getConstant(Anomaly, DL, MVT::i64);
+    SDValue DstLaneV = DAG.getConstant(Anomaly, dl, MVT::i64);
 
     SDValue SrcVec = V1;
     int SrcLane = ShuffleMask[Anomaly];
@@ -14099,7 +13852,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       SrcVec = V2;
       SrcLane -= NumElts;
     }
-    SDValue SrcLaneV = DAG.getConstant(SrcLane, DL, MVT::i64);
+    SDValue SrcLaneV = DAG.getConstant(SrcLane, dl, MVT::i64);
 
     EVT ScalarVT = VT.getVectorElementType();
 
@@ -14107,8 +13860,8 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
       ScalarVT = MVT::i32;
 
     return DAG.getNode(
-        ISD::INSERT_VECTOR_ELT, DL, VT, DstVec,
-        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, SrcVec, SrcLaneV),
+        ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
+        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, SrcVec, SrcLaneV),
         DstLaneV);
   }
 
@@ -14131,7 +13884,7 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
                             PFIndexes[2] * 9 + PFIndexes[3];
     unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
     return GeneratePerfectShuffle(PFTableIndex, V1, V2, PFEntry, V1, V2, DAG,
-                                  DL);
+                                  dl);
   }
 
   // Check for a "select shuffle", generating a BSL to pick between lanes in
@@ -14142,10 +13895,10 @@ SDValue AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
     SmallVector<SDValue> MaskElts;
     for (int M : ShuffleMask)
       MaskElts.push_back(DAG.getConstant(
-          M >= static_cast<int>(NumElts) ? 0 : 0xffffffff, DL, MVT::i32));
+          M >= static_cast<int>(NumElts) ? 0 : 0xffffffff, dl, MVT::i32));
     EVT IVT = VT.changeVectorElementTypeToInteger();
-    SDValue MaskConst = DAG.getBuildVector(IVT, DL, MaskElts);
-    return DAG.getBitcast(VT, DAG.getNode(AArch64ISD::BSP, DL, IVT, MaskConst,
+    SDValue MaskConst = DAG.getBuildVector(IVT, dl, MaskElts);
+    return DAG.getBitcast(VT, DAG.getNode(AArch64ISD::BSP, dl, IVT, MaskConst,
                                           DAG.getBitcast(IVT, V1),
                                           DAG.getBitcast(IVT, V2)));
   }
@@ -14264,10 +14017,10 @@ static SDValue tryAdvSIMDModImm64(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
     if (AArch64_AM::isAdvSIMDModImmType10(Value)) {
       Value = AArch64_AM::encodeAdvSIMDModImmType10(Value);
 
-      SDLoc DL(Op);
+      SDLoc dl(Op);
       SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
+          DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
     }
   }
 
@@ -14307,20 +14060,20 @@ static SDValue tryAdvSIMDModImm32(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
     }
 
     if (isAdvSIMDModImm) {
-      SDLoc DL(Op);
+      SDLoc dl(Op);
       SDValue Mov;
 
       if (LHS)
-        Mov = DAG.getNode(NewOp, DL, MovTy,
-                          DAG.getNode(AArch64ISD::NVCAST, DL, MovTy, *LHS),
-                          DAG.getConstant(Value, DL, MVT::i32),
-                          DAG.getConstant(Shift, DL, MVT::i32));
+        Mov = DAG.getNode(NewOp, dl, MovTy,
+                          DAG.getNode(AArch64ISD::NVCAST, dl, MovTy, *LHS),
+                          DAG.getConstant(Value, dl, MVT::i32),
+                          DAG.getConstant(Shift, dl, MVT::i32));
       else
         Mov =
-            DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32),
-                        DAG.getConstant(Shift, DL, MVT::i32));
+            DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32),
+                        DAG.getConstant(Shift, dl, MVT::i32));
 
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
     }
   }
 
@@ -14352,20 +14105,20 @@ static SDValue tryAdvSIMDModImm16(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
     }
 
     if (isAdvSIMDModImm) {
-      SDLoc DL(Op);
+      SDLoc dl(Op);
       SDValue Mov;
 
       if (LHS)
-        Mov = DAG.getNode(NewOp, DL, MovTy,
-                          DAG.getNode(AArch64ISD::NVCAST, DL, MovTy, *LHS),
-                          DAG.getConstant(Value, DL, MVT::i32),
-                          DAG.getConstant(Shift, DL, MVT::i32));
+        Mov = DAG.getNode(NewOp, dl, MovTy,
+                          DAG.getNode(AArch64ISD::NVCAST, dl, MovTy, *LHS),
+                          DAG.getConstant(Value, dl, MVT::i32),
+                          DAG.getConstant(Shift, dl, MVT::i32));
       else
         Mov =
-            DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32),
-                        DAG.getConstant(Shift, DL, MVT::i32));
+            DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32),
+                        DAG.getConstant(Shift, dl, MVT::i32));
 
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
     }
   }
 
@@ -14392,11 +14145,11 @@ static SDValue tryAdvSIMDModImm321s(unsigned NewOp, SDValue Op,
     }
 
     if (isAdvSIMDModImm) {
-      SDLoc DL(Op);
+      SDLoc dl(Op);
       SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32),
-                      DAG.getConstant(Shift, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
+          DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32),
+                      DAG.getConstant(Shift, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
     }
   }
 
@@ -14414,10 +14167,10 @@ static SDValue tryAdvSIMDModImm8(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
     if (AArch64_AM::isAdvSIMDModImmType9(Value)) {
       Value = AArch64_AM::encodeAdvSIMDModImmType9(Value);
 
-      SDLoc DL(Op);
+      SDLoc dl(Op);
       SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
+          DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
     }
   }
 
@@ -14445,10 +14198,10 @@ static SDValue tryAdvSIMDModImmFP(unsigned NewOp, SDValue Op, SelectionDAG &DAG,
     }
 
     if (isAdvSIMDModImm) {
-      SDLoc DL(Op);
+      SDLoc dl(Op);
       SDValue Mov =
-          DAG.getNode(NewOp, DL, MovTy, DAG.getConstant(Value, DL, MVT::i32));
-      return DAG.getNode(AArch64ISD::NVCAST, DL, VT, Mov);
+          DAG.getNode(NewOp, dl, MovTy, DAG.getConstant(Value, dl, MVT::i32));
+      return DAG.getNode(AArch64ISD::NVCAST, dl, VT, Mov);
     }
   }
 
@@ -14675,7 +14428,7 @@ SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op,
 static SDValue NormalizeBuildVector(SDValue Op,
                                     SelectionDAG &DAG) {
   assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!");
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   EVT VT = Op.getValueType();
   EVT EltTy= VT.getVectorElementType();
 
@@ -14691,7 +14444,7 @@ static SDValue NormalizeBuildVector(SDValue Op,
     if (auto *CstLane = dyn_cast<ConstantSDNode>(Lane)) {
       Lane = DAG.getConstant(
           CstLane->getAPIntValue().trunc(EltTy.getSizeInBits()).getZExtValue(),
-          DL, MVT::i32);
+          dl, MVT::i32);
     } else if (Lane.getNode()->isUndef()) {
       Lane = DAG.getUNDEF(MVT::i32);
     } else {
@@ -14700,7 +14453,7 @@ static SDValue NormalizeBuildVector(SDValue Op,
     }
     Ops.push_back(Lane);
   }
-  return DAG.getBuildVector(VT, DL, Ops);
+  return DAG.getBuildVector(VT, dl, Ops);
 }
 
 static SDValue ConstantBuildVector(SDValue Op, SelectionDAG &DAG,
@@ -14884,7 +14637,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
   //             lanes such that we can directly materialize the vector
   //             some other way (MOVI, e.g.), we can be sneaky.
   //   5) if all operands are EXTRACT_VECTOR_ELT, check for VUZP.
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   unsigned NumElts = VT.getVectorNumElements();
   bool isOnlyLowElement = true;
   bool usesOnlyOneValue = true;
@@ -14958,7 +14711,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
   if (isOnlyLowElement && !(NumElts == 1 && isIntOrFPConstant(Value))) {
     LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: only low element used, creating 1 "
                          "SCALAR_TO_VECTOR node\n");
-    return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
+    return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value);
   }
 
   if (AllLanesExtractElt) {
@@ -15010,16 +14763,16 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
     }
     if (Even || Odd) {
       SDValue LHS =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(Vector, 0),
-                      DAG.getConstant(0, DL, MVT::i64));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
+                      DAG.getConstant(0, dl, MVT::i64));
       SDValue RHS =
-          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SDValue(Vector, 0),
-                      DAG.getConstant(NumElts, DL, MVT::i64));
+          DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SDValue(Vector, 0),
+                      DAG.getConstant(NumElts, dl, MVT::i64));
 
       if (Even && !Odd)
-        return DAG.getNode(AArch64ISD::UZP1, DL, VT, LHS, RHS);
+        return DAG.getNode(AArch64ISD::UZP1, dl, VT, LHS, RHS);
       if (Odd && !Even)
-        return DAG.getNode(AArch64ISD::UZP2, DL, VT, LHS, RHS);
+        return DAG.getNode(AArch64ISD::UZP2, dl, VT, LHS, RHS);
     }
   }
 
@@ -15031,7 +14784,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
           Value.getValueType() != VT) {
         LLVM_DEBUG(
             dbgs() << "LowerBUILD_VECTOR: use DUP for non-constant splats\n");
-        return DAG.getNode(AArch64ISD::DUP, DL, VT, Value);
+        return DAG.getNode(AArch64ISD::DUP, dl, VT, Value);
       }
 
       // This is actually a DUPLANExx operation, which keeps everything vectory.
@@ -15046,7 +14799,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
       }
 
       unsigned Opcode = getDUPLANEOp(VT.getVectorElementType());
-      return DAG.getNode(Opcode, DL, VT, Value, Lane);
+      return DAG.getNode(Opcode, dl, VT, Value, Lane);
     }
 
     if (VT.getVectorElementType().isFloatingPoint()) {
@@ -15059,14 +14812,14 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
                     "BITCASTS, and try again\n");
       MVT NewType = MVT::getIntegerVT(EltTy.getSizeInBits());
       for (unsigned i = 0; i < NumElts; ++i)
-        Ops.push_back(DAG.getNode(ISD::BITCAST, DL, NewType, Op.getOperand(i)));
+        Ops.push_back(DAG.getNode(ISD::BITCAST, dl, NewType, Op.getOperand(i)));
       EVT VecVT = EVT::getVectorVT(*DAG.getContext(), NewType, NumElts);
-      SDValue Val = DAG.getBuildVector(VecVT, DL, Ops);
+      SDValue Val = DAG.getBuildVector(VecVT, dl, Ops);
       LLVM_DEBUG(dbgs() << "LowerBUILD_VECTOR: trying to lower new vector: ";
                  Val.dump(););
       Val = LowerBUILD_VECTOR(Val, DAG);
       if (Val.getNode())
-        return DAG.getNode(ISD::BITCAST, DL, VT, Val);
+        return DAG.getNode(ISD::BITCAST, dl, VT, Val);
     }
   }
 
@@ -15085,7 +14838,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
   // for each lane.
   if (!PreferDUPAndInsert && NumConstantLanes > 0 && usesOnlyOneConstantValue) {
     // Firstly, try to materialize the splat constant.
-    SDValue Val = DAG.getSplatBuildVector(VT, DL, ConstantValue);
+    SDValue Val = DAG.getSplatBuildVector(VT, dl, ConstantValue);
     unsigned BitSize = VT.getScalarSizeInBits();
     APInt ConstantValueAPInt(1, 0);
     if (auto *C = dyn_cast<ConstantSDNode>(ConstantValue))
@@ -15095,17 +14848,17 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
       Val = ConstantBuildVector(Val, DAG, Subtarget);
       if (!Val)
         // Otherwise, materialize the constant and splat it.
-        Val = DAG.getNode(AArch64ISD::DUP, DL, VT, ConstantValue);
+        Val = DAG.getNode(AArch64ISD::DUP, dl, VT, ConstantValue);
     }
 
     // Now insert the non-constant lanes.
     for (unsigned i = 0; i < NumElts; ++i) {
       SDValue V = Op.getOperand(i);
-      SDValue LaneIdx = DAG.getConstant(i, DL, MVT::i64);
+      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
       if (!isIntOrFPConstant(V))
         // Note that type legalization likely mucked about with the VT of the
         // source operand, so we may have to convert it here before inserting.
-        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Val, V, LaneIdx);
+        Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Val, V, LaneIdx);
     }
     return Val;
   }
@@ -15136,13 +14889,13 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
   if (PreferDUPAndInsert) {
     // First, build a constant vector with the common element.
     SmallVector<SDValue, 8> Ops(NumElts, Value);
-    SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, DL, Ops), DAG);
+    SDValue NewVector = LowerBUILD_VECTOR(DAG.getBuildVector(VT, dl, Ops), DAG);
     // Next, insert the elements that do not match the common value.
     for (unsigned I = 0; I < NumElts; ++I)
       if (Op.getOperand(I) != Value)
         NewVector =
-            DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NewVector,
-                        Op.getOperand(I), DAG.getConstant(I, DL, MVT::i64));
+            DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, NewVector,
+                        Op.getOperand(I), DAG.getConstant(I, dl, MVT::i64));
 
     return NewVector;
   }
@@ -15185,11 +14938,11 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
         SmallVector<SDValue, 8> Ops1(NumElts / 2, Vals[0]);
         SmallVector<SDValue, 8> Ops2(NumElts / 2, Vals[1]);
         SDValue DUP1 =
-            LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, DL, Ops1), DAG);
+            LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, dl, Ops1), DAG);
         SDValue DUP2 =
-            LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, DL, Ops2), DAG);
+            LowerBUILD_VECTOR(DAG.getBuildVector(SubVT, dl, Ops2), DAG);
         SDValue CONCAT_VECTORS =
-            DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, DUP1, DUP2);
+            DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, DUP1, DUP2);
         return CONCAT_VECTORS;
       }
     }
@@ -15215,10 +14968,10 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
 
       SmallVector<SDValue, 8> Ops1(NumElts, Vals[0]);
       SmallVector<SDValue, 8> Ops2(NumElts, Vals[1]);
-      SDValue VEC1 = DAG.getBuildVector(VT, DL, Ops1);
-      SDValue VEC2 = DAG.getBuildVector(VT, DL, Ops2);
+      SDValue VEC1 = DAG.getBuildVector(VT, dl, Ops1);
+      SDValue VEC2 = DAG.getBuildVector(VT, dl, Ops2);
       SDValue VECTOR_SHUFFLE =
-          DAG.getVectorShuffle(VT, DL, VEC1, VEC2, MaskVec);
+          DAG.getVectorShuffle(VT, dl, VEC1, VEC2, MaskVec);
       return VECTOR_SHUFFLE;
     }
   }
@@ -15250,7 +15003,7 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
     // the upper bits of the lowest lane (e.g. v8i8, v4i16).
     if (!Op0.isUndef()) {
       LLVM_DEBUG(dbgs() << "Creating node for op0, it is not undefined:\n");
-      Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Op0);
+      Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op0);
       ++i;
     }
     LLVM_DEBUG({
@@ -15261,8 +15014,8 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
       SDValue V = Op.getOperand(i);
       if (V.isUndef())
         continue;
-      SDValue LaneIdx = DAG.getConstant(i, DL, MVT::i64);
-      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx);
+      SDValue LaneIdx = DAG.getConstant(i, dl, MVT::i64);
+      Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx);
     }
     return Vec;
   }
@@ -15579,7 +15332,7 @@ static bool isPow2Splat(SDValue Op, uint64_t &SplatVal, bool &Negated) {
 
 SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
-  SDLoc DL(Op);
+  SDLoc dl(Op);
 
   if (useSVEForFixedLengthVectorVT(VT, /*OverrideNEON=*/true))
     return LowerFixedLengthVectorIntDivideToSVE(Op, DAG);
@@ -15592,12 +15345,12 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
   bool Negated;
   uint64_t SplatVal;
   if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
-    SDValue Pg = getPredicateForScalableVector(DAG, DL, VT);
+    SDValue Pg = getPredicateForScalableVector(DAG, dl, VT);
     SDValue Res =
-        DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, VT, Pg, Op->getOperand(0),
-                    DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32));
+        DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, VT, Pg, Op->getOperand(0),
+                    DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32));
     if (Negated)
-      Res = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
+      Res = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT), Res);
 
     return Res;
   }
@@ -15617,15 +15370,15 @@ SDValue AArch64TargetLowering::LowerDIV(SDValue Op, SelectionDAG &DAG) const {
 
   unsigned UnpkLo = Signed ? AArch64ISD::SUNPKLO : AArch64ISD::UUNPKLO;
   unsigned UnpkHi = Signed ? AArch64ISD::SUNPKHI : AArch64ISD::UUNPKHI;
-  SDValue Op0Lo = DAG.getNode(UnpkLo, DL, WidenedVT, Op.getOperand(0));
-  SDValue Op1Lo = DAG.getNode(UnpkLo, DL, WidenedVT, Op.getOperand(1));
-  SDValue Op0Hi = DAG.getNode(UnpkHi, DL, WidenedVT, Op.getOperand(0));
-  SDValue Op1Hi = DAG.getNode(UnpkHi, DL, WidenedVT, Op.getOperand(1));
-  SDValue ResultLo = DAG.getNode(Op.getOpcode(), DL, WidenedVT, Op0Lo, Op1Lo);
-  SDValue ResultHi = DAG.getNode(Op.getOpcode(), DL, WidenedVT, Op0Hi, Op1Hi);
-  SDValue ResultLoCast = DAG.getNode(AArch64ISD::NVCAST, DL, VT, ResultLo);
-  SDValue ResultHiCast = DAG.getNode(AArch64ISD::NVCAST, DL, VT, ResultHi);
-  return DAG.getNode(AArch64ISD::UZP1, DL, VT, ResultLoCast, ResultHiCast);
+  SDValue Op0Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(0));
+  SDValue Op1Lo = DAG.getNode(UnpkLo, dl, WidenedVT, Op.getOperand(1));
+  SDValue Op0Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(0));
+  SDValue Op1Hi = DAG.getNode(UnpkHi, dl, WidenedVT, Op.getOperand(1));
+  SDValue ResultLo = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Lo, Op1Lo);
+  SDValue ResultHi = DAG.getNode(Op.getOpcode(), dl, WidenedVT, Op0Hi, Op1Hi);
+  SDValue ResultLoCast = DAG.getNode(AArch64ISD::NVCAST, dl, VT, ResultLo);
+  SDValue ResultHiCast = DAG.getNode(AArch64ISD::NVCAST, dl, VT, ResultHi);
+  return DAG.getNode(AArch64ISD::UZP1, dl, VT, ResultLoCast, ResultHiCast);
 }
 
 bool AArch64TargetLowering::shouldExpandBuildVectorWithShuffles(
@@ -15722,12 +15475,12 @@ SDValue AArch64TargetLowering::LowerTRUNCATE(SDValue Op,
 
   if (VT.getScalarType() == MVT::i1) {
     // Lower i1 truncate to `(x & 1) != 0`.
-    SDLoc DL(Op);
+    SDLoc dl(Op);
     EVT OpVT = Op.getOperand(0).getValueType();
-    SDValue Zero = DAG.getConstant(0, DL, OpVT);
-    SDValue One = DAG.getConstant(1, DL, OpVT);
-    SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Op.getOperand(0), One);
-    return DAG.getSetCC(DL, VT, And, Zero, ISD::SETNE);
+    SDValue Zero = DAG.getConstant(0, dl, OpVT);
+    SDValue One = DAG.getConstant(1, dl, OpVT);
+    SDValue And = DAG.getNode(ISD::AND, dl, OpVT, Op.getOperand(0), One);
+    return DAG.getSetCC(dl, VT, And, Zero, ISD::SETNE);
   }
 
   if (!VT.isVector() || VT.isScalableVector())
@@ -15854,6 +15607,47 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
   llvm_unreachable("unexpected shift opcode");
 }
 
+static SDValue EmitVectorComparison(SDValue LHS, SDValue RHS,
+                                    AArch64CC::CondCode CC, bool NoNans, EVT VT,
+                                    const SDLoc &dl, SelectionDAG &DAG) {
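+  // Map the condition code onto a single NEON floating-point compare
+  // (FCMEQ/FCMGE/FCMGT), swapping operands for LS/MI and inverting FCMEQ for
+  // NE. Returns an empty SDValue when no single compare applies, e.g. ordered
+  // LT/LE without the no-NaNs guarantee, or non-floating-point operands.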
+  EVT SrcVT = LHS.getValueType();
+  assert(VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+         "function only supposed to emit natural comparisons");
+
+  if (SrcVT.getVectorElementType().isFloatingPoint()) {
+    switch (CC) {
+    default:
+      return SDValue();
+    case AArch64CC::NE: {
+      SDValue Fcmeq = DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+      return DAG.getNOT(dl, Fcmeq, VT);
+    }
+    case AArch64CC::EQ:
+      return DAG.getNode(AArch64ISD::FCMEQ, dl, VT, LHS, RHS);
+    case AArch64CC::GE:
+      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, LHS, RHS);
+    case AArch64CC::GT:
+      return DAG.getNode(AArch64ISD::FCMGT, dl, VT, LHS, RHS);
+    case AArch64CC::LE:
+      if (!NoNans)
+        return SDValue();
+      // If we ignore NaNs then we can use the LS implementation.
+      [[fallthrough]];
+    case AArch64CC::LS:
+      return DAG.getNode(AArch64ISD::FCMGE, dl, VT, RHS, LHS);
+    case AArch64CC::LT:
+      if (!NoNans)
+        return SDValue();
+      // If we ignore NaNs then we can use the MI implementation.
+      [[fallthrough]];
+    case AArch64CC::MI:
+      return DAG.getNode(AArch64ISD::FCMGT, dl, VT, RHS, LHS);
+    }
+  }
+
+  return SDValue();
+}
+
 SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
                                            SelectionDAG &DAG) const {
   if (Op.getValueType().isScalableVector())
@@ -15867,7 +15661,7 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
   SDValue LHS = Op.getOperand(0);
   SDValue RHS = Op.getOperand(1);
   EVT CmpVT = LHS.getValueType().changeVectorElementTypeToInteger();
-  SDLoc DL(Op);
+  SDLoc dl(Op);
 
   if (LHS.getValueType().getVectorElementType().isInteger())
     return Op;
@@ -15904,22 +15698,22 @@ SDValue AArch64TargetLowering::LowerVSETCC(SDValue Op,
 
   bool NoNaNs =
       getTargetMachine().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs();
-  SDValue Cmp = emitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, DL, DAG);
+  SDValue Cmp = EmitVectorComparison(LHS, RHS, CC1, NoNaNs, CmpVT, dl, DAG);
   if (!Cmp.getNode())
     return SDValue();
 
   if (CC2 != AArch64CC::AL) {
-    SDValue Cmp2 = emitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, DL, DAG);
+    SDValue Cmp2 = EmitVectorComparison(LHS, RHS, CC2, NoNaNs, CmpVT, dl, DAG);
     if (!Cmp2.getNode())
       return SDValue();
 
-    Cmp = DAG.getNode(ISD::OR, DL, CmpVT, Cmp, Cmp2);
+    Cmp = DAG.getNode(ISD::OR, dl, CmpVT, Cmp, Cmp2);
   }
 
-  Cmp = DAG.getSExtOrTrunc(Cmp, DL, Op.getValueType());
+  Cmp = DAG.getSExtOrTrunc(Cmp, dl, Op.getValueType());
 
   if (ShouldInvert)
-    Cmp = DAG.getNOT(DL, Cmp, Cmp.getValueType());
+    Cmp = DAG.getNOT(dl, Cmp, Cmp.getValueType());
 
   return Cmp;
 }
@@ -16100,23 +15894,23 @@ SDValue AArch64TargetLowering::LowerVECREDUCE(SDValue Op,
   }
 
   // Lower NEON reductions.
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   switch (Op.getOpcode()) {
   case ISD::VECREDUCE_AND:
   case ISD::VECREDUCE_OR:
   case ISD::VECREDUCE_XOR:
     return getVectorBitwiseReduce(Op.getOpcode(), Op.getOperand(0),
-                                  Op.getValueType(), DL, DAG);
+                                  Op.getValueType(), dl, DAG);
   case ISD::VECREDUCE_ADD:
-    return getReductionSDNode(AArch64ISD::UADDV, DL, Op, DAG);
+    return getReductionSDNode(AArch64ISD::UADDV, dl, Op, DAG);
   case ISD::VECREDUCE_SMAX:
-    return getReductionSDNode(AArch64ISD::SMAXV, DL, Op, DAG);
+    return getReductionSDNode(AArch64ISD::SMAXV, dl, Op, DAG);
   case ISD::VECREDUCE_SMIN:
-    return getReductionSDNode(AArch64ISD::SMINV, DL, Op, DAG);
+    return getReductionSDNode(AArch64ISD::SMINV, dl, Op, DAG);
   case ISD::VECREDUCE_UMAX:
-    return getReductionSDNode(AArch64ISD::UMAXV, DL, Op, DAG);
+    return getReductionSDNode(AArch64ISD::UMAXV, dl, Op, DAG);
   case ISD::VECREDUCE_UMIN:
-    return getReductionSDNode(AArch64ISD::UMINV, DL, Op, DAG);
+    return getReductionSDNode(AArch64ISD::UMINV, dl, Op, DAG);
   default:
     llvm_unreachable("Unhandled reduction");
   }
@@ -16130,13 +15924,13 @@ SDValue AArch64TargetLowering::LowerATOMIC_LOAD_AND(SDValue Op,
     return SDValue();
 
   // LSE has an atomic load-clear instruction, but not a load-and.
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   MVT VT = Op.getSimpleValueType();
   assert(VT != MVT::i128 && "Handled elsewhere, code replicated.");
   SDValue RHS = Op.getOperand(2);
   AtomicSDNode *AN = cast<AtomicSDNode>(Op.getNode());
-  RHS = DAG.getNode(ISD::XOR, DL, VT, DAG.getAllOnesConstant(DL, VT), RHS);
-  return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, DL, AN->getMemoryVT(),
+  RHS = DAG.getNode(ISD::XOR, dl, VT, DAG.getAllOnesConstant(dl, VT), RHS);
+  return DAG.getAtomic(ISD::ATOMIC_LOAD_CLR, dl, AN->getMemoryVT(),
                        Op.getOperand(0), Op.getOperand(1), RHS,
                        AN->getMemOperand());
 }
@@ -16145,7 +15939,7 @@ SDValue
 AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
                                                       SelectionDAG &DAG) const {
 
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   // Get the inputs.
   SDNode *Node = Op.getNode();
   SDValue Chain = Op.getOperand(0);
@@ -16156,19 +15950,19 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
 
   if (DAG.getMachineFunction().getFunction().hasFnAttribute(
           "no-stack-arg-probe")) {
-    SDValue SP = DAG.getCopyFromReg(Chain, DL, AArch64::SP, MVT::i64);
+    SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
     Chain = SP.getValue(1);
-    SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size);
+    SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
     if (Align)
       SP =
-          DAG.getNode(ISD::AND, DL, VT, SP.getValue(0),
-                      DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT));
-    Chain = DAG.getCopyToReg(Chain, DL, AArch64::SP, SP);
+          DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+                      DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
+    Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
     SDValue Ops[2] = {SP, Chain};
-    return DAG.getMergeValues(Ops, DL);
+    return DAG.getMergeValues(Ops, dl);
   }
 
-  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
+  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);
 
   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   SDValue Callee = DAG.getTargetExternalSymbol(Subtarget->getChkStkName(),
@@ -16179,11 +15973,11 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
   if (Subtarget->hasCustomCallingConv())
     TRI->UpdateCustomCallPreservedMask(DAG.getMachineFunction(), &Mask);
 
-  Size = DAG.getNode(ISD::SRL, DL, MVT::i64, Size,
-                     DAG.getConstant(4, DL, MVT::i64));
-  Chain = DAG.getCopyToReg(Chain, DL, AArch64::X15, Size, SDValue());
+  Size = DAG.getNode(ISD::SRL, dl, MVT::i64, Size,
+                     DAG.getConstant(4, dl, MVT::i64));
+  Chain = DAG.getCopyToReg(Chain, dl, AArch64::X15, Size, SDValue());
   Chain =
-      DAG.getNode(AArch64ISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+      DAG.getNode(AArch64ISD::CALL, dl, DAG.getVTList(MVT::Other, MVT::Glue),
                   Chain, Callee, DAG.getRegister(AArch64::X15, MVT::i64),
                   DAG.getRegisterMask(Mask), Chain.getValue(1));
   // To match the actual intent better, we should read the output from X15 here
@@ -16191,21 +15985,21 @@ AArch64TargetLowering::LowerWindowsDYNAMIC_STACKALLOC(SDValue Op,
   // from X15 here doesn't work at -O0, since it thinks that X15 is undefined
   // here.
 
-  Size = DAG.getNode(ISD::SHL, DL, MVT::i64, Size,
-                     DAG.getConstant(4, DL, MVT::i64));
+  Size = DAG.getNode(ISD::SHL, dl, MVT::i64, Size,
+                     DAG.getConstant(4, dl, MVT::i64));
 
-  SDValue SP = DAG.getCopyFromReg(Chain, DL, AArch64::SP, MVT::i64);
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
   Chain = SP.getValue(1);
-  SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size);
+  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
   if (Align)
-    SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0),
-                     DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT));
-  Chain = DAG.getCopyToReg(Chain, DL, AArch64::SP, SP);
+    SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+                     DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
+  Chain = DAG.getCopyToReg(Chain, dl, AArch64::SP, SP);
 
-  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
+  Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), dl);
 
   SDValue Ops[2] = {SP, Chain};
-  return DAG.getMergeValues(Ops, DL);
+  return DAG.getMergeValues(Ops, dl);
 }
 
 SDValue
@@ -16218,21 +16012,21 @@ AArch64TargetLowering::LowerInlineDYNAMIC_STACKALLOC(SDValue Op,
 
   MaybeAlign Align =
       cast<ConstantSDNode>(Op.getOperand(2))->getMaybeAlignValue();
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   EVT VT = Node->getValueType(0);
 
   // Construct the new SP value in a GPR.
-  SDValue SP = DAG.getCopyFromReg(Chain, DL, AArch64::SP, MVT::i64);
+  SDValue SP = DAG.getCopyFromReg(Chain, dl, AArch64::SP, MVT::i64);
   Chain = SP.getValue(1);
-  SP = DAG.getNode(ISD::SUB, DL, MVT::i64, SP, Size);
+  SP = DAG.getNode(ISD::SUB, dl, MVT::i64, SP, Size);
   if (Align)
-    SP = DAG.getNode(ISD::AND, DL, VT, SP.getValue(0),
-                     DAG.getSignedConstant(-(uint64_t)Align->value(), DL, VT));
+    SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
+                     DAG.getSignedConstant(-(uint64_t)Align->value(), dl, VT));
 
   // Set the real SP to the new value with a probing loop.
-  Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, DL, MVT::Other, Chain, SP);
+  Chain = DAG.getNode(AArch64ISD::PROBED_ALLOCA, dl, MVT::Other, Chain, SP);
   SDValue Ops[2] = {SP, Chain};
-  return DAG.getMergeValues(Ops, DL);
+  return DAG.getMergeValues(Ops, dl);
 }
 
 SDValue
@@ -16886,14 +16680,14 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
     if (SrcWidth * 4 <= DstWidth) {
       if (all_of(I->users(), [&](auto *U) {
             auto *SingleUser = cast<Instruction>(&*U);
-            if (match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))))
-              return true;
-            if (match(SingleUser,
-                      m_Intrinsic<
-                          Intrinsic::experimental_vector_partial_reduce_add>(
-                          m_Value(), m_Specific(I))))
-              return true;
-            return false;
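+            // A user qualifies if it is a multiply with a sign-extended
+            // operand, or a partial-reduction add intrinsic that will not be
+            // expanded.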
+            return (
+                match(SingleUser, m_c_Mul(m_Specific(I), m_SExt(m_Value()))) ||
+                (match(SingleUser,
+                       m_Intrinsic<
+                           Intrinsic::experimental_vector_partial_reduce_add>(
+                           m_Value(), m_Specific(I))) &&
+                 !shouldExpandPartialReductionIntrinsic(
+                     cast<IntrinsicInst>(SingleUser))));
           }))
         return false;
     }
@@ -16978,12 +16772,12 @@ bool AArch64TargetLowering::optimizeExtendOrTruncateConversion(
 }
 
 bool AArch64TargetLowering::hasPairedLoad(EVT LoadedType,
-                                          Align &RequiredAlignment) const {
+                                          Align &RequiredAlignment) const {
   if (!LoadedType.isSimple() ||
       (!LoadedType.isInteger() && !LoadedType.isFloatingPoint()))
     return false;
   // Cyclone supports unaligned accesses.
-  RequiredAlignment = Align(1);
+  RequiredAlignment = Align(1);
   unsigned NumBits = LoadedType.getSizeInBits();
   return NumBits == 32 || NumBits == 64;
 }
@@ -18106,7 +17900,7 @@ static SDValue performVecReduceAddCombineWithUADDLP(SDNode *N,
       EXT1->getOperand(0)->getValueType(0) != MVT::v16i8)
     return SDValue();
 
-  // Pattern is detected. Let's convert it to sequence of nodes.
+  // Pattern is detected. Let's convert it to a sequence of nodes.
   SDLoc DL(N);
 
   // First, create the node pattern of UABD/SABD.
@@ -18149,8 +17943,7 @@ performActiveLaneMaskCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                                 /*IsEqual=*/false))
     return While;
 
-  if (!N->getValueType(0).isScalableVector() ||
-      (!ST->hasSVE2p1() && !(ST->hasSME2() && ST->isStreaming())))
+  if (!ST->hasSVE2p1())
     return SDValue();
 
   if (!N->hasNUsesOfValue(2, 0))
@@ -18325,10 +18118,10 @@ static SDValue performVecReduceAddCombine(SDNode *N, SelectionDAG &DAG,
                                 DAG.getConstant(I * 16, DL, MVT::i64));
   SDValue Dot =
       DAG.getNode(DotOpcode, DL, Zeros.getValueType(), Zeros, Vec8Op0, Vec8Op1);
-  SDValue VecReduceAdd8 =
+  SDValue VecReduceAdd8 =
       DAG.getNode(ISD::VECREDUCE_ADD, DL, N->getValueType(0), Dot);
   return DAG.getNode(ISD::ADD, DL, N->getValueType(0), VecReduceAdd16,
-                     VecReduceAdd8);
+                     VecReduceAdd8);
 }
 
 // Given an (integer) vecreduce, we know the order of the inputs does not
@@ -19918,7 +19711,7 @@ performExtractVectorEltCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
 static SDValue performConcatVectorsCombine(SDNode *N,
                                            TargetLowering::DAGCombinerInfo &DCI,
                                            SelectionDAG &DAG) {
-  SDLoc DL(N);
+  SDLoc dl(N);
   EVT VT = N->getValueType(0);
   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   unsigned N0Opc = N0->getOpcode(), N1Opc = N1->getOpcode();
@@ -19951,11 +19744,11 @@ static SDValue performConcatVectorsCombine(SDNode *N,
       SmallVector<int, 8> Mask(MidVT.getVectorNumElements());
       for (size_t i = 0; i < Mask.size(); ++i)
         Mask[i] = i * 2;
-      return DAG.getNode(ISD::TRUNCATE, DL, VT,
+      return DAG.getNode(ISD::TRUNCATE, dl, VT,
                          DAG.getVectorShuffle(
-                             MidVT, DL,
-                             DAG.getNode(ISD::BITCAST, DL, MidVT, N00),
-                             DAG.getNode(ISD::BITCAST, DL, MidVT, N10), Mask));
+                             MidVT, dl,
+                             DAG.getNode(ISD::BITCAST, dl, MidVT, N00),
+                             DAG.getNode(ISD::BITCAST, dl, MidVT, N10), Mask));
     }
 
     // Optimize two large shifts and a combine into a single combine and shift
@@ -19980,13 +19773,13 @@ static SDValue performConcatVectorsCombine(SDNode *N,
                NScalarSize = N->getValueType(0).getScalarSizeInBits();
 
       if (N001ConstVal == N101ConstVal && N001ConstVal > NScalarSize) {
-        N000 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N000);
-        N100 = DAG.getNode(AArch64ISD::NVCAST, DL, VT, N100);
-        SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, DL, VT, N000, N100);
+        N000 = DAG.getNode(AArch64ISD::NVCAST, dl, VT, N000);
+        N100 = DAG.getNode(AArch64ISD::NVCAST, dl, VT, N100);
+        SDValue Uzp = DAG.getNode(AArch64ISD::UZP2, dl, VT, N000, N100);
         SDValue NewShiftConstant =
-            DAG.getConstant(N001ConstVal - NScalarSize, DL, MVT::i32);
+            DAG.getConstant(N001ConstVal - NScalarSize, dl, MVT::i32);
 
-        return DAG.getNode(AArch64ISD::VLSHR, DL, VT, Uzp, NewShiftConstant);
+        return DAG.getNode(AArch64ISD::VLSHR, dl, VT, Uzp, NewShiftConstant);
       }
     }
   }
@@ -20018,14 +19811,14 @@ static SDValue performConcatVectorsCombine(SDNode *N,
           Ops.push_back(DAG.getUNDEF(FVT));
         else {
           LoadSDNode *LD = cast<LoadSDNode>(V);
-          SDValue NewLoad = DAG.getLoad(FVT, DL, LD->getChain(),
+          SDValue NewLoad = DAG.getLoad(FVT, dl, LD->getChain(),
                                         LD->getBasePtr(), LD->getMemOperand());
           DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLoad.getValue(1));
           Ops.push_back(NewLoad);
         }
       }
       return DAG.getBitcast(N->getValueType(0),
-                            DAG.getBuildVector(NVT, DL, Ops));
+                            DAG.getBuildVector(NVT, dl, Ops));
     }
   }
 
@@ -20049,11 +19842,11 @@ static SDValue performConcatVectorsCombine(SDNode *N,
     if (isBitwiseVectorNegate(N00) && N0->isOnlyUserOf(N00.getNode()) &&
         isBitwiseVectorNegate(N10) && N1->isOnlyUserOf(N10.getNode())) {
       return DAG.getNOT(
-          DL,
-          DAG.getNode(ISD::CONCAT_VECTORS, DL, VT,
-                      DAG.getNode(ISD::TRUNCATE, DL, N0.getValueType(),
+          dl,
+          DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
+                      DAG.getNode(ISD::TRUNCATE, dl, N0.getValueType(),
                                   N00->getOperand(0)),
-                      DAG.getNode(ISD::TRUNCATE, DL, N1.getValueType(),
+                      DAG.getNode(ISD::TRUNCATE, dl, N1.getValueType(),
                                   N10->getOperand(0))),
           VT);
     }
@@ -20077,9 +19870,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
     SDValue N11 = N1->getOperand(1);
 
     if (!N00.isUndef() && !N01.isUndef() && !N10.isUndef() && !N11.isUndef()) {
-      SDValue Concat0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N00, N10);
-      SDValue Concat1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N01, N11);
-      return DAG.getNode(N0Opc, DL, VT, Concat0, Concat1);
+      SDValue Concat0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N00, N10);
+      SDValue Concat1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N01, N11);
+      return DAG.getNode(N0Opc, dl, VT, Concat0, Concat1);
     }
   }
 
@@ -20119,12 +19912,12 @@ static SDValue performConcatVectorsCombine(SDNode *N,
                              : N1.getOperand(0).getOperand(0);
     EVT BVT =
         X.getValueType().getDoubleNumVectorElementsVT(*DCI.DAG.getContext());
-    SDValue CC = DAG.getNode(ISD::CONCAT_VECTORS, DL, BVT, X, Y);
+    SDValue CC = DAG.getNode(ISD::CONCAT_VECTORS, dl, BVT, X, Y);
     SDValue Add = DAG.getNode(
-        ISD::ADD, DL, BVT, CC,
-        DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), DL, BVT));
+        ISD::ADD, dl, BVT, CC,
+        DAG.getConstant(1ULL << (N0.getConstantOperandVal(1) - 1), dl, BVT));
     SDValue Shr =
-        DAG.getNode(AArch64ISD::VLSHR, DL, BVT, Add, N0.getOperand(1));
+        DAG.getNode(AArch64ISD::VLSHR, dl, BVT, Add, N0.getOperand(1));
     return Shr;
   }
 
@@ -20132,11 +19925,11 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   if (N->getNumOperands() == 2 && N0Opc == AArch64ISD::ZIP1 &&
       N1Opc == AArch64ISD::ZIP2 && N0.getOperand(0) == N1.getOperand(0) &&
       N0.getOperand(1) == N1.getOperand(1)) {
-    SDValue E0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0),
+    SDValue E0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N0.getOperand(0),
                              DAG.getUNDEF(N0.getValueType()));
-    SDValue E1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(1),
+    SDValue E1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, N0.getOperand(1),
                              DAG.getUNDEF(N0.getValueType()));
-    return DAG.getNode(AArch64ISD::ZIP1, DL, VT, E0, E1);
+    return DAG.getNode(AArch64ISD::ZIP1, dl, VT, E0, E1);
   }
 
   // If we see a (concat_vectors (v1x64 A), (v1x64 A)) it's really a vector
@@ -20144,8 +19937,8 @@ static SDValue performConcatVectorsCombine(SDNode *N,
   // canonicalise to that.
   if (N->getNumOperands() == 2 && N0 == N1 && VT.getVectorNumElements() == 2) {
     assert(VT.getScalarSizeInBits() == 64);
-    return DAG.getNode(AArch64ISD::DUPLANE64, DL, VT, WidenVector(N0, DAG),
-                       DAG.getConstant(0, DL, MVT::i64));
+    return DAG.getNode(AArch64ISD::DUPLANE64, dl, VT, WidenVector(N0, DAG),
+                       DAG.getConstant(0, dl, MVT::i64));
   }
 
   // Canonicalise concat_vectors so that the right-hand vector has as few
@@ -20170,9 +19963,9 @@ static SDValue performConcatVectorsCombine(SDNode *N,
 
   MVT ConcatTy = MVT::getVectorVT(RHSTy.getVectorElementType(),
                                   RHSTy.getVectorNumElements() * 2);
-  return DAG.getNode(ISD::BITCAST, DL, VT,
-                     DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatTy,
-                                 DAG.getNode(ISD::BITCAST, DL, RHSTy, N0),
+  return DAG.getNode(ISD::BITCAST, dl, VT,
+                     DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatTy,
+                                 DAG.getNode(ISD::BITCAST, dl, RHSTy, N0),
                                  RHS));
 }
 
@@ -20480,21 +20273,21 @@ static SDValue performSetccAddFolding(SDNode *Op, SelectionDAG &DAG) {
 
   SDValue CCVal;
   SDValue Cmp;
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   if (InfoAndKind.IsAArch64) {
     CCVal = DAG.getConstant(
-        AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), DL,
+        AArch64CC::getInvertedCondCode(InfoAndKind.Info.AArch64.CC), dl,
         MVT::i32);
     Cmp = *InfoAndKind.Info.AArch64.Cmp;
   } else
     Cmp = getAArch64Cmp(
         *InfoAndKind.Info.Generic.Opnd0, *InfoAndKind.Info.Generic.Opnd1,
         ISD::getSetCCInverse(InfoAndKind.Info.Generic.CC, CmpVT), CCVal, DAG,
-        DL);
+        dl);
 
   EVT VT = Op->getValueType(0);
-  LHS = DAG.getNode(ISD::ADD, DL, VT, RHS, DAG.getConstant(1, DL, VT));
-  return DAG.getNode(AArch64ISD::CSEL, DL, VT, RHS, LHS, CCVal, Cmp);
+  LHS = DAG.getNode(ISD::ADD, dl, VT, RHS, DAG.getConstant(1, dl, VT));
+  return DAG.getNode(AArch64ISD::CSEL, dl, VT, RHS, LHS, CCVal, Cmp);
 }
 
 // ADD(UADDV a, UADDV b) -->  UADDV(ADD a, b)
@@ -20931,7 +20724,7 @@ static SDValue performBuildVectorCombine(SDNode *N,
 
 // A special combine for the sqdmulh family of instructions.
 // smin( sra ( mul( sext v0, sext v1 ) ), SHIFT_AMOUNT ),
-// SATURATING_VAL ) can be reduced to sext(sqdmulh(...))
+// SATURATING_VAL ) can be reduced to sqdmulh(...)
 static SDValue trySQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
 
   if (N->getOpcode() != ISD::SMIN)
@@ -21638,7 +21431,7 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
   case Intrinsic::aarch64_neon_ushl:
     // For positive shift amounts we can use SHL, as ushl/sshl perform a regular
     // left shift for positive shift amounts. For negative shifts we can use a
-    // VASHR/VLSHR as appropriate.
+    // VASHR/VLSHR as appropriate.
     if (ShiftAmount < 0) {
       Opcode = IID == Intrinsic::aarch64_neon_sshl ? AArch64ISD::VASHR
                                                    : AArch64ISD::VLSHR;
@@ -21651,25 +21444,25 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) {
 
   EVT VT = N->getValueType(0);
   SDValue Op = N->getOperand(1);
-  SDLoc DL(N);
+  SDLoc dl(N);
   if (VT == MVT::i64) {
-    Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i64, Op);
+    Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i64, Op);
     VT = MVT::v1i64;
   }
 
   if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) {
-    Op = DAG.getNode(Opcode, DL, VT, Op,
-                     DAG.getSignedConstant(-ShiftAmount, DL, MVT::i32));
+    Op = DAG.getNode(Opcode, dl, VT, Op,
+                     DAG.getSignedConstant(-ShiftAmount, dl, MVT::i32));
     if (N->getValueType(0) == MVT::i64)
-      Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
-                       DAG.getConstant(0, DL, MVT::i64));
+      Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
+                       DAG.getConstant(0, dl, MVT::i64));
     return Op;
   } else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) {
-    Op = DAG.getNode(Opcode, DL, VT, Op,
-                     DAG.getConstant(ShiftAmount, DL, MVT::i32));
+    Op = DAG.getNode(Opcode, dl, VT, Op,
+                     DAG.getConstant(ShiftAmount, dl, MVT::i32));
     if (N->getValueType(0) == MVT::i64)
-      Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, Op,
-                       DAG.getConstant(0, DL, MVT::i64));
+      Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Op,
+                       DAG.getConstant(0, dl, MVT::i64));
     return Op;
   }
 
@@ -21694,11 +21487,11 @@ static SDValue tryCombineCRC32(unsigned Mask, SDNode *N, SelectionDAG &DAG) {
 
 static SDValue combineAcrossLanesIntrinsic(unsigned Opc, SDNode *N,
                                            SelectionDAG &DAG) {
-  SDLoc DL(N);
-  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, N->getValueType(0),
-                     DAG.getNode(Opc, DL, N->getOperand(1).getSimpleValueType(),
+  SDLoc dl(N);
+  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, N->getValueType(0),
+                     DAG.getNode(Opc, dl, N->getOperand(1).getSimpleValueType(),
                                  N->getOperand(1)),
-                     DAG.getConstant(0, DL, MVT::i64));
+                     DAG.getConstant(0, dl, MVT::i64));
 }
 
 static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
@@ -21718,21 +21511,21 @@ static SDValue LowerSVEIntrinsicIndex(SDNode *N, SelectionDAG &DAG) {
 }
 
 static SDValue LowerSVEIntrinsicDUP(SDNode *N, SelectionDAG &DAG) {
-  SDLoc DL(N);
+  SDLoc dl(N);
   SDValue Scalar = N->getOperand(3);
   EVT ScalarTy = Scalar.getValueType();
 
   if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
-    Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Scalar);
+    Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
 
   SDValue Passthru = N->getOperand(1);
   SDValue Pred = N->getOperand(2);
-  return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, DL, N->getValueType(0),
+  return DAG.getNode(AArch64ISD::DUP_MERGE_PASSTHRU, dl, N->getValueType(0),
                      Pred, Scalar, Passthru);
 }
 
 static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
-  SDLoc DL(N);
+  SDLoc dl(N);
   LLVMContext &Ctx = *DAG.getContext();
   EVT VT = N->getValueType(0);
 
@@ -21748,13 +21541,13 @@ static SDValue LowerSVEIntrinsicEXT(SDNode *N, SelectionDAG &DAG) {
       EVT::getVectorVT(Ctx, MVT::i8, ElementCount::getScalable(ByteSize));
 
   // Convert everything to the domain of EXT (i.e bytes).
-  SDValue Op0 = DAG.getNode(ISD::BITCAST, DL, ByteVT, N->getOperand(1));
-  SDValue Op1 = DAG.getNode(ISD::BITCAST, DL, ByteVT, N->getOperand(2));
-  SDValue Op2 = DAG.getNode(ISD::MUL, DL, MVT::i32, N->getOperand(3),
-                            DAG.getConstant(ElemSize, DL, MVT::i32));
+  SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(1));
+  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, ByteVT, N->getOperand(2));
+  SDValue Op2 = DAG.getNode(ISD::MUL, dl, MVT::i32, N->getOperand(3),
+                            DAG.getConstant(ElemSize, dl, MVT::i32));
 
-  SDValue EXT = DAG.getNode(AArch64ISD::EXT, DL, ByteVT, Op0, Op1, Op2);
-  return DAG.getNode(ISD::BITCAST, DL, VT, EXT);
+  SDValue EXT = DAG.getNode(AArch64ISD::EXT, dl, ByteVT, Op0, Op1, Op2);
+  return DAG.getNode(ISD::BITCAST, dl, VT, EXT);
 }
 
 static SDValue tryConvertSVEWideCompare(SDNode *N, ISD::CondCode CC,
@@ -23043,7 +22836,7 @@ static SDValue splitStores(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
 }
 
 static SDValue performSpliceCombine(SDNode *N, SelectionDAG &DAG) {
-  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
+  assert(N->getOpcode() == AArch64ISD::SPLICE && "Unexpected Opcode!");
 
   // splice(pg, op1, undef) -> op1
   if (N->getOperand(2).isUndef())
@@ -23511,8 +23304,6 @@ static SDValue performPostLD1Combine(SDNode *N,
     auto *LaneC = dyn_cast<ConstantSDNode>(Lane);
     if (!LaneC || LaneC->getZExtValue() >= VT.getVectorNumElements())
       return SDValue();
-    if (LaneC->getZExtValue() == 0 && isNullOrNullSplat(N->getOperand(0)))
-      return SDValue();
   }
 
   LoadSDNode *LoadSDN = cast<LoadSDNode>(LD);
@@ -23781,10 +23572,10 @@ static SDValue performLOADCombine(SDNode *N,
                   LD->getMemOperand()->getFlags(), LD->getAAInfo());
   SDValue UndefVector = DAG.getUNDEF(NewVT);
   SDValue InsertIdx = DAG.getVectorIdxConstant(0, DL);
-  SDValue ExtendedRemainingLoad =
+  SDValue ExtendedRemainingLoad =
       DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT,
                   {UndefVector, RemainingLoad, InsertIdx});
-  LoadOps.push_back(ExtendedRemainingLoad);
+  LoadOps.push_back(ExtendedRemainingLoad);
   LoadOpsChain.push_back(SDValue(cast<SDNode>(RemainingLoad), 1));
   EVT ConcatVT =
       EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
@@ -24832,19 +24623,6 @@ static SDValue performBRCONDCombine(SDNode *N,
   if (CC != AArch64CC::EQ && CC != AArch64CC::NE)
     return SDValue();
 
-  // Fold away brcond(NE, cmp(csel(1, 0, CC, Cmp), 1)) -> brcond(~CC, Cmp)
-  if (isCMP(Cmp) && CC == AArch64CC::NE && isOneConstant(Cmp.getOperand(1))) {
-    SDValue CSel = Cmp.getOperand(0);
-    auto CSelCC = getCSETCondCode(CSel);
-    if (CSelCC) {
-      SDLoc DL(N);
-      return DAG.getNode(
-          N->getOpcode(), DL, N->getVTList(), Chain, Dest,
-          DAG.getConstant(getInvertedCondCode(*CSelCC), DL, MVT::i32),
-          CSel.getOperand(3));
-    }
-  }
-
   unsigned CmpOpc = Cmp.getOpcode();
   if (CmpOpc != AArch64ISD::ADDS && CmpOpc != AArch64ISD::SUBS)
     return SDValue();
@@ -25766,28 +25544,6 @@ static SDValue performDUPCombine(SDNode *N,
   }
 
   if (N->getOpcode() == AArch64ISD::DUP) {
-    // If the instruction is known to produce a scalar in SIMD registers, we can
-    // duplicate it across the vector lanes using DUPLANE instead of moving it
-    // to a GPR first. For example, this allows us to handle:
-    //   v4i32 = DUP (i32 (FCMGT (f32, f32)))
-    SDValue Op = N->getOperand(0);
-    // FIXME: Ideally, we should be able to handle all instructions that
-    // produce a scalar value in FPRs.
-    if (Op.getOpcode() == AArch64ISD::FCMEQ ||
-        Op.getOpcode() == AArch64ISD::FCMGE ||
-        Op.getOpcode() == AArch64ISD::FCMGT) {
-      EVT ElemVT = VT.getVectorElementType();
-      EVT ExpandedVT = VT;
-      // Insert into a 128-bit vector to match DUPLANE's pattern.
-      if (VT.getSizeInBits() != 128)
-        ExpandedVT = EVT::getVectorVT(*DCI.DAG.getContext(), ElemVT,
-                                      128 / ElemVT.getSizeInBits());
-      SDValue Zero = DCI.DAG.getConstant(0, DL, MVT::i64);
-      SDValue Vec = DCI.DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, ExpandedVT,
-                                    DCI.DAG.getUNDEF(ExpandedVT), Op, Zero);
-      return DCI.DAG.getNode(getDUPLANEOp(ElemVT), DL, VT, Vec, Zero);
-    }
-
     if (DCI.isAfterLegalizeDAG()) {
       // If scalar dup's operand is extract_vector_elt, try to combine them into
       // duplane. For example,
@@ -27186,12 +26942,6 @@ bool AArch64TargetLowering::getIndexedAddressParts(SDNode *N, SDNode *Op,
       RHSC = -(uint64_t)RHSC;
     if (!isInt<9>(RHSC))
       return false;
-    // When big-endian VLD1/VST1 are used for vector load and store, and these
-    // only allow an offset that's equal to the store size.
-    EVT MemType = cast<MemSDNode>(N)->getMemoryVT();
-    if (!Subtarget->isLittleEndian() && MemType.isVector() &&
-        (uint64_t)RHSC != MemType.getStoreSize())
-      return false;
     // Always emit pre-inc/post-inc addressing mode. Use negated constant offset
     // when dealing with subtraction.
     Offset = DAG.getConstant(RHSC, SDLoc(N), RHS->getValueType(0));
@@ -27394,11 +27144,11 @@ static void ReplaceReductionResults(SDNode *N,
                                     unsigned AcrossOp) {
   EVT LoVT, HiVT;
   SDValue Lo, Hi;
-  SDLoc DL(N);
+  SDLoc dl(N);
   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
-  SDValue InterVal = DAG.getNode(InterOp, DL, LoVT, Lo, Hi);
-  SDValue SplitVal = DAG.getNode(AcrossOp, DL, LoVT, InterVal);
+  SDValue InterVal = DAG.getNode(InterOp, dl, LoVT, Lo, Hi);
+  SDValue SplitVal = DAG.getNode(AcrossOp, dl, LoVT, InterVal);
   Results.push_back(SplitVal);
 }
 
@@ -27436,50 +27186,19 @@ void AArch64TargetLowering::ReplaceExtractSubVectorResults(
   Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Half));
 }
 
-void AArch64TargetLowering::ReplaceGetActiveLaneMaskResults(
-    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
-  assert((Subtarget->hasSVE2p1() ||
-          (Subtarget->hasSME2() && Subtarget->isStreaming())) &&
-         "Custom lower of get.active.lane.mask missing required feature.");
-
-  assert(N->getValueType(0) == MVT::nxv32i1 &&
-         "Unexpected result type for get.active.lane.mask");
-
-  SDLoc DL(N);
-  SDValue Idx = N->getOperand(0);
-  SDValue TC = N->getOperand(1);
-
-  assert(Idx.getValueType().getFixedSizeInBits() <= 64 &&
-         "Unexpected operand type for get.active.lane.mask");
-
-  if (Idx.getValueType() != MVT::i64) {
-    Idx = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Idx);
-    TC = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, TC);
-  }
-
-  SDValue ID =
-      DAG.getTargetConstant(Intrinsic::aarch64_sve_whilelo_x2, DL, MVT::i64);
-  EVT HalfVT = N->getValueType(0).getHalfNumVectorElementsVT(*DAG.getContext());
-  auto WideMask =
-      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, {HalfVT, HalfVT}, {ID, Idx, TC});
-
-  Results.push_back(DAG.getNode(ISD::CONCAT_VECTORS, DL, N->getValueType(0),
-                                {WideMask.getValue(0), WideMask.getValue(1)}));
-}
-
 // Create an even/odd pair of X registers holding integer value V.
 static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V) {
-  SDLoc DL(V.getNode());
-  auto [VLo, VHi] = DAG.SplitScalar(V, DL, MVT::i64, MVT::i64);
+  SDLoc dl(V.getNode());
+  auto [VLo, VHi] = DAG.SplitScalar(V, dl, MVT::i64, MVT::i64);
   if (DAG.getDataLayout().isBigEndian())
     std::swap (VLo, VHi);
   SDValue RegClass =
-      DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, DL, MVT::i32);
-  SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, DL, MVT::i32);
-  SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, DL, MVT::i32);
+      DAG.getTargetConstant(AArch64::XSeqPairsClassRegClassID, dl, MVT::i32);
+  SDValue SubReg0 = DAG.getTargetConstant(AArch64::sube64, dl, MVT::i32);
+  SDValue SubReg1 = DAG.getTargetConstant(AArch64::subo64, dl, MVT::i32);
   const SDValue Ops[] = { RegClass, VLo, SubReg0, VHi, SubReg1 };
   return SDValue(
-      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops), 0);
+      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
 }
 
 static void ReplaceCMP_SWAP_128Results(SDNode *N,
@@ -27675,13 +27394,13 @@ static void ReplaceATOMIC_LOAD_128Results(SDNode *N,
       getAtomicLoad128Opcode(ISDOpcode, MemOp->getMergedOrdering());
 
   if (ISDOpcode == ISD::ATOMIC_LOAD_AND) {
-    SDLoc DL(Val128);
+    SDLoc dl(Val128);
     Val2x64.first =
-        DAG.getNode(ISD::XOR, DL, MVT::i64,
-                    DAG.getAllOnesConstant(DL, MVT::i64), Val2x64.first);
+        DAG.getNode(ISD::XOR, dl, MVT::i64,
+                    DAG.getAllOnesConstant(dl, MVT::i64), Val2x64.first);
     Val2x64.second =
-        DAG.getNode(ISD::XOR, DL, MVT::i64,
-                    DAG.getAllOnesConstant(DL, MVT::i64), Val2x64.second);
+        DAG.getNode(ISD::XOR, dl, MVT::i64,
+                    DAG.getAllOnesConstant(dl, MVT::i64), Val2x64.second);
   }
 
   SDValue Ops[] = {Val2x64.first, Val2x64.second, Ptr, Chain};
@@ -27853,9 +27572,6 @@ void AArch64TargetLowering::ReplaceNodeResults(
     // CONCAT_VECTORS -- but delegate to common code for result type
     // legalisation
     return;
-  case ISD::GET_ACTIVE_LANE_MASK:
-    ReplaceGetActiveLaneMaskResults(N, Results, DAG);
-    return;
   case ISD::INTRINSIC_WO_CHAIN: {
     EVT VT = N->getValueType(0);
 
@@ -29013,7 +28729,7 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorMStoreToSVE(
 
 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
     SDValue Op, SelectionDAG &DAG) const {
-  SDLoc DL(Op);
+  SDLoc dl(Op);
   EVT VT = Op.getValueType();
   EVT EltVT = VT.getVectorElementType();
 
@@ -29025,14 +28741,14 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
   if (Signed && isPow2Splat(Op.getOperand(1), SplatVal, Negated)) {
     EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
     SDValue Op1 = convertToScalableVector(DAG, ContainerVT, Op.getOperand(0));
-    SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), DL, MVT::i32);
+    SDValue Op2 = DAG.getTargetConstant(Log2_64(SplatVal), dl, MVT::i32);
 
-    SDValue Pg = getPredicateForFixedLengthVector(DAG, DL, VT);
+    SDValue Pg = getPredicateForFixedLengthVector(DAG, dl, VT);
     SDValue Res =
-        DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, DL, ContainerVT, Pg, Op1, Op2);
+        DAG.getNode(AArch64ISD::SRAD_MERGE_OP1, dl, ContainerVT, Pg, Op1, Op2);
     if (Negated)
-      Res = DAG.getNode(ISD::SUB, DL, ContainerVT,
-                        DAG.getConstant(0, DL, ContainerVT), Res);
+      Res = DAG.getNode(ISD::SUB, dl, ContainerVT,
+                        DAG.getConstant(0, dl, ContainerVT), Res);
 
     return convertFromScalableVector(DAG, VT, Res);
   }
@@ -29049,32 +28765,32 @@ SDValue AArch64TargetLowering::LowerFixedLengthVectorIntDivideToSVE(
   // If the wider type is legal: extend, op, and truncate.
   EVT WideVT = VT.widenIntegerVectorElementType(*DAG.getContext());
   if (DAG.getTargetLoweringInfo().isTypeLegal(WideVT)) {
-    SDValue Op0 = DAG.getNode(ExtendOpcode, DL, WideVT, Op.getOperand(0));
-    SDValue Op1 = DAG.getNode(ExtendOpcode, DL, WideVT, Op.getOperand(1));
-    SDValue Div = DAG.getNode(Op.getOpcode(), DL, WideVT, Op0, Op1);
-    return DAG.getNode(ISD::TRUNCATE, DL, VT, Div);
+    SDValue Op0 = DAG.getNode(ExtendOpcode, dl, WideVT, Op.getOperand(0));
+    SDValue Op1 = DAG.getNode(ExtendOpcode, dl, WideVT, Op.getOperand(1));
+    SDValue Div = DAG.getNode(Op.getOpcode(), dl, WideVT, Op0, Op1);
+    return DAG.getNode(ISD::TRUNCATE, dl, VT, Div);
   }
 
-  auto HalveAndExtendVector = [&DAG, &DL, &HalfVT, &PromVT,
+  auto HalveAndExtendVector = [&DAG, &dl, &HalfVT, &PromVT,
                                &ExtendOpcode](SDValue Op) {
-    SDValue IdxZero = DAG.getConstant(0, DL, MVT::i64);
+    SDValue IdxZero = DAG.getConstant(0, dl, MVT::i64);
     SDValue IdxHalf =
-        DAG.getConstant(HalfVT.getVectorNumElements(), DL, MVT::i64);
-    SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op, IdxZero);
-    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, Op, IdxHalf);
+        DAG.getConstant(HalfVT.getVectorNumElements(), dl, MVT::i64);
+    SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, Op, IdxZero);
+    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HalfVT, Op, IdxHalf);
     return std::pair<SDValue, SDValue>(
-        {DAG.getNode(ExtendOpcode, DL, PromVT, Lo),
-         DAG.getNode(ExtendOpcode, DL, PromVT, Hi)});
+        {DAG.getNode(ExtendOpcode, dl, PromVT, Lo),
+         DAG.getNode(ExtendOpcode, dl, PromVT, Hi)});
   };
 
   // If wider type is not legal: split, extend, op, trunc and concat.
   auto [Op0LoExt, Op0HiExt] = HalveAndExtendVector(Op.getOperand(0));
   auto [Op1LoExt, Op1HiExt] = HalveAndExtendVector(Op.getOperand(1));
-  SDValue Lo = DAG.getNode(Op.getOpcode(), DL, PromVT, Op0LoExt, Op1LoExt);
-  SDValue Hi = DAG.getNode(Op.getOpcode(), DL, PromVT, Op0HiExt, Op1HiExt);
-  SDValue LoTrunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Lo);
-  SDValue HiTrunc = DAG.getNode(ISD::TRUNCATE, DL, HalfVT, Hi);
-  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, {LoTrunc, HiTrunc});
+  SDValue Lo = DAG.getNode(Op.getOpcode(), dl, PromVT, Op0LoExt, Op1LoExt);
+  SDValue Hi = DAG.getNode(Op.getOpcode(), dl, PromVT, Op0HiExt, Op1HiExt);
+  SDValue LoTrunc = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Lo);
+  SDValue HiTrunc = DAG.getNode(ISD::TRUNCATE, dl, HalfVT, Hi);
+  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, {LoTrunc, HiTrunc});
 }
 
 SDValue AArch64TargetLowering::LowerFixedLengthVectorIntExtendToSVE(
@@ -29462,16 +29178,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthConcatVectorsToSVE(
   EVT VT = Op.getValueType();
   EVT SrcVT = SrcOp1.getValueType();
 
-  // Match a splat of 128b segments that fit in a single register.
-  if (SrcVT.is128BitVector() && all_equal(Op.getNode()->op_values())) {
-    EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT);
-    SDValue Splat =
-        DAG.getNode(AArch64ISD::DUPLANE128, DL, ContainerVT,
-                    convertToScalableVector(DAG, ContainerVT, SrcOp1),
-                    DAG.getConstant(0, DL, MVT::i64, /*isTarget=*/true));
-    return convertFromScalableVector(DAG, VT, Splat);
-  }
-
   if (NumOperands > 2) {
     SmallVector<SDValue, 4> Ops;
     EVT PairVT = SrcVT.getDoubleNumVectorElementsVT(*DAG.getContext());
@@ -29594,30 +29300,6 @@ AArch64TargetLowering::LowerVECTOR_DEINTERLEAVE(SDValue Op,
   assert(OpVT.isScalableVector() &&
          "Expected scalable vector in LowerVECTOR_DEINTERLEAVE.");
 
-  // Are multi-register uzp instructions available?
-  if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
-      OpVT.getVectorElementType() != MVT::i1) {
-    Intrinsic::ID IntID;
-    switch (Op->getNumOperands()) {
-    default:
-      return SDValue();
-    case 2:
-      IntID = Intrinsic::aarch64_sve_uzp_x2;
-      break;
-    case 4:
-      if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
-          OpVT.getScalarSizeInBits() == 64)
-        return SDValue();
-      IntID = Intrinsic::aarch64_sve_uzp_x4;
-      break;
-    }
-
-    SmallVector<SDValue, 5> Ops;
-    Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
-    Ops.append(Op->op_values().begin(), Op->op_values().end());
-    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op->getVTList(), Ops);
-  }
-
   if (Op->getNumOperands() != 2)
     return SDValue();
 
@@ -29635,30 +29317,6 @@ SDValue AArch64TargetLowering::LowerVECTOR_INTERLEAVE(SDValue Op,
   assert(OpVT.isScalableVector() &&
          "Expected scalable vector in LowerVECTOR_INTERLEAVE.");
 
-  // Are multi-register zip instructions available?
-  if (Subtarget->hasSME2() && Subtarget->isStreaming() &&
-      OpVT.getVectorElementType() != MVT::i1) {
-    Intrinsic::ID IntID;
-    switch (Op->getNumOperands()) {
-    default:
-      return SDValue();
-    case 2:
-      IntID = Intrinsic::aarch64_sve_zip_x2;
-      break;
-    case 4:
-      if (Subtarget->getMinSVEVectorSizeInBits() < 256 &&
-          OpVT.getScalarSizeInBits() == 64)
-        return SDValue();
-      IntID = Intrinsic::aarch64_sve_zip_x4;
-      break;
-    }
-
-    SmallVector<SDValue, 5> Ops;
-    Ops.push_back(DAG.getTargetConstant(IntID, DL, MVT::i64));
-    Ops.append(Op->op_values().begin(), Op->op_values().end());
-    return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op->getVTList(), Ops);
-  }
-
   if (Op->getNumOperands() != 2)
     return SDValue();
 
@@ -30124,26 +29782,6 @@ SDValue AArch64TargetLowering::LowerFixedLengthVECTOR_SHUFFLEToSVE(
       return convertFromScalableVector(
           DAG, VT, DAG.getNode(Opc, DL, ContainerVT, Op1, Op1));
     }
-
-    if ((Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()) &&
-        Subtarget->isSVEorStreamingSVEAvailable()) {
-      assert(VT.getFixedSizeInBits() % AArch64::SVEBitsPerBlock == 0 &&
-             "Unsupported SVE vector size");
-
-      unsigned Segments = VT.getFixedSizeInBits() / AArch64::SVEBitsPerBlock;
-      unsigned SegmentElts = VT.getVectorNumElements() / Segments;
-      if (std::optional<unsigned> Lane =
-              isDUPQMask(ShuffleMask, Segments, SegmentElts)) {
-        SDValue IID =
-            DAG.getConstant(Intrinsic::aarch64_sve_dup_laneq, DL, MVT::i64);
-        return convertFromScalableVector(
-            DAG, VT,
-            DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
-                        {IID, Op1,
-                         DAG.getConstant(*Lane, DL, MVT::i64,
-                                         /*isTarget=*/true)}));
-      }
-    }
   }
 
   // Try to widen the shuffle before generating a possibly expensive SVE TBL.
@@ -30591,8 +30229,3 @@ bool AArch64TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
 
   return TargetLowering::isTypeDesirableForOp(Opc, VT);
 }
-
-bool AArch64TargetLowering::shouldPreservePtrArith(const Function &F,
-                                                   EVT VT) const {
-  return Subtarget->hasCPA() && UseFEATCPACodegen;
-}
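
For illustration (not part of the patch), here is a minimal sketch of the kind of IR the smin-based combine described above is intended to match, written for v4i32 Q31 data. The function name, the shift amount of 31, and the clamp constant 2147483647 (0x7FFFFFFF) are assumptions chosen for the example; the exact shift/clamp pairs the combine accepts are the ones checked in the patch itself.

```
; Q31 fixed-point multiply: widen, multiply, arithmetic shift right by 31,
; then clamp with smin. Per 32-bit lane this is the value sqdmulh produces,
; which is why the clamp-and-shift sequence can collapse into one instruction.
define <4 x i32> @q31_mul(<4 x i32> %a, <4 x i32> %b) {
  %sa  = sext <4 x i32> %a to <4 x i64>
  %sb  = sext <4 x i32> %b to <4 x i64>
  %mul = mul <4 x i64> %sa, %sb
  %shr = ashr <4 x i64> %mul, <i64 31, i64 31, i64 31, i64 31>
  %min = call <4 x i64> @llvm.smin.v4i64(<4 x i64> %shr, <4 x i64> <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>)
  %res = trunc <4 x i64> %min to <4 x i32>
  ret <4 x i32> %res
}

declare <4 x i64> @llvm.smin.v4i64(<4 x i64>, <4 x i64>)
```

With these constants, the only lane value that can exceed the i32 range after the shift is INT32_MIN * INT32_MIN >> 31 == 2^31, so a single upper clamp at 2^31 - 1 reproduces the saturating behaviour of sqdmulh.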


