[llvm] Revert "[CodeGen] Update for scalable MemoryType in MMO" (PR #88803)

via llvm-commits <llvm-commits at lists.llvm.org>
Mon Apr 15 15:09:23 PDT 2024


https://github.com/hiraditya created https://github.com/llvm/llvm-project/pull/88803

Reverts llvm/llvm-project#70452

Fixes: https://github.com/llvm/llvm-project/issues/88799
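
For context: the central piece this revert reinstates is the
MemoryLocation::getSizeOrUnknown helper (first hunk below), which maps a
scalable TypeSize to an unknown LocationSize instead of a precise one. A
minimal sketch of the restored behavior, not part of the patch (the helper
name here is illustrative), using only APIs that appear in the diff:

    // Scalable sizes become "unknown", so downstream alias analysis must
    // stay conservative for scalable-vector loads/stores.
    LocationSize sizeForMemOp(const TypeSize &TS) {
      return TS.isScalable() ? LocationSize::beforeOrAfterPointer()
                             : LocationSize::precise(TS.getFixedValue());
    }
    // E.g. a <vscale x 2 x i64> store size is scalable, so its MMO records
    // an unknown size and MIR prints "store unknown-size"; a fixed
    // <2 x i64> store stays LocationSize::precise(16).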

From 5415528397880c89b5408eed6131aa2c752797a1 Mon Sep 17 00:00:00 2001
From: AdityaK <hiraditya at msn.com>
Date: Mon, 15 Apr 2024 15:08:37 -0700
Subject: [PATCH] Revert "[CodeGen] Update for scalable MemoryType in MMO
 (#70452)"

This reverts commit 57146daeaaf366050dc913db910fcc2995a3e06d.
---
 llvm/include/llvm/Analysis/MemoryLocation.h   |  7 +++
 llvm/include/llvm/CodeGen/MachineFunction.h   |  5 +-
 llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp  | 34 ++++---------
 llvm/lib/CodeGen/MachineInstr.cpp             | 37 ++++----------
 llvm/lib/CodeGen/MachineOperand.cpp           | 13 +++--
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 51 +++++++------------
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 22 ++++----
 .../SelectionDAGAddressAnalysis.cpp           |  2 +
 .../SelectionDAG/SelectionDAGBuilder.cpp      |  3 +-
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp  |  5 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   | 22 ++------
 llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll |  6 +--
 .../alloca-load-store-scalable-array.ll       | 36 ++++++-------
 .../alloca-load-store-scalable-struct.ll      | 12 ++---
 .../rvv/alloca-load-store-scalable-array.ll   | 12 ++---
 .../rvv/alloca-load-store-scalable-struct.ll  |  8 +--
 .../RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll |  8 +--
 17 files changed, 118 insertions(+), 165 deletions(-)
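
Two of the hunks below (MachineFunction.h and MachineOperand.cpp) revert how
a LocationSize is translated back into an LLT memory type for the MMO. A
hedged sketch of the reverted-to mapping (the function name is illustrative),
assuming only the calls visible in the patch:

    // Scalable or unknown sizes yield an invalid LLT() (printed as
    // "unknown-size" in MIR) rather than an LLT::scalable_vector.
    LLT memTypeFor(const LocationSize &TS) {
      return !TS.hasValue() || TS.isScalable()
                 ? LLT()
                 : LLT::scalar(8 * TS.getValue().getKnownMinValue());
    }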

diff --git a/llvm/include/llvm/Analysis/MemoryLocation.h b/llvm/include/llvm/Analysis/MemoryLocation.h
index 7d896c44f46795d..830eed5d60ee46b 100644
--- a/llvm/include/llvm/Analysis/MemoryLocation.h
+++ b/llvm/include/llvm/Analysis/MemoryLocation.h
@@ -297,6 +297,13 @@ class MemoryLocation {
     return MemoryLocation(Ptr, LocationSize::beforeOrAfterPointer(), AATags);
   }
 
+  // Return the exact size if the exact size is known at compile time,
+  // otherwise return LocationSize::beforeOrAfterPointer().
+  static LocationSize getSizeOrUnknown(const TypeSize &T) {
+    return T.isScalable() ? LocationSize::beforeOrAfterPointer()
+                          : LocationSize::precise(T.getFixedValue());
+  }
+
   MemoryLocation() : Ptr(nullptr), Size(LocationSize::beforeOrAfterPointer()) {}
 
   explicit MemoryLocation(const Value *Ptr, LocationSize Size,
diff --git a/llvm/include/llvm/CodeGen/MachineFunction.h b/llvm/include/llvm/CodeGen/MachineFunction.h
index 470997b31fe85fe..a0bc3aa1ed3140b 100644
--- a/llvm/include/llvm/CodeGen/MachineFunction.h
+++ b/llvm/include/llvm/CodeGen/MachineFunction.h
@@ -1060,9 +1060,8 @@ class LLVM_EXTERNAL_VISIBILITY MachineFunction {
                                           int64_t Offset, LocationSize Size) {
     return getMachineMemOperand(
         MMO, Offset,
-        !Size.hasValue() ? LLT()
-        : Size.isScalable()
-            ? LLT::scalable_vector(1, 8 * Size.getValue().getKnownMinValue())
+        !Size.hasValue() || Size.isScalable()
+            ? LLT()
             : LLT::scalar(8 * Size.getValue().getKnownMinValue()));
   }
   MachineMemOperand *getMachineMemOperand(const MachineMemOperand *MMO,
diff --git a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
index fb9656c09ca39de..9fc8ecd60b03ff5 100644
--- a/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LoadStoreOpt.cpp
@@ -128,14 +128,14 @@ bool GISelAddressing::aliasIsKnownForLoadStore(const MachineInstr &MI1,
     // vector objects on the stack.
     // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
     // following situations arise:
-    if (PtrDiff >= 0 && Size1.hasValue() && !Size1.isScalable()) {
+    if (PtrDiff >= 0 && Size1.hasValue()) {
       // [----BasePtr0----]
       //                         [---BasePtr1--]
       // ========PtrDiff========>
       IsAlias = !((int64_t)Size1.getValue() <= PtrDiff);
       return true;
     }
-    if (PtrDiff < 0 && Size2.hasValue() && !Size2.isScalable()) {
+    if (PtrDiff < 0 && Size2.hasValue()) {
       //                     [----BasePtr0----]
       // [---BasePtr1--]
       // =====(-PtrDiff)====>
@@ -248,20 +248,10 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
       return false;
   }
 
-  // If NumBytes is scalable and offset is not 0, conservatively return may
-  // alias
-  if ((MUC0.NumBytes.isScalable() && MUC0.Offset != 0) ||
-      (MUC1.NumBytes.isScalable() && MUC1.Offset != 0))
-    return true;
-
-  const bool BothNotScalable =
-      !MUC0.NumBytes.isScalable() && !MUC1.NumBytes.isScalable();
-
   // Try to prove that there is aliasing, or that there is no aliasing. Either
   // way, we can return now. If nothing can be proved, proceed with more tests.
   bool IsAlias;
-  if (BothNotScalable &&
-      GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
+  if (GISelAddressing::aliasIsKnownForLoadStore(MI, Other, IsAlias, MRI))
     return IsAlias;
 
   // The following all rely on MMO0 and MMO1 being valid.
@@ -277,18 +267,12 @@ bool GISelAddressing::instMayAlias(const MachineInstr &MI,
       Size1.hasValue()) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
-    int64_t Overlap0 =
-        Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
-    int64_t Overlap1 =
-        Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
-    LocationSize Loc0 =
-        Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
-    LocationSize Loc1 =
-        Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
-
-    if (AA->isNoAlias(
-            MemoryLocation(MUC0.MMO->getValue(), Loc0, MUC0.MMO->getAAInfo()),
-            MemoryLocation(MUC1.MMO->getValue(), Loc1, MUC1.MMO->getAAInfo())))
+    int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset;
+    int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset;
+    if (AA->isNoAlias(MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+                                     MUC0.MMO->getAAInfo()),
+                      MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+                                     MUC1.MMO->getAAInfo())))
       return false;
   }
 
diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp
index 83604003a038bd7..e74f75ea858577b 100644
--- a/llvm/lib/CodeGen/MachineInstr.cpp
+++ b/llvm/lib/CodeGen/MachineInstr.cpp
@@ -1323,7 +1323,6 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
   LocationSize WidthB = MMOb->getSize();
   bool KnownWidthA = WidthA.hasValue();
   bool KnownWidthB = WidthB.hasValue();
-  bool BothMMONonScalable = !WidthA.isScalable() && !WidthB.isScalable();
 
   const Value *ValA = MMOa->getValue();
   const Value *ValB = MMOb->getValue();
@@ -1339,14 +1338,12 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
       SameVal = true;
   }
 
-  if (SameVal && BothMMONonScalable) {
+  if (SameVal) {
     if (!KnownWidthA || !KnownWidthB)
       return true;
     int64_t MaxOffset = std::max(OffsetA, OffsetB);
-    int64_t LowWidth = (MinOffset == OffsetA)
-                           ? WidthA.getValue().getKnownMinValue()
-                           : WidthB.getValue().getKnownMinValue();
-    return (MinOffset + LowWidth > MaxOffset);
+    LocationSize LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
+    return (MinOffset + (int)LowWidth.getValue() > MaxOffset);
   }
 
   if (!AA)
@@ -1358,29 +1355,15 @@ static bool MemOperandsHaveAlias(const MachineFrameInfo &MFI, AAResults *AA,
   assert((OffsetA >= 0) && "Negative MachineMemOperand offset");
   assert((OffsetB >= 0) && "Negative MachineMemOperand offset");
 
-  // If Scalable Location Size has non-zero offset, Width + Offset does not work
-  // at the moment
-  if ((WidthA.isScalable() && OffsetA > 0) ||
-      (WidthB.isScalable() && OffsetB > 0))
-    return true;
-
-  int64_t OverlapA =
-      KnownWidthA ? WidthA.getValue().getKnownMinValue() + OffsetA - MinOffset
-                  : MemoryLocation::UnknownSize;
-  int64_t OverlapB =
-      KnownWidthB ? WidthB.getValue().getKnownMinValue() + OffsetB - MinOffset
-                  : MemoryLocation::UnknownSize;
-
-  LocationSize LocA = (WidthA.isScalable() || !KnownWidthA)
-                          ? WidthA
-                          : LocationSize::precise(OverlapA);
-  LocationSize LocB = (WidthB.isScalable() || !KnownWidthB)
-                          ? WidthB
-                          : LocationSize::precise(OverlapB);
+  int64_t OverlapA = KnownWidthA ? WidthA.getValue() + OffsetA - MinOffset
+                                 : MemoryLocation::UnknownSize;
+  int64_t OverlapB = KnownWidthB ? WidthB.getValue() + OffsetB - MinOffset
+                                 : MemoryLocation::UnknownSize;
 
   return !AA->isNoAlias(
-      MemoryLocation(ValA, LocA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
-      MemoryLocation(ValB, LocB, UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
+      MemoryLocation(ValA, OverlapA, UseTBAA ? MMOa->getAAInfo() : AAMDNodes()),
+      MemoryLocation(ValB, OverlapB,
+                     UseTBAA ? MMOb->getAAInfo() : AAMDNodes()));
 }
 
 bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
diff --git a/llvm/lib/CodeGen/MachineOperand.cpp b/llvm/lib/CodeGen/MachineOperand.cpp
index ace05902d5df79e..937ca539513afd6 100644
--- a/llvm/lib/CodeGen/MachineOperand.cpp
+++ b/llvm/lib/CodeGen/MachineOperand.cpp
@@ -1107,13 +1107,12 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags F,
                                      const MDNode *Ranges, SyncScope::ID SSID,
                                      AtomicOrdering Ordering,
                                      AtomicOrdering FailureOrdering)
-    : MachineMemOperand(
-          ptrinfo, F,
-          !TS.hasValue() ? LLT()
-          : TS.isScalable()
-              ? LLT::scalable_vector(1, 8 * TS.getValue().getKnownMinValue())
-              : LLT::scalar(8 * TS.getValue().getKnownMinValue()),
-          BaseAlignment, AAInfo, Ranges, SSID, Ordering, FailureOrdering) {}
+    : MachineMemOperand(ptrinfo, F,
+                        !TS.hasValue() || TS.isScalable()
+                            ? LLT()
+                            : LLT::scalar(8 * TS.getValue().getKnownMinValue()),
+                        BaseAlignment, AAInfo, Ranges, SSID, Ordering,
+                        FailureOrdering) {}
 
 void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
   // The Value and Offset may differ due to CSE. But the flags and size
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cbba3a294b3d687..14725c3e99b8a6a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24266,7 +24266,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
   // TODO: Use "BaseIndexOffset" to make this more effective.
   SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), Offset, DL);
 
-  LocationSize StoreSize = LocationSize::precise(VT.getStoreSize());
+  LocationSize StoreSize = MemoryLocation::getSizeOrUnknown(VT.getStoreSize());
   MachineFunction &MF = DAG.getMachineFunction();
   MachineMemOperand *MMO;
   if (Offset.isScalable()) {
@@ -27933,10 +27933,14 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
                  : (LSN->getAddressingMode() == ISD::PRE_DEC)
                      ? -1 * C->getSExtValue()
                      : 0;
-      TypeSize Size = LSN->getMemoryVT().getStoreSize();
-      return {LSN->isVolatile(),           LSN->isAtomic(),
-              LSN->getBasePtr(),           Offset /*base offset*/,
-              LocationSize::precise(Size), LSN->getMemOperand()};
+      LocationSize Size =
+          MemoryLocation::getSizeOrUnknown(LSN->getMemoryVT().getStoreSize());
+      return {LSN->isVolatile(),
+              LSN->isAtomic(),
+              LSN->getBasePtr(),
+              Offset /*base offset*/,
+              Size,
+              LSN->getMemOperand()};
     }
     if (const auto *LN = cast<LifetimeSDNode>(N))
       return {false /*isVolatile*/,
@@ -27978,13 +27982,6 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
       return false;
   }
 
-  // If NumBytes is scalable and offset is not 0, conservatively return may
-  // alias
-  if ((MUC0.NumBytes.hasValue() && MUC0.NumBytes.isScalable() &&
-       MUC0.Offset != 0) ||
-      (MUC1.NumBytes.hasValue() && MUC1.NumBytes.isScalable() &&
-       MUC1.Offset != 0))
-    return true;
   // Try to prove that there is aliasing, or that there is no aliasing. Either
   // way, we can return now. If nothing can be proved, proceed with more tests.
   bool IsAlias;
@@ -28015,22 +28012,18 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
   Align OrigAlignment1 = MUC1.MMO->getBaseAlign();
   LocationSize Size0 = MUC0.NumBytes;
   LocationSize Size1 = MUC1.NumBytes;
-
   if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
-      Size0.hasValue() && Size1.hasValue() && !Size0.isScalable() &&
-      !Size1.isScalable() && Size0 == Size1 &&
-      OrigAlignment0 > Size0.getValue().getKnownMinValue() &&
-      SrcValOffset0 % Size0.getValue().getKnownMinValue() == 0 &&
-      SrcValOffset1 % Size1.getValue().getKnownMinValue() == 0) {
+      Size0.hasValue() && Size1.hasValue() && Size0 == Size1 &&
+      OrigAlignment0 > Size0.getValue() &&
+      SrcValOffset0 % Size0.getValue() == 0 &&
+      SrcValOffset1 % Size1.getValue() == 0) {
     int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0.value();
     int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1.value();
 
     // There is no overlap between these relatively aligned accesses of
     // similar size. Return no alias.
-    if ((OffAlign0 + static_cast<int64_t>(
-                         Size0.getValue().getKnownMinValue())) <= OffAlign1 ||
-        (OffAlign1 + static_cast<int64_t>(
-                         Size1.getValue().getKnownMinValue())) <= OffAlign0)
+    if ((OffAlign0 + (int64_t)Size0.getValue()) <= OffAlign1 ||
+        (OffAlign1 + (int64_t)Size1.getValue()) <= OffAlign0)
       return false;
   }
 
@@ -28047,18 +28040,12 @@ bool DAGCombiner::mayAlias(SDNode *Op0, SDNode *Op1) const {
       Size0.hasValue() && Size1.hasValue()) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
-    int64_t Overlap0 =
-        Size0.getValue().getKnownMinValue() + SrcValOffset0 - MinOffset;
-    int64_t Overlap1 =
-        Size1.getValue().getKnownMinValue() + SrcValOffset1 - MinOffset;
-    LocationSize Loc0 =
-        Size0.isScalable() ? Size0 : LocationSize::precise(Overlap0);
-    LocationSize Loc1 =
-        Size1.isScalable() ? Size1 : LocationSize::precise(Overlap1);
+    int64_t Overlap0 = Size0.getValue() + SrcValOffset0 - MinOffset;
+    int64_t Overlap1 = Size1.getValue() + SrcValOffset1 - MinOffset;
     if (AA->isNoAlias(
-            MemoryLocation(MUC0.MMO->getValue(), Loc0,
+            MemoryLocation(MUC0.MMO->getValue(), Overlap0,
                            UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
-            MemoryLocation(MUC1.MMO->getValue(), Loc1,
+            MemoryLocation(MUC1.MMO->getValue(), Overlap1,
                            UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes())))
       return false;
   }
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ca0a95750ba8d8a..ab59bc96a2553d2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8465,7 +8465,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(
     EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment,
     MachineMemOperand::Flags Flags, LocationSize Size,
     const AAMDNodes &AAInfo) {
-  if (Size.hasValue() && !Size.getValue())
+  if (Size.hasValue() && MemVT.isScalableVector())
+    Size = LocationSize::beforeOrAfterPointer();
+  else if (Size.hasValue() && !Size.getValue())
     Size = LocationSize::precise(MemVT.getStoreSize());
 
   MachineFunction &MF = getMachineFunction();
@@ -8628,7 +8630,7 @@ SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
   if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
 
-  LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
+  LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
                                                    Alignment, AAInfo, Ranges);
@@ -8749,7 +8751,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, const SDLoc &dl, SDValue Val,
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr);
 
   MachineFunction &MF = getMachineFunction();
-  LocationSize Size = LocationSize::precise(Val.getValueType().getStoreSize());
+  LocationSize Size =
+      MemoryLocation::getSizeOrUnknown(Val.getValueType().getStoreSize());
   MachineMemOperand *MMO =
       MF.getMachineMemOperand(PtrInfo, MMOFlags, Size, Alignment, AAInfo);
   return getStore(Chain, dl, Val, Ptr, MMO);
@@ -8802,8 +8805,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val,
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
-      AAInfo);
+      PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+      Alignment, AAInfo);
   return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
 }
 
@@ -8897,7 +8900,7 @@ SDValue SelectionDAG::getLoadVP(
   if (PtrInfo.V.isNull())
     PtrInfo = InferPointerInfo(PtrInfo, *this, Ptr, Offset);
 
-  LocationSize Size = LocationSize::precise(MemVT.getStoreSize());
+  LocationSize Size = MemoryLocation::getSizeOrUnknown(MemVT.getStoreSize());
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, MMOFlags, Size,
                                                    Alignment, AAInfo, Ranges);
@@ -9050,8 +9053,8 @@ SDValue SelectionDAG::getTruncStoreVP(SDValue Chain, const SDLoc &dl,
 
   MachineFunction &MF = getMachineFunction();
   MachineMemOperand *MMO = MF.getMachineMemOperand(
-      PtrInfo, MMOFlags, LocationSize::precise(SVT.getStoreSize()), Alignment,
-      AAInfo);
+      PtrInfo, MMOFlags, MemoryLocation::getSizeOrUnknown(SVT.getStoreSize()),
+      Alignment, AAInfo);
   return getTruncStoreVP(Chain, dl, Val, Ptr, Mask, EVL, SVT, MMO,
                          IsCompressing);
 }
@@ -11792,9 +11795,10 @@ MemSDNode::MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl,
   // We check here that the size of the memory operand fits within the size of
   // the MMO. This is because the MMO might indicate only a possible address
   // range instead of specifying the affected memory addresses precisely.
+  // TODO: Make MachineMemOperands aware of scalable vectors.
   assert(
       (!MMO->getType().isValid() ||
-       TypeSize::isKnownLE(memvt.getStoreSize(), MMO->getSize().getValue())) &&
+       memvt.getStoreSize().getKnownMinValue() <= MMO->getSize().getValue()) &&
       "Size mismatch!");
 }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index f2ab88851b780ef..9670c3ac8430eb9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -106,6 +106,8 @@ bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
   int64_t PtrDiff;
   if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
     // If the size of memory access is unknown, do not use it to analysis.
+    // One example of unknown size memory access is to load/store scalable
+    // vector objects on the stack.
     // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
     // following situations arise:
     if (PtrDiff >= 0 && NumBytes0.hasValue() && !NumBytes0.isScalable()) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 3f69f7ad54477e2..50b5ac01135dc81 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -4974,8 +4974,7 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
   unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace();
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(AS), MachineMemOperand::MOLoad,
-      LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(),
-      Ranges);
+      LocationSize::beforeOrAfterPointer(), Alignment, I.getAAMetadata(), Ranges);
 
   if (!UniformBase) {
     Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 9518d573bccdd17..d03da07f38cb8b9 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2687,7 +2687,10 @@ bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
     return false;
   // The maximum vscale is 16 under AArch64, return the maximal extent for the
   // vector.
-  Width = LocationSize::precise(WidthN);
+  Width = WidthN.isScalable()
+              ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector /
+                    AArch64::SVEBitsPerBlock
+              : WidthN.getKnownMinValue();
   BaseOps.push_back(BaseOp);
   return true;
 }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 27387595164a463..1239a92956ca8ff 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -10402,15 +10402,9 @@ RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
       RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
   if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
       getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
-    MachineMemOperand *MMO = Load->getMemOperand();
-    MachineFunction &MF = DAG.getMachineFunction();
-    MMO = MF.getMachineMemOperand(
-        MMO, MMO->getPointerInfo(),
-        MMO->getMemoryType().isValid()
-            ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
-            : MMO->getMemoryType());
     SDValue NewLoad =
-        DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(), MMO);
+        DAG.getLoad(ContainerVT, DL, Load->getChain(), Load->getBasePtr(),
+                    Load->getMemOperand());
     SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
     return DAG.getMergeValues({Result, NewLoad.getValue(1)}, DL);
   }
@@ -10468,17 +10462,9 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
   const auto [MinVLMAX, MaxVLMAX] =
       RISCVTargetLowering::computeVLMAXBounds(ContainerVT, Subtarget);
   if (MinVLMAX == MaxVLMAX && MinVLMAX == VT.getVectorNumElements() &&
-      getLMUL1VT(ContainerVT).bitsLE(ContainerVT)) {
-    MachineMemOperand *MMO = Store->getMemOperand();
-    MachineFunction &MF = DAG.getMachineFunction();
-    MMO = MF.getMachineMemOperand(
-        MMO, MMO->getPointerInfo(),
-        MMO->getMemoryType().isValid()
-            ? LLT::scalable_vector(1, MMO->getMemoryType().getSizeInBits())
-            : MMO->getMemoryType());
+      getLMUL1VT(ContainerVT).bitsLE(ContainerVT))
     return DAG.getStore(Store->getChain(), DL, NewValue, Store->getBasePtr(),
-                        MMO);
-  }
+                        Store->getMemOperand());
 
   SDValue VL = getVLOp(VT.getVectorNumElements(), ContainerVT, DL, DAG,
                        Subtarget);
diff --git a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
index d4d803a91cfa14e..58299696e78fc2d 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-sme2-asm.ll
@@ -3,7 +3,7 @@
 define void @UphPNR(target("aarch64.svcount") %predcnt) {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
-; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
+; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
 ; CHECK:  %1:pnr_p8to15 = COPY %0
 ; CHECK:  INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_p8to15 */, %1
 ; CHECK:  RET_ReallyLR
@@ -17,7 +17,7 @@ entry:
 define void @UpaPNR(target("aarch64.svcount") %predcnt) {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
-; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
+; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
 ; CHECK:  %1:pnr = COPY %0
 ; CHECK:  INLINEASM &"ld1w {z0.s,z1.s,z2.s,z3.s}, $0/z, [x10]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR */, %1
 ; CHECK:  RET_ReallyLR
@@ -31,7 +31,7 @@ entry:
 define void @UplPNR(target("aarch64.svcount") %predcnt) {
 entry:
 ; CHECK:  %0:ppr = COPY $p0
-; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store (<vscale x 1 x s16>) into %ir.predcnt.addr)
+; CHECK:  STR_PXI %0, %stack.0.predcnt.addr, 0 :: (store unknown-size into %ir.predcnt.addr, align 2)
 ; CHECK:  %1:pnr_3b = COPY %0
 ; CHECK:  INLINEASM &"fadd z0.h, $0/m, z0.h, #0.5", 1 /* sideeffect attdialect */, {{[0-9]+}} /* reguse:PNR_3b */, %1
 ; CHECK:  RET_ReallyLR
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
index 7244ac949ab88c3..9a4e01a29ecb6da 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-array.ll
@@ -14,12 +14,12 @@ define void @array_1D(ptr %addr) #0 {
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #3
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
@@ -81,18 +81,18 @@ define void @array_2D(ptr %addr) #0 {
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 48 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #5, mul vl]
-; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT:    ld1d { z3.d }, p0/z, [x0, #4, mul vl]
-; CHECK-NEXT:    ld1d { z4.d }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x0, #3, mul vl]
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #5, mul vl]
-; CHECK-NEXT:    st1d { z3.d }, p0, [sp, #4, mul vl]
-; CHECK-NEXT:    st1d { z5.d }, p0, [sp, #3, mul vl]
-; CHECK-NEXT:    st1d { z4.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #5, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #4, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT:    ld1d { z3.d }, p0/z, [x0, #3, mul vl]
+; CHECK-NEXT:    ld1d { z4.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z5.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #5, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #4, mul vl]
+; CHECK-NEXT:    st1d { z3.d }, p0, [sp, #3, mul vl]
+; CHECK-NEXT:    st1d { z5.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z4.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #6
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
index f03a6f018d34d0c..7292d52aaf4765a 100644
--- a/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
+++ b/llvm/test/CodeGen/AArch64/alloca-load-store-scalable-struct.ll
@@ -13,12 +13,12 @@ define void @test(ptr %addr) #0 {
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG
 ; CHECK-NEXT:    .cfi_offset w29, -16
 ; CHECK-NEXT:    ptrue p0.d
-; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #2, mul vl]
-; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0, #1, mul vl]
-; CHECK-NEXT:    st1d { z0.d }, p0, [sp]
-; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #2, mul vl]
-; CHECK-NEXT:    st1d { z2.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, #2, mul vl]
+; CHECK-NEXT:    ld1d { z1.d }, p0/z, [x0, #1, mul vl]
+; CHECK-NEXT:    ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT:    st1d { z0.d }, p0, [sp, #2, mul vl]
+; CHECK-NEXT:    st1d { z1.d }, p0, [sp, #1, mul vl]
+; CHECK-NEXT:    st1d { z2.d }, p0, [sp]
 ; CHECK-NEXT:    addvl sp, sp, #3
 ; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
index 1fe91c721f4dd2b..1d025a2f776f825 100644
--- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll
@@ -18,15 +18,15 @@ define void @test(ptr %addr) {
 ; CHECK-NEXT:    add a2, a0, a1
 ; CHECK-NEXT:    vl1re64.v v8, (a2)
 ; CHECK-NEXT:    slli a2, a1, 1
-; CHECK-NEXT:    vl1re64.v v9, (a0)
-; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    add a3, a0, a2
+; CHECK-NEXT:    vl1re64.v v9, (a3)
 ; CHECK-NEXT:    vl1re64.v v10, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs1r.v v9, (a0)
 ; CHECK-NEXT:    add a2, a0, a2
-; CHECK-NEXT:    vs1r.v v10, (a2)
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    vs1r.v v8, (a0)
+; CHECK-NEXT:    vs1r.v v9, (a2)
+; CHECK-NEXT:    add a1, a0, a1
+; CHECK-NEXT:    vs1r.v v8, (a1)
+; CHECK-NEXT:    vs1r.v v10, (a0)
 ; CHECK-NEXT:    csrrs a0, vlenb, zero
 ; CHECK-NEXT:    slli a0, a0, 2
 ; CHECK-NEXT:    add sp, sp, a0
diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
index a9a680d54d58977..64031f8a93598f6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll
@@ -16,13 +16,13 @@ define <vscale x 1 x double> @test(ptr %addr, i64 %vl) {
 ; CHECK-NEXT:    sub sp, sp, a2
 ; CHECK-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb
 ; CHECK-NEXT:    csrrs a2, vlenb, zero
-; CHECK-NEXT:    vl1re64.v v8, (a0)
-; CHECK-NEXT:    add a0, a0, a2
+; CHECK-NEXT:    add a3, a0, a2
+; CHECK-NEXT:    vl1re64.v v8, (a3)
 ; CHECK-NEXT:    vl1re64.v v9, (a0)
 ; CHECK-NEXT:    addi a0, sp, 16
-; CHECK-NEXT:    vs1r.v v8, (a0)
 ; CHECK-NEXT:    add a2, a0, a2
-; CHECK-NEXT:    vs1r.v v9, (a2)
+; CHECK-NEXT:    vs1r.v v8, (a2)
+; CHECK-NEXT:    vs1r.v v9, (a0)
 ; CHECK-NEXT:    vl1re64.v v8, (a2)
 ; CHECK-NEXT:    vl1re64.v v9, (a0)
 ; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
index 31fd5bdbd31fd74..9c30f28445e36ae 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops-mir.ll
@@ -16,8 +16,8 @@ define void @vpmerge_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
   ; CHECK-NEXT:   $v0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 /* tu, mu */ :: (load unknown-size from %ir.p, align 8)
-  ; CHECK-NEXT:   VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
+  ; CHECK-NEXT:   [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 0 /* tu, mu */
+  ; CHECK-NEXT:   VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8)
   ; CHECK-NEXT:   PseudoRET
   %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
   %b = call <vscale x 2 x i32> @llvm.vp.merge.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
@@ -35,8 +35,8 @@ define void @vpselect_vpload_store(<vscale x 2 x i32> %passthru, ptr %p, <vscale
   ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr = COPY $x10
   ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:vrnov0 = COPY $v8
   ; CHECK-NEXT:   $v0 = COPY [[COPY1]]
-  ; CHECK-NEXT:   [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */ :: (load unknown-size from %ir.p, align 8)
-  ; CHECK-NEXT:   VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store (<vscale x 1 x s64>) into %ir.p)
+  ; CHECK-NEXT:   [[PseudoVLE32_V_M1_MASK:%[0-9]+]]:vrnov0 = PseudoVLE32_V_M1_MASK [[COPY3]], [[COPY2]], $v0, [[COPY]], 5 /* e32 */, 1 /* ta, mu */
+  ; CHECK-NEXT:   VS1R_V killed [[PseudoVLE32_V_M1_MASK]], [[COPY2]] :: (store unknown-size into %ir.p, align 8)
   ; CHECK-NEXT:   PseudoRET
   %a = call <vscale x 2 x i32> @llvm.vp.load.nxv2i32.p0(ptr %p, <vscale x 2 x i1> splat (i1 -1), i32 %vl)
   %b = call <vscale x 2 x i32> @llvm.vp.select.nxv2i32(<vscale x 2 x i1> %m, <vscale x 2 x i32> %a, <vscale x 2 x i32> %passthru, i32 %vl)
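
The CHECK-line churn above follows from alias analysis turning conservative
again for scalable accesses: once a width is unknown, the same-value path in
MemOperandsHaveAlias (MachineInstr.cpp hunk) bails out early, so the
scheduler can no longer interleave the scalable stack loads and stores. A
condensed sketch of that path, mirroring the reverted-to code (the function
name and free-standing form are mine):

    // Same underlying value: fixed widths get an exact overlap test; any
    // unknown (e.g. scalable) width conservatively reports "may alias".
    bool sameValueMayAlias(LocationSize WidthA, LocationSize WidthB,
                           int64_t OffsetA, int64_t OffsetB) {
      if (!WidthA.hasValue() || !WidthB.hasValue())
        return true;
      int64_t MinOffset = std::min(OffsetA, OffsetB);
      int64_t MaxOffset = std::max(OffsetA, OffsetB);
      LocationSize LowWidth = (MinOffset == OffsetA) ? WidthA : WidthB;
      return MinOffset + (int64_t)LowWidth.getValue() > MaxOffset;
    }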


