[llvm] 0ed2c04 - [AMDGPU/MemOpsCluster] Let mem ops clustering logic also consider number of clustered bytes

via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 1 10:23:00 PDT 2020


Author: hsmahesha
Date: 2020-06-01T22:52:34+05:30
New Revision: 0ed2c046362e2248eaf3d81e235115b28d4af262

URL: https://github.com/llvm/llvm-project/commit/0ed2c046362e2248eaf3d81e235115b28d4af262
DIFF: https://github.com/llvm/llvm-project/commit/0ed2c046362e2248eaf3d81e235115b28d4af262.diff

LOG: [AMDGPU/MemOpsCluster] Let mem ops clustering logic also consider number of clustered bytes

Summary:
While clustering mem ops, the AMDGPU target needs to consider the number of clustered bytes
to decide on the maximum number of mem ops that can be clustered. This patch adds support for
passing the number of clustered bytes to the target's mem ops clustering logic.
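
As an illustration of how a target can use the new argument, here is a minimal sketch of an
override of the extended TargetInstrInfo::shouldClusterMemOps hook (see the diff below);
MyTargetInstrInfo and both limits are hypothetical and only demonstrate how NumBytes can be
weighed alongside NumLoads:

  // Hypothetical target heuristic: let a cluster grow to at most four mem ops
  // and at most 16 clustered bytes; both limits are illustrative only.
  bool MyTargetInstrInfo::shouldClusterMemOps(
      ArrayRef<const MachineOperand *> BaseOps1,
      ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
      unsigned NumBytes) const {
    const unsigned MaxClusterSize = 4;
    const unsigned MaxClusterBytes = 16;
    return NumLoads <= MaxClusterSize && NumBytes <= MaxClusterBytes;
  }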

Reviewers: foad, rampitec, arsenm, vpykhtin, javedabsar

Reviewed By: foad

Subscribers: MatzeB, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, javed.absar, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D80545

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/TargetInstrInfo.h
    llvm/lib/CodeGen/MachineScheduler.cpp
    llvm/lib/CodeGen/TargetInstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.h
    llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.h
    llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
    llvm/lib/Target/Hexagon/HexagonInstrInfo.h
    llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
    llvm/lib/Target/Lanai/LanaiInstrInfo.h
    llvm/lib/Target/X86/X86InstrInfo.cpp
    llvm/lib/Target/X86/X86InstrInfo.h

Removed: 
    


################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 709030b62076..b3b2fa218627 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1254,11 +1254,10 @@ class TargetInstrInfo : public MCInstrInfo {
   /// It returns false if no base operands and offset was found.
   /// It is not guaranteed to always recognize base operands and offsets in all
   /// cases.
-  virtual bool
-  getMemOperandsWithOffset(const MachineInstr &MI,
-                           SmallVectorImpl<const MachineOperand *> &BaseOps,
-                           int64_t &Offset, bool &OffsetIsScalable,
-                           const TargetRegisterInfo *TRI) const {
+  virtual bool getMemOperandsWithOffsetWidth(
+      const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
+      int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const {
     return false;
   }
 
@@ -1286,9 +1285,11 @@ class TargetInstrInfo : public MCInstrInfo {
   /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
   /// \p NumLoads is the number of loads that will be in the cluster if this
   /// hook returns true.
+  /// \p NumBytes is the number of bytes that will be loaded from all the
+  /// clustered loads if this hook returns true.
   virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                                    ArrayRef<const MachineOperand *> BaseOps2,
-                                   unsigned NumLoads) const {
+                                   unsigned NumLoads, unsigned NumBytes) const {
     llvm_unreachable("target did not implement shouldClusterMemOps()");
   }
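
For a target with simple reg+imm addressing, an override of the renamed
getMemOperandsWithOffsetWidth hook could look like the sketch below; MyTargetInstrInfo, the
assumed operand layout, and the fixed 4-byte width are illustrative assumptions only (the real
AArch64, AMDGPU, Hexagon, Lanai and X86 implementations follow in this patch):

  bool MyTargetInstrInfo::getMemOperandsWithOffsetWidth(
      const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
      int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
      const TargetRegisterInfo *TRI) const {
    if (!LdSt.mayLoadOrStore())
      return false;
    // Assume operand 1 is the base register and operand 2 the immediate offset,
    // as on a simple reg+imm load/store encoding.
    const MachineOperand &Base = LdSt.getOperand(1);
    const MachineOperand &Imm = LdSt.getOperand(2);
    if (!Base.isReg() || !Imm.isImm())
      return false;
    BaseOps.push_back(&Base);
    Offset = Imm.getImm();
    OffsetIsScalable = false;
    Width = 4; // hypothetical: every access on this target is 32 bits wide
    return true;
  }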
 

diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 92fd3edf9236..a68899191374 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1473,10 +1473,12 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
     SUnit *SU;
     SmallVector<const MachineOperand *, 4> BaseOps;
     int64_t Offset;
+    unsigned Width;
 
     MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
-              int64_t Offset)
-        : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset) {}
+              int64_t Offset, unsigned Width)
+        : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
+          Width(Width) {}
 
     static bool Compare(const MachineOperand *const &A,
                         const MachineOperand *const &B) {
@@ -1565,12 +1567,14 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
     ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
   SmallVector<MemOpInfo, 32> MemOpRecords;
   for (SUnit *SU : MemOps) {
+    const MachineInstr &MI = *SU->getInstr();
     SmallVector<const MachineOperand *, 4> BaseOps;
     int64_t Offset;
     bool OffsetIsScalable;
-    if (TII->getMemOperandsWithOffset(*SU->getInstr(), BaseOps, Offset,
-                                      OffsetIsScalable, TRI))
-      MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset));
+    unsigned Width;
+    if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
+                                           OffsetIsScalable, Width, TRI))
+      MemOpRecords.push_back(MemOpInfo(SU, BaseOps, Offset, Width));
 #ifndef NDEBUG
     for (auto *Op : BaseOps)
       assert(Op);
@@ -1584,16 +1588,19 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
   // At this point, `MemOpRecords` array must hold atleast two mem ops. Try to
   // cluster mem ops collected within `MemOpRecords` array.
   unsigned ClusterLength = 1;
+  unsigned CurrentClusterBytes = MemOpRecords[0].Width;
   for (unsigned Idx = 0, End = MemOpRecords.size(); Idx < (End - 1); ++Idx) {
     // Decision to cluster mem ops is taken based on target dependent logic
     auto MemOpa = MemOpRecords[Idx];
     auto MemOpb = MemOpRecords[Idx + 1];
     ++ClusterLength;
-    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps,
-                                  ClusterLength)) {
+    CurrentClusterBytes += MemOpb.Width;
+    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
+                                  CurrentClusterBytes)) {
       // Current mem ops pair could not be clustered, reset cluster length, and
       // go to next pair
       ClusterLength = 1;
+      CurrentClusterBytes = MemOpb.Width;
       continue;
     }
 
@@ -1605,6 +1612,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
     // FIXME: Is this check really required?
     if (!DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) {
       ClusterLength = 1;
+      CurrentClusterBytes = MemOpb.Width;
       continue;
     }
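
To see the effect of the running byte count concretely, below is a small self-contained model of
the loop above, with hypothetical 4-byte loads and a stand-in predicate that caps a cluster at 8
bytes (it is not the scheduler code itself); four such loads get split into two clusters of two:

  #include <cstdio>
  #include <vector>

  // Stand-in for the target hook: keep clustering only while the running byte
  // count stays within an illustrative 8-byte budget.
  static bool shouldCluster(unsigned /*NumLoads*/, unsigned NumBytes) {
    return NumBytes <= 8;
  }

  int main() {
    std::vector<unsigned> Widths = {4, 4, 4, 4}; // four hypothetical 4-byte loads
    unsigned ClusterLength = 1;
    unsigned CurrentClusterBytes = Widths[0];
    for (unsigned Idx = 0, End = Widths.size(); Idx < End - 1; ++Idx) {
      ++ClusterLength;
      CurrentClusterBytes += Widths[Idx + 1];
      if (!shouldCluster(ClusterLength, CurrentClusterBytes)) {
        // Pair rejected: reset both counters, exactly as the mutation does.
        ClusterLength = 1;
        CurrentClusterBytes = Widths[Idx + 1];
        continue;
      }
      std::printf("clustered op %u with op %u (%u ops, %u bytes)\n", Idx,
                  Idx + 1, ClusterLength, CurrentClusterBytes);
    }
    return 0;
  }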
 

diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 54913175f167..228e3c1051ab 100644
--- a/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -1037,7 +1037,9 @@ bool TargetInstrInfo::getMemOperandWithOffset(
     const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset,
     bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
   SmallVector<const MachineOperand *, 4> BaseOps;
-  if (!getMemOperandsWithOffset(MI, BaseOps, Offset, OffsetIsScalable, TRI) ||
+  unsigned Width;
+  if (!getMemOperandsWithOffsetWidth(MI, BaseOps, Offset, OffsetIsScalable,
+                                     Width, TRI) ||
       BaseOps.size() != 1)
     return false;
   BaseOp = BaseOps.front();

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 801f162937ed..ed8c5f6ce879 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2033,15 +2033,14 @@ bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
   return true;
 }
 
-bool AArch64InstrInfo::getMemOperandsWithOffset(
+bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
-    int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
-    const {
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
   if (!LdSt.mayLoadOrStore())
     return false;
 
   const MachineOperand *BaseOp;
-  unsigned Width;
   if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
                                     Width, TRI))
     return false;
@@ -2513,7 +2512,8 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
 /// Only called for LdSt for which getMemOperandWithOffset returns true.
 bool AArch64InstrInfo::shouldClusterMemOps(
     ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads) const {
+    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+    unsigned NumBytes) const {
   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
   const MachineOperand &BaseOp1 = *BaseOps1.front();
   const MachineOperand &BaseOp2 = *BaseOps2.front();

diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 13ef9845ae3d..e05b1837edc3 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -113,10 +113,10 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
   /// Hint that pairing the given load or store is unprofitable.
   static void suppressLdStPair(MachineInstr &MI);
 
-  bool getMemOperandsWithOffset(
+  bool getMemOperandsWithOffsetWidth(
       const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
-      int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
-      const override;
+      int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const override;
 
   /// If \p OffsetIsScalable is set to 'true', the offset is scaled by `vscale`.
   /// This is true for some SVE instructions like ldr/str that have a
@@ -140,7 +140,7 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                            ArrayRef<const MachineOperand *> BaseOps2,
-                           unsigned NumLoads) const override;
+                           unsigned NumLoads, unsigned NumBytes) const override;
 
   void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
                         const DebugLoc &DL, MCRegister DestReg,

diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 4c8405120548..35c49ae8c0dd 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -146,10 +146,11 @@ class SIInsertHardClauses : public MachineFunctionPass {
 
         int64_t Dummy1;
         bool Dummy2;
+        unsigned Dummy3;
         SmallVector<const MachineOperand *, 4> BaseOps;
         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
-          if (!SII->getMemOperandsWithOffset(MI, BaseOps, Dummy1, Dummy2,
-                                             TRI)) {
+          if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
+                                                  Dummy3, TRI)) {
             // We failed to get the base operands, so we'll never clause this
             // instruction with any other, so pretend it's illegal.
             Type = HARDCLAUSE_ILLEGAL;
@@ -164,7 +165,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
               // scheduler it limits the size of the cluster to avoid increasing
               // register pressure too much, but this pass runs after register
               // allocation so there is no need for that kind of limit.
-              !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2)))) {
+              !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
           // Finish the current clause.
           Changed |= emitClause(CI, SII);
           CI = ClauseInfo();

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d7508c3b1ea2..5feed9d53bc7 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -264,16 +264,27 @@ static bool isStride64(unsigned Opc) {
   }
 }
 
-bool SIInstrInfo::getMemOperandsWithOffset(
+unsigned SIInstrInfo::getOperandSizeInBytes(const MachineInstr &LdSt,
+                                            const MachineOperand *MOp) const {
+  assert(MOp && "Unexpected null machine operand!");
+  const MachineRegisterInfo &MRI = LdSt.getParent()->getParent()->getRegInfo();
+  const Register Reg = MOp->getReg();
+  const TargetRegisterClass *DstRC = Register::isVirtualRegister(Reg)
+                                         ? MRI.getRegClass(Reg)
+                                         : RI.getPhysRegClass(Reg);
+  return (RI.getRegSizeInBits(*DstRC) / 8);
+}
+
+bool SIInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
-    int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI)
-    const {
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
   if (!LdSt.mayLoadOrStore())
     return false;
 
   unsigned Opc = LdSt.getOpcode();
   OffsetIsScalable = false;
-  const MachineOperand *BaseOp, *OffsetOp;
+  const MachineOperand *BaseOp, *OffsetOp, *MOp;
 
   if (isDS(LdSt)) {
     BaseOp = getNamedOperand(LdSt, AMDGPU::OpName::addr);
@@ -287,6 +298,11 @@ bool SIInstrInfo::getMemOperandsWithOffset(
       }
       BaseOps.push_back(BaseOp);
       Offset = OffsetOp->getImm();
+      // Get appropriate operand, and compute width accordingly.
+      MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+      if (!MOp)
+        MOp = getNamedOperand(LdSt, AMDGPU::OpName::data0);
+      Width = getOperandSizeInBytes(LdSt, MOp);
     } else {
       // The 2 offset instructions use offset0 and offset1 instead. We can treat
       // these as a load with a single offset if the 2 offsets are consecutive.
@@ -318,6 +334,16 @@ bool SIInstrInfo::getMemOperandsWithOffset(
 
       BaseOps.push_back(BaseOp);
       Offset = EltSize * Offset0;
+      // Get appropriate operand(s), and compute width accordingly.
+      MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+      if (!MOp) {
+        MOp = getNamedOperand(LdSt, AMDGPU::OpName::data0);
+        Width = getOperandSizeInBytes(LdSt, MOp);
+        MOp = getNamedOperand(LdSt, AMDGPU::OpName::data1);
+        Width += getOperandSizeInBytes(LdSt, MOp);
+      } else {
+        Width = getOperandSizeInBytes(LdSt, MOp);
+      }
     }
     return true;
   }
@@ -342,6 +368,11 @@ bool SIInstrInfo::getMemOperandsWithOffset(
       BaseOps.push_back(RSrc);
       BaseOps.push_back(SOffset);
       Offset = OffsetImm->getImm();
+      // Get appropriate operand, and compute width accordingly.
+      MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+      if (!MOp)
+        MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+      Width = getOperandSizeInBytes(LdSt, MOp);
       return true;
     }
 
@@ -359,6 +390,11 @@ bool SIInstrInfo::getMemOperandsWithOffset(
     Offset = OffsetImm->getImm();
     if (SOffset) // soffset can be an inline immediate.
       Offset += SOffset->getImm();
+    // Get appropriate operand, and compute width accordingly.
+    MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+    if (!MOp)
+      MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+    Width = getOperandSizeInBytes(LdSt, MOp);
     return true;
   }
 
@@ -369,6 +405,9 @@ bool SIInstrInfo::getMemOperandsWithOffset(
     BaseOps.push_back(BaseOp);
     OffsetOp = getNamedOperand(LdSt, AMDGPU::OpName::offset);
     Offset = OffsetOp ? OffsetOp->getImm() : 0;
+    // Get appropriate operand, and compute width accordingly.
+    MOp = getNamedOperand(LdSt, AMDGPU::OpName::sdst);
+    Width = getOperandSizeInBytes(LdSt, MOp);
     return true;
   }
 
@@ -381,6 +420,11 @@ bool SIInstrInfo::getMemOperandsWithOffset(
     if (BaseOp)
       BaseOps.push_back(BaseOp);
     Offset = getNamedOperand(LdSt, AMDGPU::OpName::offset)->getImm();
+    // Get appropriate operand, and compute width accordingly.
+    MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdst);
+    if (!MOp)
+      MOp = getNamedOperand(LdSt, AMDGPU::OpName::vdata);
+    Width = getOperandSizeInBytes(LdSt, MOp);
     return true;
   }
 
@@ -430,7 +474,8 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
 
 bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                                       ArrayRef<const MachineOperand *> BaseOps2,
-                                      unsigned NumLoads) const {
+                                      unsigned NumLoads,
+                                      unsigned NumBytes) const {
   assert(!BaseOps1.empty() && !BaseOps2.empty());
   const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
   const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
@@ -2730,9 +2775,12 @@ bool SIInstrInfo::checkInstOffsetsDoNotOverlap(const MachineInstr &MIa,
                                                const MachineInstr &MIb) const {
   SmallVector<const MachineOperand *, 4> BaseOps0, BaseOps1;
   int64_t Offset0, Offset1;
+  unsigned Dummy0, Dummy1;
   bool Offset0IsScalable, Offset1IsScalable;
-  if (!getMemOperandsWithOffset(MIa, BaseOps0, Offset0, Offset0IsScalable, &RI) ||
-      !getMemOperandsWithOffset(MIb, BaseOps1, Offset1, Offset1IsScalable, &RI))
+  if (!getMemOperandsWithOffsetWidth(MIa, BaseOps0, Offset0, Offset0IsScalable,
+                                     Dummy0, &RI) ||
+      !getMemOperandsWithOffsetWidth(MIb, BaseOps1, Offset1, Offset1IsScalable,
+                                     Dummy1, &RI))
     return false;
 
   if (!memOpsHaveSameBaseOperands(BaseOps0, BaseOps1))
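
On the AMDGPU side, the width of a DS access with two data operands (a DS_WRITE2-style
instruction) is the sum of the sizes of both data registers. A rough sketch of that computation
using the helper introduced above (the wrapper function itself is hypothetical):

  // Hypothetical wrapper over the new SIInstrInfo helpers: sum the sizes of the
  // data0/data1 operands when present, mirroring the two-offset DS case above.
  unsigned computeDSWriteWidth(const SIInstrInfo &TII, const MachineInstr &MI) {
    unsigned Width = 0;
    if (const MachineOperand *Data0 =
            TII.getNamedOperand(MI, AMDGPU::OpName::data0))
      Width += TII.getOperandSizeInBytes(MI, Data0);
    if (const MachineOperand *Data1 =
            TII.getNamedOperand(MI, AMDGPU::OpName::data1))
      Width += TII.getOperandSizeInBytes(MI, Data1);
    return Width;
  }

For example, two 32-bit VGPR data operands give a Width of 8 bytes.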

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index 8231a96f5f6b..c6d0349d3575 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -181,15 +181,18 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
                                int64_t &Offset1,
                                int64_t &Offset2) const override;
 
-  bool
-  getMemOperandsWithOffset(const MachineInstr &LdSt,
-                           SmallVectorImpl<const MachineOperand *> &BaseOps,
-                           int64_t &Offset, bool &OffsetIsScalable,
-                           const TargetRegisterInfo *TRI) const final;
+  unsigned getOperandSizeInBytes(const MachineInstr &LdSt,
+                                 const MachineOperand *MOp) const;
+
+  bool getMemOperandsWithOffsetWidth(
+      const MachineInstr &LdSt,
+      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+      bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const final;
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                            ArrayRef<const MachineOperand *> BaseOps2,
-                           unsigned NumLoads) const override;
+                           unsigned NumLoads, unsigned NumBytes) const override;
 
   bool shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, int64_t Offset0,
                                int64_t Offset1, unsigned NumLoads) const override;

diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 0bfb28b935c3..64922d30c415 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -2963,12 +2963,12 @@ bool HexagonInstrInfo::addLatencyToSchedule(const MachineInstr &MI1,
 }
 
 /// Get the base register and byte offset of a load/store instr.
-bool HexagonInstrInfo::getMemOperandsWithOffset(
+bool HexagonInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
-    int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
-  unsigned AccessSize = 0;
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
   OffsetIsScalable = false;
-  const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, AccessSize);
+  const MachineOperand *BaseOp = getBaseAndOffset(LdSt, Offset, Width);
   if (!BaseOp || !BaseOp->isReg())
     return false;
   BaseOps.push_back(BaseOp);

diff --git a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
index d8998d265477..847b9a672891 100644
--- a/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
+++ b/llvm/lib/Target/Hexagon/HexagonInstrInfo.h
@@ -204,11 +204,11 @@ class HexagonInstrInfo : public HexagonGenInstrInfo {
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
   /// Get the base register and byte offset of a load/store instr.
-  bool
-  getMemOperandsWithOffset(const MachineInstr &LdSt,
-                           SmallVectorImpl<const MachineOperand *> &BaseOps,
-                           int64_t &Offset, bool &OffsetIsScalable,
-                           const TargetRegisterInfo *TRI) const override;
+  bool getMemOperandsWithOffsetWidth(
+      const MachineInstr &LdSt,
+      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+      bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const override;
 
   /// Reverses the branch condition of the specified condition list,
   /// returning false on success and true if it cannot be reversed.

diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
index 8afd27d53469..c82142970357 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.cpp
@@ -795,9 +795,10 @@ bool LanaiInstrInfo::getMemOperandWithOffsetWidth(
   return true;
 }
 
-bool LanaiInstrInfo::getMemOperandsWithOffset(
+bool LanaiInstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
-    int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
   switch (LdSt.getOpcode()) {
   default:
     return false;
@@ -811,7 +812,6 @@ bool LanaiInstrInfo::getMemOperandsWithOffset(
   case Lanai::LDBs_RI:
   case Lanai::LDBz_RI:
     const MachineOperand *BaseOp;
-    unsigned Width;
     OffsetIsScalable = false;
     if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))
       return false;

diff --git a/llvm/lib/Target/Lanai/LanaiInstrInfo.h b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
index 216dbfb63acc..44c1e629a8e6 100644
--- a/llvm/lib/Target/Lanai/LanaiInstrInfo.h
+++ b/llvm/lib/Target/Lanai/LanaiInstrInfo.h
@@ -67,11 +67,11 @@ class LanaiInstrInfo : public LanaiGenInstrInfo {
 
   bool expandPostRAPseudo(MachineInstr &MI) const override;
 
-  bool
-  getMemOperandsWithOffset(const MachineInstr &LdSt,
-                           SmallVectorImpl<const MachineOperand *> &BaseOps,
-                           int64_t &Offset, bool &OffsetIsScalable,
-                           const TargetRegisterInfo *TRI) const override;
+  bool getMemOperandsWithOffsetWidth(
+      const MachineInstr &LdSt,
+      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+      bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const override;
 
   bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
                                     const MachineOperand *&BaseOp,

diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index c8939e348a70..46ff62f7a4ed 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3662,9 +3662,10 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg,
   }
 }
 
-bool X86InstrInfo::getMemOperandsWithOffset(
+bool X86InstrInfo::getMemOperandsWithOffsetWidth(
     const MachineInstr &MemOp, SmallVectorImpl<const MachineOperand *> &BaseOps,
-    int64_t &Offset, bool &OffsetIsScalable, const TargetRegisterInfo *TRI) const {
+    int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
+    const TargetRegisterInfo *TRI) const {
   const MCInstrDesc &Desc = MemOp.getDesc();
   int MemRefBegin = X86II::getMemoryOperandNo(Desc.TSFlags);
   if (MemRefBegin < 0)
@@ -3696,6 +3697,11 @@ bool X86InstrInfo::getMemOperandsWithOffset(
     return false;
 
   OffsetIsScalable = false;
+  // FIXME: Relying on memoperands() may not be right thing to do here. Check
+  // with X86 maintainers, and fix it accordingly. For now, it is ok, since
+  // there is no use of `Width` for X86 back-end at the moment.
+  Width =
+      !MemOp.memoperands_empty() ? MemOp.memoperands().front()->getSize() : 0;
   BaseOps.push_back(BaseOp);
   return true;
 }

diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index fe79073ae370..89f2ff118c37 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -317,11 +317,11 @@ class X86InstrInfo final : public X86GenInstrInfo {
                      SmallVectorImpl<MachineOperand> &Cond,
                      bool AllowModify) const override;
 
-  bool
-  getMemOperandsWithOffset(const MachineInstr &LdSt,
-                           SmallVectorImpl<const MachineOperand *> &BaseOps,
-                           int64_t &Offset, bool &OffsetIsScalable,
-                           const TargetRegisterInfo *TRI) const override;
+  bool getMemOperandsWithOffsetWidth(
+      const MachineInstr &LdSt,
+      SmallVectorImpl<const MachineOperand *> &BaseOps, int64_t &Offset,
+      bool &OffsetIsScalable, unsigned &Width,
+      const TargetRegisterInfo *TRI) const override;
   bool analyzeBranchPredicate(MachineBasicBlock &MBB,
                               TargetInstrInfo::MachineBranchPredicate &MBP,
                               bool AllowModify = false) const override;


        


More information about the llvm-commits mailing list