[llvm] 9949b1a - [GlobalISel][AArch64] Import + select LDR*roW and STR*roW patterns

Jessica Paquette via llvm-commits llvm-commits at lists.llvm.org
Thu Jan 9 12:16:20 PST 2020


Author: Jessica Paquette
Date: 2020-01-09T12:15:56-08:00
New Revision: 9949b1a1753aa0f229c5b55ea01ec96f48164d9e

URL: https://github.com/llvm/llvm-project/commit/9949b1a1753aa0f229c5b55ea01ec96f48164d9e
DIFF: https://github.com/llvm/llvm-project/commit/9949b1a1753aa0f229c5b55ea01ec96f48164d9e.diff

LOG: [GlobalISel][AArch64] Import + select LDR*roW and STR*roW patterns

This adds support for selecting a large chunk of the load/store *roW patterns.

This is pretty much a straight port of AArch64DAGToDAGISel::SelectAddrModeWRO
into GISel. The code is very similar to the XRO code. The main difference is
that in the *roW patterns, we want to try to fold in an extend, and *possibly*
a shift along with it. A good portion of this patch is refactoring the existing
XRO code.
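
For example (adapted from the new load-wro-addressing-modes.mir test), a
sign-extended 32-bit offset shifted by the element size:

    %base:gpr(p0) = COPY $x0
    %foo:gpr(s32) = COPY $w1
    %ext:gpr(s64) = G_SEXT %foo(s32)
    %c:gpr(s64) = G_CONSTANT i64 2
    %offset:gpr(s64) = G_SHL %ext, %c
    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)

now selects to a single roW load, where the trailing "1, 1" operands encode
the sign extend (sxtw) and the shift:

    %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)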

- Add selectAddrModeWRO

- Factor the code shared by selectAddrModeXRO and selectAddrModeWRO out of
  selectAddrModeShiftedExtendXReg and into selectExtendedSHL.
  This is similar to the function of the same name in AArch64DAGToDAGISel.

- Add support for extends to the factored out code in selectExtendedSHL.

- Teach getExtendTypeForInst how to handle AND masks that are intended to be
  used in loads/stores (necessary for this addressing mode); see the example
  after this list.

- Make getExtendTypeForInst non-static; moving it produced an annoying diff,
  and I wanted to keep the WRO/XRO functions close to each other while
  writing the code.
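
The AND-mask handling is illustrated by the new and_uxtw test: a mask of
0xFFFFFFFF zero-extends the low 32 bits of the offset, so it can be folded
as a uxtw, while the 0xFF and 0xFFFF masks in the bad_and_mask tests are not
valid load/store extends and still select the roX form. For the uxtw case:

    %imp:gpr(s64) = G_IMPLICIT_DEF
    %mask:gpr(s64) = G_CONSTANT i64 4294967295
    %and:gpr(s64) = G_AND %imp, %mask
    %c:gpr(s64) = G_CONSTANT i64 8
    %mul:gpr(s64) = G_MUL %c, %and
    %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)

selects to

    %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8)

where [[COPY2]] is the 32-bit subregister copy of the masked value, and the
"0, 1" operands encode the zero extend (uxtw) and the shift.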

Differential Revision: https://reviews.llvm.org/D72426

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
    llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrFormats.td
    llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4ac52a48b3a1..e9cacbf739ef 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -3131,6 +3131,22 @@ def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
 def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
 def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
 
+def gi_ro_Windexed8 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<8>">,
+    GIComplexPatternEquiv<ro_Windexed8>;
+def gi_ro_Windexed16 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<16>">,
+    GIComplexPatternEquiv<ro_Windexed16>;
+def gi_ro_Windexed32 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<32>">,
+    GIComplexPatternEquiv<ro_Windexed32>;
+def gi_ro_Windexed64 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<64>">,
+    GIComplexPatternEquiv<ro_Windexed64>;
+def gi_ro_Windexed128 :
+    GIComplexOperandMatcher<s64, "selectAddrModeWRO<128>">,
+    GIComplexPatternEquiv<ro_Windexed128>;
+
 class MemExtendOperand<string Reg, int Width> : AsmOperandClass {
   let Name = "Mem" # Reg # "Extend" # Width;
   let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">";

diff  --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 193136fe53d8..ad59a95de288 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -206,6 +206,14 @@ class AArch64InstructionSelector : public InstructionSelector {
   ComplexRendererFns
   selectAddrModeShiftedExtendXReg(MachineOperand &Root,
                                   unsigned SizeInBytes) const;
+
+  /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
+  /// or not a shift + extend should be folded into an addressing mode. Returns
+  /// None when this is not profitable or possible.
+  ComplexRendererFns
+  selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
+                    MachineOperand &Offset, unsigned SizeInBytes,
+                    bool WantsExt) const;
   ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
   ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
                                        unsigned SizeInBytes) const;
@@ -214,6 +222,13 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectAddrModeXRO(Root, Width / 8);
   }
 
+  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
+                                       unsigned SizeInBytes) const;
+  template <int Width>
+  ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
+    return selectAddrModeWRO(Root, Width / 8);
+  }
+
   ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
 
   ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
@@ -228,6 +243,15 @@ class AArch64InstructionSelector : public InstructionSelector {
     return selectShiftedRegister(Root);
   }
 
+  /// Given an extend instruction, determine the correct shift-extend type for
+  /// that instruction.
+  ///
+  /// If the instruction is going to be used in a load or store, pass
+  /// \p IsLoadStore = true.
+  AArch64_AM::ShiftExtendType
+  getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
+                       bool IsLoadStore = false) const;
+
   /// Instructions that accept extend modifiers like UXTW expect the register
   /// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
   /// subregister copy if necessary. Return either ExtReg, or the result of the
@@ -4234,45 +4258,15 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
                 [](MachineInstr &Use) { return Use.mayLoadOrStore(); });
 }
 
-/// This is used for computing addresses like this:
-///
-/// ldr x1, [x2, x3, lsl #3]
-///
-/// Where x2 is the base register, and x3 is an offset register. The shift-left
-/// is a constant value specific to this load instruction. That is, we'll never
-/// see anything other than a 3 here (which corresponds to the size of the
-/// element being loaded.)
 InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
-    MachineOperand &Root, unsigned SizeInBytes) const {
-  if (!Root.isReg())
-    return None;
-  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+AArch64InstructionSelector::selectExtendedSHL(
+    MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
+    unsigned SizeInBytes, bool WantsExt) const {
+  assert(Base.isReg() && "Expected base to be a register operand");
+  assert(Offset.isReg() && "Expected offset to be a register operand");
 
-  // Make sure that the memory op is a valid size.
-  int64_t LegalShiftVal = Log2_32(SizeInBytes);
-  if (LegalShiftVal == 0)
-    return None;
-
-  // We want to find something like this:
-  //
-  // val = G_CONSTANT LegalShiftVal
-  // shift = G_SHL off_reg val
-  // ptr = G_PTR_ADD base_reg shift
-  // x = G_LOAD ptr
-  //
-  // And fold it into this addressing mode:
-  //
-  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
-
-  // Check if we can find the G_PTR_ADD.
-  MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
-  if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
-    return None;
-
-  // Now, try to match an opcode which will match our specific offset.
-  // We want a G_SHL or a G_MUL.
-  MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+  MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
   if (!OffsetInst)
     return None;
 
@@ -4280,6 +4274,10 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
   if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
     return None;
 
+  // Make sure that the memory op is a valid size.
+  int64_t LegalShiftVal = Log2_32(SizeInBytes);
+  if (LegalShiftVal == 0)
+    return None;
   if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
     return None;
 
@@ -4324,20 +4322,75 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
   if (ImmVal != LegalShiftVal)
     return None;
 
+  unsigned SignExtend = 0;
+  if (WantsExt) {
+    // Check if the offset is defined by an extend.
+    MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
+    auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
+    if (Ext == AArch64_AM::InvalidShiftExtend)
+      return None;
+
+    SignExtend = Ext == AArch64_AM::SXTW;
+
+    // Need a 32-bit wide register here.
+    MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
+    OffsetReg = ExtInst->getOperand(1).getReg();
+    OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+  }
+
   // We can use the LHS of the GEP as the base, and the LHS of the shift as an
   // offset. Signify that we are shifting by setting the shift flag to 1.
-  return {{[=](MachineInstrBuilder &MIB) {
-             MIB.addUse(Gep->getOperand(1).getReg());
-           },
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
            [=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
            [=](MachineInstrBuilder &MIB) {
              // Need to add both immediates here to make sure that they are both
              // added to the instruction.
-             MIB.addImm(0);
+             MIB.addImm(SignExtend);
              MIB.addImm(1);
            }}};
 }
 
+/// This is used for computing addresses like this:
+///
+/// ldr x1, [x2, x3, lsl #3]
+///
+/// Where x2 is the base register, and x3 is an offset register. The shift-left
+/// is a constant value specific to this load instruction. That is, we'll never
+/// see anything other than a 3 here (which corresponds to the size of the
+/// element being loaded.)
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
+    MachineOperand &Root, unsigned SizeInBytes) const {
+  if (!Root.isReg())
+    return None;
+  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+  // We want to find something like this:
+  //
+  // val = G_CONSTANT LegalShiftVal
+  // shift = G_SHL off_reg val
+  // ptr = G_PTR_ADD base_reg shift
+  // x = G_LOAD ptr
+  //
+  // And fold it into this addressing mode:
+  //
+  // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+
+  // Check if we can find the G_PTR_ADD.
+  MachineInstr *PtrAdd =
+      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+    return None;
+
+  // Now, try to match an opcode which will match our specific offset.
+  // We want a G_SHL or a G_MUL.
+  MachineInstr *OffsetInst =
+      getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
+  return selectExtendedSHL(Root, PtrAdd->getOperand(1),
+                           OffsetInst->getOperand(0), SizeInBytes,
+                           /*WantsExt=*/false);
+}
+
 /// This is used for computing addresses like this:
 ///
 /// ldr x1, [x2, x3]
@@ -4399,6 +4452,74 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
   return selectAddrModeRegisterOffset(Root);
 }
 
+/// This is used for computing addresses like this:
+///
+/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
+///
+/// Where we have a 64-bit base register, a 32-bit offset register, and an
+/// extend (which may or may not be signed).
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
+                                              unsigned SizeInBytes) const {
+  MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+  MachineInstr *PtrAdd =
+      getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+  if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+    return None;
+
+  MachineOperand &LHS = PtrAdd->getOperand(1);
+  MachineOperand &RHS = PtrAdd->getOperand(2);
+  MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
+
+  // The first case is the same as selectAddrModeXRO, except we need an extend.
+  // In this case, we try to find a shift and extend, and fold them into the
+  // addressing mode.
+  //
+  // E.g.
+  //
+  // off_reg = G_Z/S/ANYEXT ext_reg
+  // val = G_CONSTANT LegalShiftVal
+  // shift = G_SHL off_reg val
+  // ptr = G_PTR_ADD base_reg shift
+  // x = G_LOAD ptr
+  //
+  // In this case we can get a load like this:
+  //
+  // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
+  auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
+                                       SizeInBytes, /*WantsExt=*/true);
+  if (ExtendedShl)
+    return ExtendedShl;
+
+  // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
+  //
+  // e.g.
+  // ldr something, [base_reg, ext_reg, sxtw]
+  if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+    return None;
+
+  // Check if this is an extend. We'll get an extend type if it is.
+  AArch64_AM::ShiftExtendType Ext =
+      getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
+  if (Ext == AArch64_AM::InvalidShiftExtend)
+    return None;
+
+  // Need a 32-bit wide register.
+  MachineIRBuilder MIB(*PtrAdd);
+  Register ExtReg =
+      narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+  unsigned SignExtend = Ext == AArch64_AM::SXTW;
+
+  // Base is LHS, offset is ExtReg.
+  return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
+           [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+           [=](MachineInstrBuilder &MIB) {
+             MIB.addImm(SignExtend);
+             MIB.addImm(0);
+           }}};
+}
+
 /// Select a "register plus unscaled signed 9-bit immediate" address.  This
 /// should only match when there is an offset that is not valid for a scaled
 /// immediate addressing mode.  The "Size" argument is the size in bytes of the
@@ -4561,9 +4682,8 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
            [=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
 }
 
-/// Get the correct ShiftExtendType for an extend instruction.
-static AArch64_AM::ShiftExtendType
-getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
+    MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
   unsigned Opc = MI.getOpcode();
 
   // Handle explicit extend instructions first.
@@ -4610,9 +4730,9 @@ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
   default:
     return AArch64_AM::InvalidShiftExtend;
   case 0xFF:
-    return AArch64_AM::UXTB;
+    return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
   case 0xFFFF:
-    return AArch64_AM::UXTH;
+    return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
   case 0xFFFFFFFF:
     return AArch64_AM::UXTW;
   }

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
new file mode 100644
index 000000000000..e7c95fbe1063
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
@@ -0,0 +1,431 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name:            shl_gep_sext_ldrwrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to fold a shift + extend into the pattern.
+    ; In this case, we should get a roW load with two 1s, representing a shift
+    ; plus sign extend.
+
+    ; CHECK-LABEL: name: shl_gep_sext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            shl_gep_zext_ldrwrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to fold a shift + extend into the pattern.
+    ; In this case, we should get a roW load with a 0 representing a zero-extend
+    ; and a 1 representing a shift.
+
+    ; CHECK-LABEL: name: shl_gep_zext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ZEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            shl_gep_anyext_ldrwrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to fold a shift + extend into the pattern.
+    ; In this case, we should get a roW load with a 0 representing a zero-extend
+    ; and a 1 representing a shift.
+
+    ; CHECK-LABEL: name: shl_gep_anyext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ANYEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            mul_gep_sext_ldrwrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+
+    ; We should be able to do the same with multiplies as with shifts.
+
+    liveins: $w1, $x0
+    ; CHECK-LABEL: name: mul_gep_sext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 4
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            mul_gep_zext_ldrwrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to do the same with multiplies as with shifts.
+
+    ; CHECK-LABEL: name: mul_gep_zext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ZEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 4
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            mul_gep_anyext_ldrwrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x0
+
+    ; We should be able to do the same with multiplies as with shifts.
+
+    ; CHECK-LABEL: name: mul_gep_anyext_ldrwrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_ANYEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 4
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name:            ldrdrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x0, $d0
+
+    ; Verify that we can select LDRDroW.
+
+    ; CHECK-LABEL: name: ldrdrow
+    ; CHECK: liveins: $w1, $x0, $d0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:fpr64 = LDRDroW %base, %foo, 1, 1 :: (load 8)
+    ; CHECK: $x0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8)
+    $x0 = COPY %load(<2 x s32>)
+    RET_ReallyLR implicit $x0
+...
+---
+name:            ldrxrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $w1, $x0, $d0
+
+    ; Verify that we can select LDRXroW.
+
+    ; CHECK-LABEL: name: ldrxrow
+    ; CHECK: liveins: $w1, $x0, $d0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:gpr64 = LDRXroW %base, %foo, 1, 1 :: (load 8)
+    ; CHECK: $x0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x0 = COPY %load(s64)
+    RET_ReallyLR implicit $x0
+...
+---
+name: ldrbbrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $x0, $w0, $w1
+
+    ; Verify that we can select LDRBBroW. Note that there is no shift here,
+    ; but we still fold the extend into the addressing mode.
+
+    ; CHECK-LABEL: name: ldrbbrow
+    ; CHECK: liveins: $x0, $w0, $w1
+    ; CHECK: %val:gpr32 = COPY $w1
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %load:gpr32 = LDRBBroW %base, %val, 1, 0 :: (load 1)
+    ; CHECK: $w0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $w0
+    %val:gpr(s32) = COPY $w1
+    %base:gpr(p0) = COPY $x0
+    %ext:gpr(s64) = G_SEXT %val(s32)
+    %ptr:gpr(p0) = G_PTR_ADD %base, %ext(s64)
+    %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 1)
+    $w0 = COPY %load(s32)
+    RET_ReallyLR implicit $w0
+...
+---
+name: ldrhrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    liveins: $w1, $x0
+
+    ; Verify that we can select ldrhrow.
+
+    ; CHECK-LABEL: name: ldrhrow
+    ; CHECK: liveins: $w1, $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %load:fpr16 = LDRHroW %base, %foo, 1, 1 :: (load 2)
+    ; CHECK: $h0 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $h0
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_MUL %c, %ext
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %load:fpr(s16) = G_LOAD %ptr(p0) :: (load 2)
+    $h0 = COPY %load(s16)
+    RET_ReallyLR implicit $h0
+...
+---
+name:            bad_and_mask_1
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; We should get a roX load here, not a roW load. We can't use the mask in
+    ; this test for an extend.
+
+    ; CHECK-LABEL: name: bad_and_mask_1
+    ; CHECK: liveins: $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+    ; CHECK: %and:gpr64common = ANDXri %imp, 4103
+    ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8)
+    ; CHECK: $x1 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x1
+    %base:gpr(p0) = COPY $x0
+    %imp:gpr(s64) = G_IMPLICIT_DEF
+    %bad_mask:gpr(s64) = G_CONSTANT i64 255
+    %and:gpr(s64) = G_AND %imp, %bad_mask
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %mul:gpr(s64) = G_MUL %c, %and
+    %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x1 = COPY %load(s64)
+    RET_ReallyLR implicit $x1
+...
+---
+name:            bad_and_mask_2
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; We should get a roX load here, not a roW load. We can't use the mask in
+    ; this test for an extend.
+
+    ; CHECK-LABEL: name: bad_and_mask_2
+    ; CHECK: liveins: $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+    ; CHECK: %and:gpr64common = ANDXri %imp, 4111
+    ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8)
+    ; CHECK: $x1 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x1
+    %base:gpr(p0) = COPY $x0
+    %imp:gpr(s64) = G_IMPLICIT_DEF
+    %bad_mask:gpr(s64) = G_CONSTANT i64 65535
+    %and:gpr(s64) = G_AND %imp, %bad_mask
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %mul:gpr(s64) = G_MUL %c, %and
+    %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x1 = COPY %load(s64)
+    RET_ReallyLR implicit $x1
+...
+---
+name:            and_uxtw
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+body:             |
+  bb.0:
+    liveins: $x0
+
+    ; The mask used for the AND here is legal for producing a roW load.
+
+    ; CHECK-LABEL: name: and_uxtw
+    ; CHECK: liveins: $x0
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+    ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %imp
+    ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+    ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+    ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8)
+    ; CHECK: $x1 = COPY %load
+    ; CHECK: RET_ReallyLR implicit $x1
+    %base:gpr(p0) = COPY $x0
+    %imp:gpr(s64) = G_IMPLICIT_DEF
+    %mask:gpr(s64) = G_CONSTANT i64 4294967295
+    %and:gpr(s64) = G_AND %imp, %mask
+    %c:gpr(s64) = G_CONSTANT i64 8
+    %mul:gpr(s64) = G_MUL %c, %and
+    %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+    %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+    $x1 = COPY %load(s64)
+    RET_ReallyLR implicit $x1
+...

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir
new file mode 100644
index 000000000000..41fcb6204726
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir
@@ -0,0 +1,52 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+---
+name:            strwrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $w2
+    ; CHECK-LABEL: name: strwrow
+    ; CHECK: liveins: $x0, $x1, $w2
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %dst:gpr32 = COPY $w2
+    ; CHECK: STRWroW %dst, %base, %foo, 1, 1 :: (store 4)
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 2
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %dst:gpr(s32) = COPY $w2
+    G_STORE %dst, %ptr :: (store 4)
+...
+---
+name:            strxrow
+alignment:       4
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $x0, $x1, $x2
+    ; CHECK-LABEL: name: strxrow
+    ; CHECK: liveins: $x0, $x1, $x2
+    ; CHECK: %base:gpr64sp = COPY $x0
+    ; CHECK: %foo:gpr32 = COPY $w1
+    ; CHECK: %dst:gpr64 = COPY $x2
+    ; CHECK: STRXroW %dst, %base, %foo, 1, 1 :: (store 8)
+    %base:gpr(p0) = COPY $x0
+    %foo:gpr(s32) = COPY $w1
+    %ext:gpr(s64) = G_SEXT %foo(s32)
+    %c:gpr(s64) = G_CONSTANT i64 3
+    %offset:gpr(s64) = G_SHL %ext, %c
+    %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+    %dst:gpr(s64) = COPY $x2
+    G_STORE %dst, %ptr :: (store 8)
+...


        

