[llvm] 9949b1a - [GlobalISel][AArch64] Import + select LDR*roW and STR*roW patterns
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 9 12:16:20 PST 2020
Author: Jessica Paquette
Date: 2020-01-09T12:15:56-08:00
New Revision: 9949b1a1753aa0f229c5b55ea01ec96f48164d9e
URL: https://github.com/llvm/llvm-project/commit/9949b1a1753aa0f229c5b55ea01ec96f48164d9e
DIFF: https://github.com/llvm/llvm-project/commit/9949b1a1753aa0f229c5b55ea01ec96f48164d9e.diff
LOG: [GlobalISel][AArch64] Import + select LDR*roW and STR*roW patterns
This adds support for selecting a large chunk of the load/store *roW patterns.
It is essentially a straight port of AArch64DAGToDAGISel::SelectAddrModeWRO
into GISel. The code is very similar to the XRO code; the main difference is
that in the *roW patterns, we want to try to fold in an extend, and *possibly*
a shift along with it. A good portion of this patch is refactoring the existing
XRO code.
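As a rough sketch (register names here are illustrative; the new MIR tests
below exercise exactly this shape), an offset computed as a sign-extend plus
shift:

  %ext:gpr(s64) = G_SEXT %foo(s32)
  %c:gpr(s64) = G_CONSTANT i64 2
  %offset:gpr(s64) = G_SHL %ext, %c
  %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
  %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)

can now be selected as a single roW load, with the two trailing immediates
encoding the extend kind and the shift:

  %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)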
- Add selectAddrModeWRO
- Factor the code shared by selectAddrModeXRO and selectAddrModeWRO out of
selectAddrModeShiftedExtendXReg and into selectExtendedSHL.
This is similar to the function of the same name in AArch64DAGToDAGISel.
- Add support for extends to the factored-out code in selectExtendedSHL.
- Teach getExtendTypeForInst how to handle AND masks that are intended to be
used in loads/stores (necessary for this addressing mode); see the example
after this list.
- Make getExtendTypeForInst non-static, because moving it made for an annoying
diff and I wanted to keep the WRO/XRO functions close to each other while
writing the code.
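For the AND-mask case mentioned above, a mask of 0xFFFFFFFF can be treated as
a UXTW and folded into the register-offset addressing mode, while 0xFF/0xFFFF
masks are rejected when IsLoadStore is true. A sketch based on the and_uxtw
test below:

  %mask:gpr(s64) = G_CONSTANT i64 4294967295 ; 0xFFFFFFFF acts as a zero-extend
  %and:gpr(s64) = G_AND %imp, %mask
  %c:gpr(s64) = G_CONSTANT i64 8
  %mul:gpr(s64) = G_MUL %c, %and
  %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
  %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)

selects to an LDRXroW whose offset is the low 32 bits of %imp, whereas the
bad_and_mask_* tests keep the roX form.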
Differential Revision: https://reviews.llvm.org/D72426
Added:
llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir
Modified:
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 4ac52a48b3a1..e9cacbf739ef 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -3131,6 +3131,22 @@ def ro_Windexed32 : ComplexPattern<i64, 4, "SelectAddrModeWRO<32>", []>;
def ro_Windexed64 : ComplexPattern<i64, 4, "SelectAddrModeWRO<64>", []>;
def ro_Windexed128 : ComplexPattern<i64, 4, "SelectAddrModeWRO<128>", []>;
+def gi_ro_Windexed8 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<8>">,
+ GIComplexPatternEquiv<ro_Windexed8>;
+def gi_ro_Windexed16 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<16>">,
+ GIComplexPatternEquiv<ro_Windexed16>;
+def gi_ro_Windexed32 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<32>">,
+ GIComplexPatternEquiv<ro_Windexed32>;
+def gi_ro_Windexed64 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<64>">,
+ GIComplexPatternEquiv<ro_Windexed64>;
+def gi_ro_Windexed128 :
+ GIComplexOperandMatcher<s64, "selectAddrModeWRO<128>">,
+ GIComplexPatternEquiv<ro_Windexed128>;
+
class MemExtendOperand<string Reg, int Width> : AsmOperandClass {
let Name = "Mem" # Reg # "Extend" # Width;
let PredicateMethod = "isMem" # Reg # "Extend<" # Width # ">";
diff --git a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 193136fe53d8..ad59a95de288 100644
--- a/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -206,6 +206,14 @@ class AArch64InstructionSelector : public InstructionSelector {
ComplexRendererFns
selectAddrModeShiftedExtendXReg(MachineOperand &Root,
unsigned SizeInBytes) const;
+
+ /// Returns a \p ComplexRendererFns which contains a base, offset, and whether
+ /// or not a shift + extend should be folded into an addressing mode. Returns
+ /// None when this is not profitable or possible.
+ ComplexRendererFns
+ selectExtendedSHL(MachineOperand &Root, MachineOperand &Base,
+ MachineOperand &Offset, unsigned SizeInBytes,
+ bool WantsExt) const;
ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
ComplexRendererFns selectAddrModeXRO(MachineOperand &Root,
unsigned SizeInBytes) const;
@@ -214,6 +222,13 @@ class AArch64InstructionSelector : public InstructionSelector {
return selectAddrModeXRO(Root, Width / 8);
}
+ ComplexRendererFns selectAddrModeWRO(MachineOperand &Root,
+ unsigned SizeInBytes) const;
+ template <int Width>
+ ComplexRendererFns selectAddrModeWRO(MachineOperand &Root) const {
+ return selectAddrModeWRO(Root, Width / 8);
+ }
+
ComplexRendererFns selectShiftedRegister(MachineOperand &Root) const;
ComplexRendererFns selectArithShiftedRegister(MachineOperand &Root) const {
@@ -228,6 +243,15 @@ class AArch64InstructionSelector : public InstructionSelector {
return selectShiftedRegister(Root);
}
+ /// Given an extend instruction, determine the correct shift-extend type for
+ /// that instruction.
+ ///
+ /// If the instruction is going to be used in a load or store, pass
+ /// \p IsLoadStore = true.
+ AArch64_AM::ShiftExtendType
+ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI,
+ bool IsLoadStore = false) const;
+
/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32. Narrow ExtReg to a 32-bit register using a
/// subregister copy if necessary. Return either ExtReg, or the result of the
@@ -4234,45 +4258,15 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg(
[](MachineInstr &Use) { return Use.mayLoadOrStore(); });
}
-/// This is used for computing addresses like this:
-///
-/// ldr x1, [x2, x3, lsl #3]
-///
-/// Where x2 is the base register, and x3 is an offset register. The shift-left
-/// is a constant value specific to this load instruction. That is, we'll never
-/// see anything other than a 3 here (which corresponds to the size of the
-/// element being loaded.)
InstructionSelector::ComplexRendererFns
-AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
- MachineOperand &Root, unsigned SizeInBytes) const {
- if (!Root.isReg())
- return None;
- MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+AArch64InstructionSelector::selectExtendedSHL(
+ MachineOperand &Root, MachineOperand &Base, MachineOperand &Offset,
+ unsigned SizeInBytes, bool WantsExt) const {
+ assert(Base.isReg() && "Expected base to be a register operand");
+ assert(Offset.isReg() && "Expected offset to be a register operand");
- // Make sure that the memory op is a valid size.
- int64_t LegalShiftVal = Log2_32(SizeInBytes);
- if (LegalShiftVal == 0)
- return None;
-
- // We want to find something like this:
- //
- // val = G_CONSTANT LegalShiftVal
- // shift = G_SHL off_reg val
- // ptr = G_PTR_ADD base_reg shift
- // x = G_LOAD ptr
- //
- // And fold it into this addressing mode:
- //
- // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
-
- // Check if we can find the G_PTR_ADD.
- MachineInstr *Gep = getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
- if (!Gep || !isWorthFoldingIntoExtendedReg(*Gep, MRI))
- return None;
-
- // Now, try to match an opcode which will match our specific offset.
- // We want a G_SHL or a G_MUL.
- MachineInstr *OffsetInst = getDefIgnoringCopies(Gep->getOperand(2).getReg(), MRI);
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+ MachineInstr *OffsetInst = MRI.getVRegDef(Offset.getReg());
if (!OffsetInst)
return None;
@@ -4280,6 +4274,10 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
if (OffsetOpc != TargetOpcode::G_SHL && OffsetOpc != TargetOpcode::G_MUL)
return None;
+ // Make sure that the memory op is a valid size.
+ int64_t LegalShiftVal = Log2_32(SizeInBytes);
+ if (LegalShiftVal == 0)
+ return None;
if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
return None;
@@ -4324,20 +4322,75 @@ AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
if (ImmVal != LegalShiftVal)
return None;
+ unsigned SignExtend = 0;
+ if (WantsExt) {
+ // Check if the offset is defined by an extend.
+ MachineInstr *ExtInst = getDefIgnoringCopies(OffsetReg, MRI);
+ auto Ext = getExtendTypeForInst(*ExtInst, MRI, true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+
+ SignExtend = Ext == AArch64_AM::SXTW;
+
+ // Need a 32-bit wide register here.
+ MachineIRBuilder MIB(*MRI.getVRegDef(Root.getReg()));
+ OffsetReg = ExtInst->getOperand(1).getReg();
+ OffsetReg = narrowExtendRegIfNeeded(OffsetReg, MIB);
+ }
+
// We can use the LHS of the GEP as the base, and the LHS of the shift as an
// offset. Signify that we are shifting by setting the shift flag to 1.
- return {{[=](MachineInstrBuilder &MIB) {
- MIB.addUse(Gep->getOperand(1).getReg());
- },
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(Base.getReg()); },
[=](MachineInstrBuilder &MIB) { MIB.addUse(OffsetReg); },
[=](MachineInstrBuilder &MIB) {
// Need to add both immediates here to make sure that they are both
// added to the instruction.
- MIB.addImm(0);
+ MIB.addImm(SignExtend);
MIB.addImm(1);
}}};
}
+/// This is used for computing addresses like this:
+///
+/// ldr x1, [x2, x3, lsl #3]
+///
+/// Where x2 is the base register, and x3 is an offset register. The shift-left
+/// is a constant value specific to this load instruction. That is, we'll never
+/// see anything other than a 3 here (which corresponds to the size of the
+/// element being loaded.)
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeShiftedExtendXReg(
+ MachineOperand &Root, unsigned SizeInBytes) const {
+ if (!Root.isReg())
+ return None;
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ // We want to find something like this:
+ //
+ // val = G_CONSTANT LegalShiftVal
+ // shift = G_SHL off_reg val
+ // ptr = G_PTR_ADD base_reg shift
+ // x = G_LOAD ptr
+ //
+ // And fold it into this addressing mode:
+ //
+ // ldr x, [base_reg, off_reg, lsl #LegalShiftVal]
+
+ // Check if we can find the G_PTR_ADD.
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ return None;
+
+ // Now, try to match an opcode which will match our specific offset.
+ // We want a G_SHL or a G_MUL.
+ MachineInstr *OffsetInst =
+ getDefIgnoringCopies(PtrAdd->getOperand(2).getReg(), MRI);
+ return selectExtendedSHL(Root, PtrAdd->getOperand(1),
+ OffsetInst->getOperand(0), SizeInBytes,
+ /*WantsExt=*/false);
+}
+
/// This is used for computing addresses like this:
///
/// ldr x1, [x2, x3]
@@ -4399,6 +4452,74 @@ AArch64InstructionSelector::selectAddrModeXRO(MachineOperand &Root,
return selectAddrModeRegisterOffset(Root);
}
+/// This is used for computing addresses like this:
+///
+/// ldr x0, [xBase, wOffset, sxtw #LegalShiftVal]
+///
+/// Where we have a 64-bit base register, a 32-bit offset register, and an
+/// extend (which may or may not be signed).
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeWRO(MachineOperand &Root,
+ unsigned SizeInBytes) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ MachineInstr *PtrAdd =
+ getOpcodeDef(TargetOpcode::G_PTR_ADD, Root.getReg(), MRI);
+ if (!PtrAdd || !isWorthFoldingIntoExtendedReg(*PtrAdd, MRI))
+ return None;
+
+ MachineOperand &LHS = PtrAdd->getOperand(1);
+ MachineOperand &RHS = PtrAdd->getOperand(2);
+ MachineInstr *OffsetInst = getDefIgnoringCopies(RHS.getReg(), MRI);
+
+ // The first case is the same as selectAddrModeXRO, except we need an extend.
+ // In this case, we try to find a shift and extend, and fold them into the
+ // addressing mode.
+ //
+ // E.g.
+ //
+ // off_reg = G_Z/S/ANYEXT ext_reg
+ // val = G_CONSTANT LegalShiftVal
+ // shift = G_SHL off_reg val
+ // ptr = G_PTR_ADD base_reg shift
+ // x = G_LOAD ptr
+ //
+ // In this case we can get a load like this:
+ //
+ // ldr x0, [base_reg, ext_reg, sxtw #LegalShiftVal]
+ auto ExtendedShl = selectExtendedSHL(Root, LHS, OffsetInst->getOperand(0),
+ SizeInBytes, /*WantsExt=*/true);
+ if (ExtendedShl)
+ return ExtendedShl;
+
+ // There was no shift. We can try and fold a G_Z/S/ANYEXT in alone though.
+ //
+ // e.g.
+ // ldr something, [base_reg, ext_reg, sxtw]
+ if (!isWorthFoldingIntoExtendedReg(*OffsetInst, MRI))
+ return None;
+
+ // Check if this is an extend. We'll get an extend type if it is.
+ AArch64_AM::ShiftExtendType Ext =
+ getExtendTypeForInst(*OffsetInst, MRI, /*IsLoadStore=*/true);
+ if (Ext == AArch64_AM::InvalidShiftExtend)
+ return None;
+
+ // Need a 32-bit wide register.
+ MachineIRBuilder MIB(*PtrAdd);
+ Register ExtReg =
+ narrowExtendRegIfNeeded(OffsetInst->getOperand(1).getReg(), MIB);
+ unsigned SignExtend = Ext == AArch64_AM::SXTW;
+
+ // Base is LHS, offset is ExtReg.
+ return {{[=](MachineInstrBuilder &MIB) { MIB.addUse(LHS.getReg()); },
+ [=](MachineInstrBuilder &MIB) { MIB.addUse(ExtReg); },
+ [=](MachineInstrBuilder &MIB) {
+ MIB.addImm(SignExtend);
+ MIB.addImm(0);
+ }}};
+}
+
/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
@@ -4561,9 +4682,8 @@ AArch64InstructionSelector::selectShiftedRegister(MachineOperand &Root) const {
[=](MachineInstrBuilder &MIB) { MIB.addImm(ShiftVal); }}};
}
-/// Get the correct ShiftExtendType for an extend instruction.
-static AArch64_AM::ShiftExtendType
-getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
+AArch64_AM::ShiftExtendType AArch64InstructionSelector::getExtendTypeForInst(
+ MachineInstr &MI, MachineRegisterInfo &MRI, bool IsLoadStore) const {
unsigned Opc = MI.getOpcode();
// Handle explicit extend instructions first.
@@ -4610,9 +4730,9 @@ getExtendTypeForInst(MachineInstr &MI, MachineRegisterInfo &MRI) {
default:
return AArch64_AM::InvalidShiftExtend;
case 0xFF:
- return AArch64_AM::UXTB;
+ return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
case 0xFFFF:
- return AArch64_AM::UXTH;
+ return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
case 0xFFFFFFFF:
return AArch64_AM::UXTW;
}
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
new file mode 100644
index 000000000000..e7c95fbe1063
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-wro-addressing-modes.mir
@@ -0,0 +1,431 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+...
+---
+name: shl_gep_sext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w1, $x0
+
+ ; We should be able to fold a shift + extend into the pattern.
+ ; In this case, we should get a roW load with two 1s, representing a shift
+ ; plus sign extend.
+
+ ; CHECK-LABEL: name: shl_gep_sext_ldrwrow
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)
+ ; CHECK: $w0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $w0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_SEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 2
+ %offset:gpr(s64) = G_SHL %ext, %c
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ $w0 = COPY %load(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: shl_gep_zext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w1, $x0
+
+ ; We should be able to fold a shift + extend into the pattern.
+ ; In this case, we should get a roW load with a 0 representing a zero-extend
+ ; and a 1 representing a shift.
+
+ ; CHECK-LABEL: name: shl_gep_zext_ldrwrow
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+ ; CHECK: $w0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $w0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_ZEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 2
+ %offset:gpr(s64) = G_SHL %ext, %c
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ $w0 = COPY %load(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: shl_gep_anyext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w1, $x0
+
+ ; We should be able to fold a shift + extend into the pattern.
+ ; In this case, we should get a roW load with a 0 representing a zero-extend
+ ; and a 1 representing a shift.
+
+ ; CHECK-LABEL: name: shl_gep_anyext_ldrwrow
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+ ; CHECK: $w0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $w0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_ANYEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 2
+ %offset:gpr(s64) = G_SHL %ext, %c
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ $w0 = COPY %load(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: mul_gep_sext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+
+ ; We should be able to do the same with multiplies as with shifts.
+
+ liveins: $w1, $x0
+ ; CHECK-LABEL: name: mul_gep_sext_ldrwrow
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 1, 1 :: (load 4)
+ ; CHECK: $w0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $w0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_SEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 4
+ %offset:gpr(s64) = G_MUL %c, %ext
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ $w0 = COPY %load(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: mul_gep_zext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w1, $x0
+
+ ; We should be able to do the same with multiplies as with shifts.
+
+ ; CHECK-LABEL: name: mul_gep_zext_ldrwrow
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+ ; CHECK: $w0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $w0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_ZEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 4
+ %offset:gpr(s64) = G_MUL %c, %ext
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ $w0 = COPY %load(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: mul_gep_anyext_ldrwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w1, $x0
+
+ ; We should be able to do the same with multiplies as with shifts.
+
+ ; CHECK-LABEL: name: mul_gep_anyext_ldrwrow
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:gpr32 = LDRWroW %base, %foo, 0, 1 :: (load 4)
+ ; CHECK: $w0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $w0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_ANYEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 4
+ %offset:gpr(s64) = G_MUL %c, %ext
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 4)
+ $w0 = COPY %load(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: ldrdrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w1, $x0, $d0
+
+ ; Verify that we can select LDRDroW.
+
+ ; CHECK-LABEL: name: ldrdrow
+ ; CHECK: liveins: $w1, $x0, $d0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:fpr64 = LDRDroW %base, %foo, 1, 1 :: (load 8)
+ ; CHECK: $x0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $x0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_SEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 8
+ %offset:gpr(s64) = G_MUL %c, %ext
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:fpr(<2 x s32>) = G_LOAD %ptr(p0) :: (load 8)
+ $x0 = COPY %load(<2 x s32>)
+ RET_ReallyLR implicit $x0
+...
+---
+name: ldrxrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $w1, $x0, $d0
+
+ ; Verify that we can select LDRXroW.
+
+ ; CHECK-LABEL: name: ldrxrow
+ ; CHECK: liveins: $w1, $x0, $d0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:gpr64 = LDRXroW %base, %foo, 1, 1 :: (load 8)
+ ; CHECK: $x0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $x0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_SEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 8
+ %offset:gpr(s64) = G_MUL %c, %ext
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+ $x0 = COPY %load(s64)
+ RET_ReallyLR implicit $x0
+...
+---
+name: ldrbbrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $x0, $w0, $w1
+
+ ; Verify that we can select LDRBBroW. Note that there is no shift here,
+ ; but we still fold the extend into the addressing mode.
+
+ ; CHECK-LABEL: name: ldrbbrow
+ ; CHECK: liveins: $x0, $w0, $w1
+ ; CHECK: %val:gpr32 = COPY $w1
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %load:gpr32 = LDRBBroW %base, %val, 1, 0 :: (load 1)
+ ; CHECK: $w0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $w0
+ %val:gpr(s32) = COPY $w1
+ %base:gpr(p0) = COPY $x0
+ %ext:gpr(s64) = G_SEXT %val(s32)
+ %ptr:gpr(p0) = G_PTR_ADD %base, %ext(s64)
+ %load:gpr(s32) = G_LOAD %ptr(p0) :: (load 1)
+ $w0 = COPY %load(s32)
+ RET_ReallyLR implicit $w0
+...
+---
+name: ldrhrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ liveins: $w1, $x0
+
+ ; Verify that we can select ldrhrow.
+
+ ; CHECK-LABEL: name: ldrhrow
+ ; CHECK: liveins: $w1, $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %load:fpr16 = LDRHroW %base, %foo, 1, 1 :: (load 2)
+ ; CHECK: $h0 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $h0
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_SEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 2
+ %offset:gpr(s64) = G_MUL %c, %ext
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %load:fpr(s16) = G_LOAD %ptr(p0) :: (load 2)
+ $h0 = COPY %load(s16)
+ RET_ReallyLR implicit $h0
+...
+---
+name: bad_and_mask_1
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; We should get a roX load here, not a roW load. We can't use the mask in
+ ; this test for an extend.
+
+ ; CHECK-LABEL: name: bad_and_mask_1
+ ; CHECK: liveins: $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+ ; CHECK: %and:gpr64common = ANDXri %imp, 4103
+ ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8)
+ ; CHECK: $x1 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $x1
+ %base:gpr(p0) = COPY $x0
+ %imp:gpr(s64) = G_IMPLICIT_DEF
+ %bad_mask:gpr(s64) = G_CONSTANT i64 255
+ %and:gpr(s64) = G_AND %imp, %bad_mask
+ %c:gpr(s64) = G_CONSTANT i64 8
+ %mul:gpr(s64) = G_MUL %c, %and
+ %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+ %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+ $x1 = COPY %load(s64)
+ RET_ReallyLR implicit $x1
+...
+---
+name: bad_and_mask_2
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; We should get a roX load here, not a roW load. We can't use the mask in
+ ; this test for an extend.
+
+ ; CHECK-LABEL: name: bad_and_mask_2
+ ; CHECK: liveins: $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+ ; CHECK: %and:gpr64common = ANDXri %imp, 4111
+ ; CHECK: %load:gpr64 = LDRXroX %base, %and, 0, 1 :: (load 8)
+ ; CHECK: $x1 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $x1
+ %base:gpr(p0) = COPY $x0
+ %imp:gpr(s64) = G_IMPLICIT_DEF
+ %bad_mask:gpr(s64) = G_CONSTANT i64 65535
+ %and:gpr(s64) = G_AND %imp, %bad_mask
+ %c:gpr(s64) = G_CONSTANT i64 8
+ %mul:gpr(s64) = G_MUL %c, %and
+ %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+ %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+ $x1 = COPY %load(s64)
+ RET_ReallyLR implicit $x1
+...
+---
+name: and_uxtw
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+frameInfo:
+ maxAlignment: 1
+body: |
+ bb.0:
+ liveins: $x0
+
+ ; The mask used for the AND here is legal for producing a roW load.
+
+ ; CHECK-LABEL: name: and_uxtw
+ ; CHECK: liveins: $x0
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %imp:gpr64 = IMPLICIT_DEF
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64all = COPY %imp
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr32all = COPY [[COPY]].sub_32
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr32 = COPY [[COPY1]]
+ ; CHECK: %load:gpr64 = LDRXroW %base, [[COPY2]], 0, 1 :: (load 8)
+ ; CHECK: $x1 = COPY %load
+ ; CHECK: RET_ReallyLR implicit $x1
+ %base:gpr(p0) = COPY $x0
+ %imp:gpr(s64) = G_IMPLICIT_DEF
+ %mask:gpr(s64) = G_CONSTANT i64 4294967295
+ %and:gpr(s64) = G_AND %imp, %mask
+ %c:gpr(s64) = G_CONSTANT i64 8
+ %mul:gpr(s64) = G_MUL %c, %and
+ %ptr:gpr(p0) = G_PTR_ADD %base, %mul(s64)
+ %load:gpr(s64) = G_LOAD %ptr(p0) :: (load 8)
+ $x1 = COPY %load(s64)
+ RET_ReallyLR implicit $x1
+...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir
new file mode 100644
index 000000000000..41fcb6204726
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-wro-addressing-modes.mir
@@ -0,0 +1,52 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+---
+name: strwrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $w2
+ ; CHECK-LABEL: name: strwrow
+ ; CHECK: liveins: $x0, $x1, $w2
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %dst:gpr32 = COPY $w2
+ ; CHECK: STRWroW %dst, %base, %foo, 1, 1 :: (store 4)
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_SEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 2
+ %offset:gpr(s64) = G_SHL %ext, %c
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %dst:gpr(s32) = COPY $w2
+ G_STORE %dst, %ptr :: (store 4)
+...
+---
+name: strxrow
+alignment: 4
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1, $x2
+ ; CHECK-LABEL: name: strxrow
+ ; CHECK: liveins: $x0, $x1, $x2
+ ; CHECK: %base:gpr64sp = COPY $x0
+ ; CHECK: %foo:gpr32 = COPY $w1
+ ; CHECK: %dst:gpr64 = COPY $x2
+ ; CHECK: STRXroW %dst, %base, %foo, 1, 1 :: (store 8)
+ %base:gpr(p0) = COPY $x0
+ %foo:gpr(s32) = COPY $w1
+ %ext:gpr(s64) = G_SEXT %foo(s32)
+ %c:gpr(s64) = G_CONSTANT i64 3
+ %offset:gpr(s64) = G_SHL %ext, %c
+ %ptr:gpr(p0) = G_PTR_ADD %base, %offset(s64)
+ %dst:gpr(s64) = COPY $x2
+ G_STORE %dst, %ptr :: (store 8)
+...