[llvm] r366503 - [GlobalISel][AArch64] Add support for base register + offset register loads
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 14:50:12 PDT 2019
Author: paquette
Date: Thu Jul 18 14:50:11 2019
New Revision: 366503
URL: http://llvm.org/viewvc/llvm-project?rev=366503&view=rev
Log:
[GlobalISel][AArch64] Add support for base register + offset register loads
Add support for folding G_GEPs into loads of the form
```
ldr reg, [base, off]
```
when possible. This can save an add before the load. Currently, this is only
supported for loads of 64 bits into 64-bit registers.
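As a rough source-level sketch of the pattern this targets (the function below
and the assembly shown in its comments are illustrative assumptions, not taken
from the patch):
```
#include <cstdint>

// A 64-bit load through a base pointer plus a runtime byte offset. Under
// GlobalISel this lowers to a G_GEP feeding a G_LOAD.
int64_t load_at(const char *base, int64_t byte_off) {
  return *reinterpret_cast<const int64_t *>(base + byte_off);
}

// Without the fold, selection needs a separate add before the load:
//   add x8, x0, x1
//   ldr x0, [x8]
// With the register-offset addressing mode, one instruction suffices:
//   ldr x0, [x0, x1]
```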
Add a new addressing mode function, `selectAddrModeRegisterOffset`, which
performs this folding when it is profitable.
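The fold is exposed through the usual complex-renderer hook: the selector
returns a small set of callbacks that each render one operand of the final
instruction, or None when matching isn't worthwhile. A rough standalone sketch
of that shape (the types and names below are simplified stand-ins, not LLVM's
real API):
```
#include <functional>
#include <iostream>
#include <optional>
#include <vector>

// Simplified stand-ins for the LLVM types involved; this only sketches the
// "return a list of operand-renderer callbacks" idea behind addressing mode
// selectors such as selectAddrModeRegisterOffset.
struct InstrBuilderStub {
  std::vector<long> Operands;
  void addOperand(long Op) { Operands.push_back(Op); }
};
using RendererFn = std::function<void(InstrBuilderStub &)>;
using RendererFns = std::optional<std::vector<RendererFn>>;

// If matching the register-offset mode is profitable, return callbacks that
// append the base register, the offset register, and two zero immediates
// (no extend, no shift). Otherwise return nothing and the caller falls back.
RendererFns matchRegisterOffset(long BaseReg, long OffsetReg, bool Profitable) {
  if (!Profitable)
    return std::nullopt;
  std::vector<RendererFn> Renderers = {
      [=](InstrBuilderStub &MIB) { MIB.addOperand(BaseReg); },
      [=](InstrBuilderStub &MIB) { MIB.addOperand(OffsetReg); },
      [=](InstrBuilderStub &MIB) { MIB.addOperand(0); },
      [=](InstrBuilderStub &MIB) { MIB.addOperand(0); },
  };
  return Renderers;
}

int main() {
  InstrBuilderStub Load;
  if (auto Renderers = matchRegisterOffset(/*BaseReg=*/2, /*OffsetReg=*/3, true))
    for (auto &Render : *Renderers)
      Render(Load); // builds the LDRXroX-style operand list: base, off, 0, 0
  std::cout << "rendered " << Load.Operands.size() << " operands\n";
  return 0;
}
```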
Also add a test for addressing modes for G_LOAD.
Differential Revision: https://reviews.llvm.org/D64944
Added:
llvm/trunk/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
Modified:
llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp?rev=366503&r1=366502&r2=366503&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstructionSelector.cpp Thu Jul 18 14:50:11 2019
@@ -67,6 +67,7 @@ private:
bool earlySelect(MachineInstr &I) const;
bool earlySelectSHL(MachineInstr &I, MachineRegisterInfo &MRI) const;
+ bool earlySelectLoad(MachineInstr &I, MachineRegisterInfo &MRI) const;
bool selectVaStartAAPCS(MachineInstr &I, MachineFunction &MF,
MachineRegisterInfo &MRI) const;
@@ -182,6 +183,7 @@ private:
ComplexRendererFns selectAddrModeIndexed(MachineOperand &Root) const {
return selectAddrModeIndexed(Root, Width / 8);
}
+ ComplexRendererFns selectAddrModeRegisterOffset(MachineOperand &Root) const;
void renderTruncImm(MachineInstrBuilder &MIB, const MachineInstr &MI) const;
@@ -1158,6 +1160,57 @@ bool AArch64InstructionSelector::earlySe
return constrainSelectedInstRegOperands(*NewI, TII, TRI, RBI);
}
+bool AArch64InstructionSelector::earlySelectLoad(
+ MachineInstr &I, MachineRegisterInfo &MRI) const {
+ // Try to fold in shifts, etc into the addressing mode of a load.
+ assert(I.getOpcode() == TargetOpcode::G_LOAD && "unexpected op");
+
+ // Don't handle atomic loads/stores yet.
+ auto &MemOp = **I.memoperands_begin();
+ if (MemOp.getOrdering() != AtomicOrdering::NotAtomic) {
+ LLVM_DEBUG(dbgs() << "Atomic load/store not supported yet\n");
+ return false;
+ }
+
+ unsigned MemBytes = MemOp.getSize();
+
+ // Only support 64-bit loads for now.
+ if (MemBytes != 8)
+ return false;
+
+ Register DstReg = I.getOperand(0).getReg();
+ const LLT DstTy = MRI.getType(DstReg);
+ // Don't handle vectors.
+ if (DstTy.isVector())
+ return false;
+
+ unsigned DstSize = DstTy.getSizeInBits();
+ // TODO: 32-bit destinations.
+ if (DstSize != 64)
+ return false;
+
+ // Check if we can do any folding from GEPs etc. into the load.
+ auto ImmFn = selectAddrModeRegisterOffset(I.getOperand(1));
+ if (!ImmFn)
+ return false;
+
+ // We can fold something. Emit the load here.
+ MachineIRBuilder MIB(I);
+
+ // Choose the instruction based off the size of the element being loaded, and
+ // whether or not we're loading into a FPR.
+ const RegisterBank &RB = *RBI.getRegBank(DstReg, MRI, TRI);
+ unsigned Opc =
+ RB.getID() == AArch64::GPRRegBankID ? AArch64::LDRXroX : AArch64::LDRDroX;
+ // Construct the load.
+ auto LoadMI = MIB.buildInstr(Opc, {DstReg}, {});
+ for (auto &RenderFn : *ImmFn)
+ RenderFn(LoadMI);
+ LoadMI.addMemOperand(*I.memoperands_begin());
+ I.eraseFromParent();
+ return constrainSelectedInstRegOperands(*LoadMI, TII, TRI, RBI);
+}
+
bool AArch64InstructionSelector::earlySelect(MachineInstr &I) const {
assert(I.getParent() && "Instruction should be in a basic block!");
assert(I.getParent()->getParent() && "Instruction should be in a function!");
@@ -1169,6 +1222,8 @@ bool AArch64InstructionSelector::earlySe
switch (I.getOpcode()) {
case TargetOpcode::G_SHL:
return earlySelectSHL(I, MRI);
+ case TargetOpcode::G_LOAD:
+ return earlySelectLoad(I, MRI);
default:
return false;
}
@@ -3891,6 +3946,44 @@ AArch64InstructionSelector::selectArithI
}};
}
+/// This is used for computing addresses like this:
+///
+/// ldr x1, [x2, x3]
+///
+/// Where x2 is the base register, and x3 is an offset register.
+///
+/// When possible (or profitable) to fold a G_GEP into the address calculation,
+/// this will do so. Otherwise, it will return None.
+InstructionSelector::ComplexRendererFns
+AArch64InstructionSelector::selectAddrModeRegisterOffset(
+ MachineOperand &Root) const {
+ MachineRegisterInfo &MRI = Root.getParent()->getMF()->getRegInfo();
+
+ // If we have a constant offset, then we probably don't want to match a
+ // register offset.
+ if (isBaseWithConstantOffset(Root, MRI))
+ return None;
+
+ // We need a GEP.
+ MachineInstr *Gep = MRI.getVRegDef(Root.getReg());
+ if (!Gep || Gep->getOpcode() != TargetOpcode::G_GEP)
+ return None;
+
+ // If this is used more than once, let's not bother folding.
+ // TODO: Check if they are memory ops. If they are, then we can still fold
+ // without having to recompute anything.
+ if (!MRI.hasOneUse(Gep->getOperand(0).getReg()))
+ return None;
+
+ // Base is the GEP's LHS, offset is its RHS.
+ return {{
+ [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(1)); },
+ [=](MachineInstrBuilder &MIB) { MIB.add(Gep->getOperand(2)); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+ [=](MachineInstrBuilder &MIB) { MIB.addImm(0); },
+ }};
+}
+
/// Select a "register plus unscaled signed 9-bit immediate" address. This
/// should only match when there is an offset that is not valid for a scaled
/// immediate addressing mode. The "Size" argument is the size in bytes of the
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir?rev=366503&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir Thu Jul 18 14:50:11 2019
@@ -0,0 +1,90 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=aarch64-unknown-unknown -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
+
+--- |
+ define void @ldrxrox_breg_oreg(i64* %addr) { ret void }
+ define void @ldrdrox_breg_oreg(i64* %addr) { ret void }
+ define void @more_than_one_use(i64* %addr) { ret void }
+...
+
+---
+name: ldrxrox_breg_oreg
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+
+ ; CHECK-LABEL: name: ldrxrox_breg_oreg
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr)
+ ; CHECK: $x0 = COPY [[LDRXroX]]
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(p0) = G_GEP %0, %1
+ %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
+ $x0 = COPY %4(s64)
+ RET_ReallyLR implicit $x0
+...
+
+---
+name: ldrdrox_breg_oreg
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $d0, $x1
+ ; CHECK-LABEL: name: ldrdrox_breg_oreg
+ ; CHECK: liveins: $d0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64sp = COPY $d0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK: [[LDRDroX:%[0-9]+]]:fpr64 = LDRDroX [[COPY]], [[COPY1]], 0, 0 :: (load 8 from %ir.addr)
+ ; CHECK: $d0 = COPY [[LDRDroX]]
+ ; CHECK: RET_ReallyLR implicit $d0
+ %0:gpr(p0) = COPY $d0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(p0) = G_GEP %0, %1
+ %4:fpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
+ $d0 = COPY %4(s64)
+ RET_ReallyLR implicit $d0
+...
+
+---
+name: more_than_one_use
+alignment: 2
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+machineFunctionInfo: {}
+body: |
+ bb.0:
+ liveins: $x0, $x1
+ ; This shouldn't be folded, since we reuse the result of the G_GEP outside
+ ; the G_LOAD
+ ; CHECK-LABEL: name: more_than_one_use
+ ; CHECK: liveins: $x0, $x1
+ ; CHECK: [[COPY:%[0-9]+]]:gpr64 = COPY $x0
+ ; CHECK: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1
+ ; CHECK: [[ADDXrr:%[0-9]+]]:gpr64common = ADDXrr [[COPY]], [[COPY1]]
+ ; CHECK: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrr]], 0 :: (load 8 from %ir.addr)
+ ; CHECK: [[COPY2:%[0-9]+]]:gpr64 = COPY [[ADDXrr]]
+ ; CHECK: [[ADDXrr1:%[0-9]+]]:gpr64 = ADDXrr [[COPY2]], [[LDRXui]]
+ ; CHECK: $x0 = COPY [[ADDXrr1]]
+ ; CHECK: RET_ReallyLR implicit $x0
+ %0:gpr(p0) = COPY $x0
+ %1:gpr(s64) = COPY $x1
+ %2:gpr(p0) = G_GEP %0, %1
+ %4:gpr(s64) = G_LOAD %2(p0) :: (load 8 from %ir.addr)
+ %5:gpr(s64) = G_PTRTOINT %2
+ %6:gpr(s64) = G_ADD %5, %4
+ $x0 = COPY %6(s64)
+ RET_ReallyLR implicit $x0