[llvm] 480e7f4 - [AArch64][GlobalISel] Share address mode selection code for memops
Jessica Paquette via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 9 15:25:15 PDT 2020
Author: Jessica Paquette
Date: 2020-09-09T15:14:46-07:00
New Revision: 480e7f43a22578beaa2edc7a271e77793222a1c3
URL: https://github.com/llvm/llvm-project/commit/480e7f43a22578beaa2edc7a271e77793222a1c3
DIFF: https://github.com/llvm/llvm-project/commit/480e7f43a22578beaa2edc7a271e77793222a1c3.diff
LOG: [AArch64][GlobalISel] Share address mode selection code for memops
We were missing support for the G_ADD_LOW + ADRP folding optimization in the
manual selection code for G_LOAD, G_STORE, and G_ZEXTLOAD.
As a result, we were missing cases like this:
```
@foo = external hidden global i32*
define void @baz(i32* %0) {
  store i32* %0, i32** @foo
  ret void
}
```
https://godbolt.org/z/16r7ad
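With the fold, the `ADRP` result is used directly as the base register of the store and the
`:lo12:` page offset of the global is carried in the immediate operand (see the new
`store_adrp_add_low` MIR test below), instead of materializing the full address through a
separate `G_ADD_LOW`.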
This functionality already existed in the addressing mode functions for the
importer. So, this patch makes the manual selection code use
`selectAddrModeIndexed` rather than duplicating work.
This is a 0.2% geomean code size improvement for CTMark at -O3.
There is one code size increase (0.1% on lencod), which is likely because
`selectAddrModeIndexed` doesn't look through constants.
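For readers less familiar with the GlobalISel complex-pattern interface: `selectAddrModeIndexed`
returns an optional list of renderer callbacks, each of which appends one operand to the
instruction being built; the caller either applies them in order or falls back to a plain
base + 0 addressing mode, which is exactly what the new `SelectLoadStoreAddressingMode` lambda
in the diff does. The standalone sketch below mimics that shape with toy types (`InstBuilder`,
`RendererFn`, and `selectAddrModeIndexedToy` are illustrative stand-ins, not LLVM's real classes):
```
#include <functional>
#include <iostream>
#include <optional>
#include <string>
#include <vector>

// Toy stand-in for MachineInstrBuilder: just an opcode plus operand strings.
struct InstBuilder {
  std::string Opcode;
  std::vector<std::string> Operands;
  void addOperand(const std::string &Op) { Operands.push_back(Op); }
};

// Toy stand-in for InstructionSelector::ComplexRendererFns.
using RendererFn = std::function<void(InstBuilder &)>;
using RendererFns = std::optional<std::vector<RendererFn>>;

// Sketch of an unsigned-immediate addressing-mode matcher: if the offset is
// non-negative, a multiple of the access size, and fits in the scaled 12-bit
// immediate, return renderers for the base register and the scaled offset.
RendererFns selectAddrModeIndexedToy(const std::string &Base, long Offset,
                                     long SizeInBytes) {
  if (Offset < 0 || Offset % SizeInBytes != 0 || Offset / SizeInBytes >= 4096)
    return std::nullopt; // Can't fold; the caller uses a plain base + 0.
  return std::vector<RendererFn>{
      [Base](InstBuilder &MIB) { MIB.addOperand(Base); },
      [=](InstBuilder &MIB) {
        MIB.addOperand(std::to_string(Offset / SizeInBytes));
      }};
}

int main() {
  InstBuilder Store{"STRXui", {"%val"}};
  if (auto AddrModeFns = selectAddrModeIndexedToy("%base", 16, 8)) {
    for (auto &Fn : *AddrModeFns)
      Fn(Store); // Apply each renderer in order, as the patch does.
  } else {
    Store.addOperand("%base");
    Store.addOperand("0"); // Fallback: unscaled base with offset 0.
  }
  std::cout << Store.Opcode;
  for (const auto &Op : Store.Operands)
    std::cout << ' ' << Op;
  std::cout << '\n'; // Prints: STRXui %val %base 2
  return 0;
}
```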
Differential Revision: https://reviews.llvm.org/D87397
Added:
Modified:
llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index a8d68180bb76..228db83533cd 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2260,18 +2260,19 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
}
auto &MemOp = **I.memoperands_begin();
+ uint64_t MemSizeInBytes = MemOp.getSize();
if (MemOp.isAtomic()) {
// For now we just support s8 acquire loads to be able to compile stack
// protector code.
if (MemOp.getOrdering() == AtomicOrdering::Acquire &&
- MemOp.getSize() == 1) {
+ MemSizeInBytes == 1) {
I.setDesc(TII.get(AArch64::LDARB));
return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}
LLVM_DEBUG(dbgs() << "Atomic load/store not fully supported yet\n");
return false;
}
- unsigned MemSizeInBits = MemOp.getSize() * 8;
+ unsigned MemSizeInBits = MemSizeInBytes * 8;
const Register PtrReg = I.getOperand(1).getReg();
#ifndef NDEBUG
@@ -2286,78 +2287,78 @@ bool AArch64InstructionSelector::select(MachineInstr &I) {
const Register ValReg = I.getOperand(0).getReg();
const RegisterBank &RB = *RBI.getRegBank(ValReg, MRI, TRI);
- const unsigned NewOpc =
- selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
- if (NewOpc == I.getOpcode())
- return false;
-
- I.setDesc(TII.get(NewOpc));
-
- uint64_t Offset = 0;
- auto *PtrMI = MRI.getVRegDef(PtrReg);
-
- // Try to fold a GEP into our unsigned immediate addressing mode.
- if (PtrMI->getOpcode() == TargetOpcode::G_PTR_ADD) {
- if (auto COff = getConstantVRegVal(PtrMI->getOperand(2).getReg(), MRI)) {
- int64_t Imm = *COff;
- const unsigned Size = MemSizeInBits / 8;
- const unsigned Scale = Log2_32(Size);
- if ((Imm & (Size - 1)) == 0 && Imm >= 0 && Imm < (0x1000 << Scale)) {
- Register Ptr2Reg = PtrMI->getOperand(1).getReg();
- I.getOperand(1).setReg(Ptr2Reg);
- PtrMI = MRI.getVRegDef(Ptr2Reg);
- Offset = Imm / Size;
- }
+ // Helper lambda for partially selecting I. Either returns the original
+ // instruction with an updated opcode, or a new instruction.
+ auto SelectLoadStoreAddressingMode = [&]() -> MachineInstr * {
+ bool IsStore = I.getOpcode() == TargetOpcode::G_STORE;
+ const unsigned NewOpc =
+ selectLoadStoreUIOp(I.getOpcode(), RB.getID(), MemSizeInBits);
+ if (NewOpc == I.getOpcode())
+ return nullptr;
+ // Check if we can fold anything into the addressing mode.
+ auto AddrModeFns =
+ selectAddrModeIndexed(I.getOperand(1), MemSizeInBytes);
+ if (!AddrModeFns) {
+ // Can't fold anything. Use the original instruction.
+ I.setDesc(TII.get(NewOpc));
+ I.addOperand(MachineOperand::CreateImm(0));
+ return &I;
}
- }
- // If we haven't folded anything into our addressing mode yet, try to fold
- // a frame index into the base+offset.
- if (!Offset && PtrMI->getOpcode() == TargetOpcode::G_FRAME_INDEX)
- I.getOperand(1).ChangeToFrameIndex(PtrMI->getOperand(1).getIndex());
+ // Folded something. Create a new instruction and return it.
+ auto NewInst = MIB.buildInstr(NewOpc, {}, {}, I.getFlags());
+ IsStore ? NewInst.addUse(ValReg) : NewInst.addDef(ValReg);
+ NewInst.cloneMemRefs(I);
+ for (auto &Fn : *AddrModeFns)
+ Fn(NewInst);
+ I.eraseFromParent();
+ return &*NewInst;
+ };
- I.addOperand(MachineOperand::CreateImm(Offset));
+ MachineInstr *LoadStore = SelectLoadStoreAddressingMode();
+ if (!LoadStore)
+ return false;
// If we're storing a 0, use WZR/XZR.
if (Opcode == TargetOpcode::G_STORE) {
auto CVal = getConstantVRegValWithLookThrough(
- ValReg, MRI, /*LookThroughInstrs = */ true,
+ LoadStore->getOperand(0).getReg(), MRI, /*LookThroughInstrs = */ true,
/*HandleFConstants = */ false);
if (CVal && CVal->Value == 0) {
- unsigned Opc = I.getOpcode();
- switch (Opc) {
+ switch (LoadStore->getOpcode()) {
case AArch64::STRWui:
case AArch64::STRHHui:
case AArch64::STRBBui:
- I.getOperand(0).setReg(AArch64::WZR);
+ LoadStore->getOperand(0).setReg(AArch64::WZR);
break;
case AArch64::STRXui:
- I.getOperand(0).setReg(AArch64::XZR);
+ LoadStore->getOperand(0).setReg(AArch64::XZR);
break;
}
}
}
if (IsZExtLoad) {
- // The zextload from a smaller type to i32 should be handled by the importer.
- if (MRI.getType(ValReg).getSizeInBits() != 64)
+ // The zextload from a smaller type to i32 should be handled by the
+ // importer.
+ if (MRI.getType(LoadStore->getOperand(0).getReg()).getSizeInBits() != 64)
return false;
// If we have a ZEXTLOAD then change the load's type to be a narrower reg
- //and zero_extend with SUBREG_TO_REG.
+ // and zero_extend with SUBREG_TO_REG.
Register LdReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
- Register DstReg = I.getOperand(0).getReg();
- I.getOperand(0).setReg(LdReg);
+ Register DstReg = LoadStore->getOperand(0).getReg();
+ LoadStore->getOperand(0).setReg(LdReg);
- MIB.setInsertPt(MIB.getMBB(), std::next(I.getIterator()));
+ MIB.setInsertPt(MIB.getMBB(), std::next(LoadStore->getIterator()));
MIB.buildInstr(AArch64::SUBREG_TO_REG, {DstReg}, {})
.addImm(0)
.addUse(LdReg)
.addImm(AArch64::sub_32);
- constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
return RBI.constrainGenericRegister(DstReg, AArch64::GPR64allRegClass,
MRI);
}
- return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+ return constrainSelectedInstRegOperands(*LoadStore, TII, TRI, RBI);
}
case TargetOpcode::G_SMULH:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
index db355dfc151f..05038b40ca36 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-store.mir
@@ -39,6 +39,9 @@
define void @store_8xi16(<8 x i16> %v, <8 x i16>* %ptr) { ret void }
define void @store_16xi8(<16 x i8> %v, <16 x i8>* %ptr) { ret void }
+ @x = external hidden local_unnamed_addr global i32*, align 8
+ define void @store_adrp_add_low() { ret void }
+
...
---
@@ -600,3 +603,20 @@ body: |
RET_ReallyLR
...
+---
+name: store_adrp_add_low
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $x0
+ ; CHECK-LABEL: name: store_adrp_add_low
+ ; CHECK: liveins: $x0
+ ; CHECK: %copy:gpr64 = COPY $x0
+ ; CHECK: %adrp:gpr64common = ADRP target-flags(aarch64-page) @x
+ ; CHECK: STRXui %copy, %adrp, target-flags(aarch64-pageoff, aarch64-nc) @x :: (store 8 into @x)
+ %copy:gpr(p0) = COPY $x0
+ %adrp:gpr64(p0) = ADRP target-flags(aarch64-page) @x
+ %add_low:gpr(p0) = G_ADD_LOW %adrp(p0), target-flags(aarch64-pageoff, aarch64-nc) @x
+ G_STORE %copy(p0), %add_low(p0) :: (store 8 into @x)