[llvm] r241926 - ARMLoadStoreOptimizer: Create LDRD/STRD on thumb2
Matthias Braun
matze at braunis.de
Fri Jul 10 11:28:50 PDT 2015
Author: matze
Date: Fri Jul 10 13:28:49 2015
New Revision: 241926
URL: http://llvm.org/viewvc/llvm-project?rev=241926&view=rev
Log:
ARMLoadStoreOptimizer: Create LDRD/STRD on thumb2
Differential Revision: http://reviews.llvm.org/D10623
Modified:
llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
llvm/trunk/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
llvm/trunk/test/CodeGen/ARM/byval-align.ll
llvm/trunk/test/CodeGen/ARM/ldrd.ll
llvm/trunk/test/CodeGen/ARM/memset-inline.ll
llvm/trunk/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
llvm/trunk/test/CodeGen/Thumb2/aapcs.ll
Modified: llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp?rev=241926&r1=241925&r2=241926&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp Fri Jul 10 13:28:49 2015
@@ -111,6 +111,10 @@ namespace {
/// Index into the basic block where the merged instruction will be
/// inserted. (See MemOpQueueEntry.Position)
unsigned InsertPos;
+ /// Whether the instructions can be merged into a ldm/stm instruction.
+ bool CanMergeToLSMulti;
+ /// Whether the instructions can be merged into a ldrd/strd instruction.
+ bool CanMergeToLSDouble;
};
BumpPtrAllocator Allocator;
SmallVector<const MergeCandidate*,4> Candidates;
@@ -122,11 +126,14 @@ namespace {
MachineBasicBlock::iterator MBBI,
DebugLoc DL, unsigned Base, unsigned WordOffset,
ARMCC::CondCodes Pred, unsigned PredReg);
- MachineInstr *MergeOps(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset,
- unsigned Base, bool BaseKill, unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
- ArrayRef<std::pair<unsigned, bool>> Regs);
+ MachineInstr *CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs);
+ MachineInstr *CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const;
void FormCandidates(const MemOpQueue &MemOps);
MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
@@ -555,12 +562,10 @@ static bool ContainsReg(const ArrayRef<s
/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. It returns
/// true if the transformation is done.
-MachineInstr *
-ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator InsertBefore, int Offset,
- unsigned Base, bool BaseKill, unsigned Opcode,
- ARMCC::CondCodes Pred, unsigned PredReg, DebugLoc DL,
- ArrayRef<std::pair<unsigned, bool>> Regs) {
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) {
unsigned NumRegs = Regs.size();
assert(NumRegs > 1);
@@ -749,6 +754,28 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBl
return MIB.getInstr();
}
+MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator InsertBefore, int Offset, unsigned Base,
+ bool BaseKill, unsigned Opcode, ARMCC::CondCodes Pred, unsigned PredReg,
+ DebugLoc DL, ArrayRef<std::pair<unsigned, bool>> Regs) const {
+ bool IsLoad = isi32Load(Opcode);
+ assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
+ unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
+
+ assert(Regs.size() == 2);
+ MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
+ TII->get(LoadStoreOpcode));
+ if (IsLoad) {
+ MIB.addReg(Regs[0].first, RegState::Define)
+ .addReg(Regs[1].first, RegState::Define);
+ } else {
+ MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
+ .addReg(Regs[1].first, getKillRegState(Regs[1].second));
+ }
+ MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
+ return MIB.getInstr();
+}
+
/// Call MergeOps and update MemOps and merges accordingly on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
const MachineInstr *First = Cand.Instrs.front();
@@ -797,7 +824,12 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsU
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(First, PredReg);
DebugLoc DL = First->getDebugLoc();
- MachineInstr *Merged = MergeOps(MBB, InsertBefore, Offset, Base, BaseKill,
+ MachineInstr *Merged = nullptr;
+ if (Cand.CanMergeToLSDouble)
+ Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
+ Opcode, Pred, PredReg, DL, Regs);
+ if (!Merged && Cand.CanMergeToLSMulti)
+ Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
Opcode, Pred, PredReg, DL, Regs);
if (!Merged)
return nullptr;
@@ -859,6 +891,13 @@ MachineInstr *ARMLoadStoreOpt::MergeOpsU
return Merged;
}
+static bool isValidLSDoubleOffset(int Offset) {
+ unsigned Value = abs(Offset);
+ // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
+ // multiplied by 4.
+ return (Value % 4) == 0 && Value < 1024;
+}
+
/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
const MachineInstr *FirstMI = MemOps[0].MI;
@@ -897,29 +936,51 @@ void ARMLoadStoreOpt::FormCandidates(con
unsigned Latest = SIndex;
unsigned Earliest = SIndex;
unsigned Count = 1;
+ bool CanMergeToLSDouble =
+ STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
+ // ARM errata 602117: LDRD with base in list may result in incorrect base
+ // register when interrupted or faulted.
+ if (STI->isCortexM3() && isi32Load(Opcode) &&
+ PReg == getLoadStoreBaseOp(*MI).getReg())
+ CanMergeToLSDouble = false;
+
+ bool CanMergeToLSMulti = true;
+ // On swift vldm/vstm starting with an odd register number as that needs
+ // more uops than single vldrs.
+ if (STI->isSwift() && !isNotVFP && (PRegNum % 2) == 1)
+ CanMergeToLSMulti = false;
- // Merge additional instructions fulfilling LDM/STM constraints.
+ // Merge following instructions where possible.
for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
int NewOffset = MemOps[I].Offset;
if (NewOffset != Offset + (int)Size)
break;
const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
unsigned Reg = MO.getReg();
- if (Reg == ARM::SP)
- break;
unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
- // Register numbers must be in ascending order.
- if (RegNum <= PRegNum)
- break;
- // For VFP / NEON load/store multiples, the registers must be consecutive
- // and within the limit on the number of registers per instruction.
- if (!isNotVFP && RegNum != PRegNum+1)
- break;
- // On Swift we don't want vldm/vstm to start with a odd register num
- // because Q register unaligned vldm/vstm need more uops.
- if (!isNotVFP && STI->isSwift() && Count == 1 && (PRegNum % 2) == 1)
- break;
+ // See if the current load/store may be part of a multi load/store.
+ bool PartOfLSMulti = CanMergeToLSMulti;
+ if (PartOfLSMulti) {
+ // Cannot load from SP
+ if (Reg == ARM::SP)
+ PartOfLSMulti = false;
+ // Register numbers must be in ascending order.
+ else if (RegNum <= PRegNum)
+ PartOfLSMulti = false;
+ // For VFP / NEON load/store multiples, the registers must be
+ // consecutive and within the limit on the number of registers per
+ // instruction.
+ else if (!isNotVFP && RegNum != PRegNum+1)
+ PartOfLSMulti = false;
+ }
+ // See if the current load/store may be part of a double load/store.
+ bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
+
+ if (!PartOfLSMulti && !PartOfLSDouble)
+ break;
+ CanMergeToLSMulti &= PartOfLSMulti;
+ CanMergeToLSDouble &= PartOfLSDouble;
// Track MemOp with latest and earliest position (Positions are
// counted in reverse).
unsigned Position = MemOps[I].Position;
@@ -939,6 +1000,10 @@ void ARMLoadStoreOpt::FormCandidates(con
Candidate->LatestMIIdx = Latest - SIndex;
Candidate->EarliestMIIdx = Earliest - SIndex;
Candidate->InsertPos = MemOps[Latest].Position;
+ if (Count == 1)
+ CanMergeToLSMulti = CanMergeToLSDouble = false;
+ Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
+ Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
Candidates.push_back(Candidate);
// Continue after the chain.
SIndex += Count;
@@ -1677,12 +1742,14 @@ bool ARMLoadStoreOpt::LoadStoreMultipleO
// Go through list of candidates and merge.
bool Changed = false;
for (const MergeCandidate *Candidate : Candidates) {
- if (Candidate->Instrs.size() > 1) {
+ if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
MachineInstr *Merged = MergeOpsUpdate(*Candidate);
// Merge preceding/trailing base inc/dec into the merged op.
if (Merged) {
- MergeBaseUpdateLSMultiple(Merged);
Changed = true;
+ unsigned Opcode = Merged->getOpcode();
+ if (Opcode != ARM::t2STRDi8 && Opcode != ARM::t2LDRDi8)
+ MergeBaseUpdateLSMultiple(Merged);
} else {
for (MachineInstr *MI : Candidate->Instrs) {
if (MergeBaseUpdateLoadStore(MI))
Modified: llvm/trunk/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll?rev=241926&r1=241925&r2=241926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/2013-05-02-AAPCS-ByVal-Structs-C4-C5-VFP.ll Fri Jul 10 13:28:49 2015
@@ -25,8 +25,7 @@ entry:
;CHECK: push {r7, lr}
;CHECK: sub sp, #4
;CHECK: add r0, sp, #12
- ;CHECK: str r2, [sp, #16]
- ;CHECK: str r1, [sp, #12]
+ ;CHECK: strd r1, r2, [sp, #12]
;CHECK: bl fooUseStruct
call void @fooUseStruct(%st_t* %p1)
ret void
Modified: llvm/trunk/test/CodeGen/ARM/byval-align.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/byval-align.ll?rev=241926&r1=241925&r2=241926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/byval-align.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/byval-align.ll Fri Jul 10 13:28:49 2015
@@ -28,8 +28,7 @@ define i32 @test_align8(i8*, [4 x i32]*
; CHECK: push {r4, r7, lr}
; CHECK: add r7, sp, #4
-; CHECK-DAG: str r2, [r7, #8]
-; CHECK-DAG: str r3, [r7, #12]
+; CHECK: strd r2, r3, [r7, #8]
; CHECK: ldr r0, [r7, #8]
Modified: llvm/trunk/test/CodeGen/ARM/ldrd.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/ldrd.ll?rev=241926&r1=241925&r2=241926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/ldrd.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/ldrd.ll Fri Jul 10 13:28:49 2015
@@ -3,6 +3,7 @@
; rdar://6949835
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=basic | FileCheck %s -check-prefix=BASIC -check-prefix=CHECK
; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 -regalloc=greedy | FileCheck %s -check-prefix=GREEDY -check-prefix=CHECK
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -mcpu=swift | FileCheck %s -check-prefix=SWIFT -check-prefix=CHECK
; Magic ARM pair hints works best with linearscan / fast.
@@ -110,5 +111,25 @@ entry:
ret void
}
+; CHECK-LABEL: strd_spill_ldrd_reload:
+; A8: strd r1, r0, [sp]
+; M3: strd r1, r0, [sp]
+; BASIC: strd r1, r0, [sp]
+; GREEDY: strd r0, r1, [sp]
+; CHECK: @ InlineAsm Start
+; CHECK: @ InlineAsm End
+; A8: ldrd r2, r1, [sp]
+; M3: ldrd r2, r1, [sp]
+; BASIC: ldrd r2, r1, [sp]
+; GREEDY: ldrd r1, r2, [sp]
+; CHECK: bl{{x?}} _extfunc
+define void @strd_spill_ldrd_reload(i32 %v0, i32 %v1) {
+ ; force %v0 and %v1 to be spilled
+ call void asm sideeffect "", "~{r0},~{r1},~{r2},~{r3},~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12},~{lr}"()
+ ; force the reloaded %v0, %v1 into different registers
+ call void @extfunc(i32 0, i32 %v0, i32 %v1, i32 7)
+ ret void
+}
+
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
Modified: llvm/trunk/test/CodeGen/ARM/memset-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/memset-inline.ll?rev=241926&r1=241925&r2=241926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/memset-inline.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/memset-inline.ll Fri Jul 10 13:28:49 2015
@@ -4,8 +4,7 @@ define void @t1(i8* nocapture %c) nounwi
entry:
; CHECK-LABEL: t1:
; CHECK: movs r1, #0
-; CHECK: str r1, [r0]
-; CHECK: str r1, [r0, #4]
+; CHECK: strd r1, r1, [r0]
; CHECK: str r1, [r0, #8]
call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 12, i32 8, i1 false)
ret void
Modified: llvm/trunk/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll?rev=241926&r1=241925&r2=241926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/wrong-t2stmia-size-opt.ll Fri Jul 10 13:28:49 2015
@@ -5,16 +5,20 @@ target triple = "thumbv7--linux-gnueabi"
declare i8* @llvm.returnaddress(i32)
-define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0) minsize {
+define i32* @wrong-t2stmia-size-reduction(i32* %addr, i32 %val0, i32 %val1) minsize {
store i32 %val0, i32* %addr
%addr1 = getelementptr i32, i32* %addr, i32 1
+ %addr2 = getelementptr i32, i32* %addr, i32 2
%lr = call i8* @llvm.returnaddress(i32 0)
%lr32 = ptrtoint i8* %lr to i32
- store i32 %lr32, i32* %addr1
- %addr2 = getelementptr i32, i32* %addr1, i32 1
- ret i32* %addr2
+ store i32 %val1, i32* %addr1
+ store i32 %lr32, i32* %addr2
+
+ %addr3 = getelementptr i32, i32* %addr, i32 3
+ ret i32* %addr3
}
-; Check that stm writes two registers. The bug caused one of registers (LR,
+; Check that stm writes three registers. The bug caused one of registers (LR,
; which invalid for Thumb1 form of STMIA instruction) to be dropped.
-; CHECK: stm{{[^,]*}}, {{{.*,.*}}}
+; CHECK-LABEL: wrong-t2stmia-size-reduction:
+; CHECK: stm{{[^,]*}}, {{{.*,.*,.*}}}
Modified: llvm/trunk/test/CodeGen/Thumb2/aapcs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/aapcs.ll?rev=241926&r1=241925&r2=241926&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/aapcs.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/aapcs.ll Fri Jul 10 13:28:49 2015
@@ -33,8 +33,7 @@ define float @float_on_stack(double %a,
define double @double_on_stack(double %a, double %b, double %c, double %d, double %e, double %f, double %g, double %h, double %i) {
; CHECK-LABEL: double_on_stack:
-; SOFT: ldr r0, [sp, #48]
-; SOFT: ldr r1, [sp, #52]
+; SOFT: ldrd r0, r1, [sp, #48]
; HARD: vldr d0, [sp]
; CHECK-NEXT: bx lr
ret double %i
@@ -42,8 +41,7 @@ define double @double_on_stack(double %a
define double @double_not_split(double %a, double %b, double %c, double %d, double %e, double %f, double %g, float %h, double %i) {
; CHECK-LABEL: double_not_split:
-; SOFT: ldr r0, [sp, #48]
-; SOFT: ldr r1, [sp, #52]
+; SOFT: ldrd r0, r1, [sp, #48]
; HARD: vldr d0, [sp]
; CHECK-NEXT: bx lr
ret double %i
More information about the llvm-commits
mailing list