[llvm-commits] [llvm] r73291 - in /llvm/trunk: lib/CodeGen/LLVMTargetMachine.cpp lib/Target/ARM/ARM.h lib/Target/ARM/ARMLoadStoreOptimizer.cpp lib/Target/ARM/ARMTargetMachine.cpp lib/Target/ARM/ARMTargetMachine.h test/CodeGen/ARM/stm.ll
Evan Cheng
evan.cheng at apple.com
Sat Jun 13 02:13:10 PDT 2009
Author: evancheng
Date: Sat Jun 13 04:12:55 2009
New Revision: 73291
URL: http://llvm.org/viewvc/llvm-project?rev=73291&view=rev
Log:
Add a ARM specific pre-allocation pass that re-schedule loads / stores from
consecutive addresses togther. This makes it easier for the post-allocation pass
to form ldm / stm.
This is step 1. We are still missing a lot of ldm / stm opportunities because
of register allocation are not done in the desired order. More enhancements
coming.
Added:
llvm/trunk/test/CodeGen/ARM/stm.ll
Modified:
llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
llvm/trunk/lib/Target/ARM/ARM.h
llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
llvm/trunk/lib/Target/ARM/ARMTargetMachine.h
Modified: llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp?rev=73291&r1=73290&r2=73291&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp (original)
+++ llvm/trunk/lib/CodeGen/LLVMTargetMachine.cpp Sat Jun 13 04:12:55 2009
@@ -240,7 +240,7 @@
if (OptLevel != CodeGenOpt::None) {
PM.add(createMachineLICMPass());
PM.add(createMachineSinkingPass());
- printAndVerify(PM, /* allowDoubleDefs= */ true);
+ printAndVerify(PM, /* allowDoubleDefs= */ false);
}
// Run pre-ra passes.
Modified: llvm/trunk/lib/Target/ARM/ARM.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.h?rev=73291&r1=73290&r2=73291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARM.h (original)
+++ llvm/trunk/lib/Target/ARM/ARM.h Sat Jun 13 04:12:55 2009
@@ -98,12 +98,12 @@
FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
MachineCodeEmitter &MCE);
-FunctionPass *createARMCodeEmitterPass( ARMTargetMachine &TM,
- MachineCodeEmitter &MCE);
-FunctionPass *createARMJITCodeEmitterPass( ARMTargetMachine &TM,
- JITCodeEmitter &JCE);
+FunctionPass *createARMCodeEmitterPass(ARMTargetMachine &TM,
+ MachineCodeEmitter &MCE);
+FunctionPass *createARMJITCodeEmitterPass(ARMTargetMachine &TM,
+ JITCodeEmitter &JCE);
-FunctionPass *createARMLoadStoreOptimizationPass();
+FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
FunctionPass *createARMConstantIslandPass();
} // end namespace llvm;
Modified: llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp?rev=73291&r1=73290&r2=73291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMLoadStoreOptimizer.cpp Sat Jun 13 04:12:55 2009
@@ -17,24 +17,31 @@
#include "ARMAddressingModes.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
-#include "llvm/Support/Compiler.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
+
+/// ARMAllocLoadStoreOpt - Post- register allocation pass the combine
+/// load / store instructions to form ldm / stm instructions.
namespace {
struct VISIBILITY_HIDDEN ARMLoadStoreOpt : public MachineFunctionPass {
@@ -81,12 +88,6 @@
char ARMLoadStoreOpt::ID = 0;
}
-/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
-/// optimization pass.
-FunctionPass *llvm::createARMLoadStoreOptimizationPass() {
- return new ARMLoadStoreOpt();
-}
-
static int getLoadStoreMultipleOpcode(int Opcode) {
switch (Opcode) {
case ARM::LDR:
@@ -582,6 +583,23 @@
RS->forward(prior(Loc));
}
+static int getMemoryOpOffset(const MachineInstr *MI) {
+ int Opcode = MI->getOpcode();
+ bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
+ unsigned NumOperands = MI->getDesc().getNumOperands();
+ unsigned OffField = MI->getOperand(NumOperands-3).getImm();
+ int Offset = isAM2
+ ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
+ if (isAM2) {
+ if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ } else {
+ if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
+ Offset = -Offset;
+ }
+ return Offset;
+}
+
/// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR
/// ops of the same base and incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
@@ -606,22 +624,11 @@
bool isMemOp = isMemoryOp(MBBI);
if (isMemOp) {
int Opcode = MBBI->getOpcode();
- bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
unsigned Size = getLSMultipleTransferSize(MBBI);
unsigned Base = MBBI->getOperand(1).getReg();
unsigned PredReg = 0;
ARMCC::CondCodes Pred = getInstrPredicate(MBBI, PredReg);
- unsigned NumOperands = MBBI->getDesc().getNumOperands();
- unsigned OffField = MBBI->getOperand(NumOperands-3).getImm();
- int Offset = isAM2
- ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4;
- if (isAM2) {
- if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- } else {
- if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub)
- Offset = -Offset;
- }
+ int Offset = getMemoryOpOffset(MBBI);
// Watch out for:
// r4 := ldr [r5]
// r5 := ldr [r5, #4]
@@ -744,6 +751,17 @@
return NumMerges > 0;
}
+namespace {
+ struct OffsetCompare {
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ int LOffset = getMemoryOpOffset(LHS);
+ int ROffset = getMemoryOpOffset(RHS);
+ assert(LHS == RHS || LOffset != ROffset);
+ return LOffset > ROffset;
+ }
+ };
+}
+
/// MergeReturnIntoLDM - If this is a exit BB, try merging the return op
/// (bx lr) into the preceeding stack restore so it directly restore the value
/// of LR into pc.
@@ -788,3 +806,277 @@
delete RS;
return Modified;
}
+
+
+/// ARMPreAllocLoadStoreOpt - Pre- register allocation pass that move
+/// load / stores from consecutive locations close to make it more
+/// likely they will be combined later.
+
+namespace {
+ struct VISIBILITY_HIDDEN ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
+ static char ID;
+ ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {}
+
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pre- register allocation load / store optimization pass";
+ }
+
+ private:
+ bool RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap);
+ bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
+ };
+ char ARMPreAllocLoadStoreOpt::ID = 0;
+}
+
+bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
+ TII = Fn.getTarget().getInstrInfo();
+ TRI = Fn.getTarget().getRegisterInfo();
+ MRI = &Fn.getRegInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
+ ++MFI)
+ Modified |= RescheduleLoadStoreInstrs(MFI);
+
+ return Modified;
+}
+
+static bool IsSafeToMove(bool isLd, unsigned Base,
+ MachineBasicBlock::iterator I,
+ MachineBasicBlock::iterator E,
+ SmallPtrSet<MachineInstr*, 4> MoveOps,
+ const TargetRegisterInfo *TRI) {
+ // Are there stores / loads / calls between them?
+ // FIXME: This is overly conservative. We should make use of alias information
+ // some day.
+ while (++I != E) {
+ const TargetInstrDesc &TID = I->getDesc();
+ if (TID.isCall() || TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ return false;
+ if (isLd && TID.mayStore())
+ return false;
+ if (!isLd) {
+ if (TID.mayLoad())
+ return false;
+ // It's not safe to move the first 'str' down.
+ // str r1, [r0]
+ // strh r5, [r0]
+ // str r4, [r0, #+4]
+ if (TID.mayStore() && !MoveOps.count(&*I))
+ return false;
+ }
+ for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
+ MachineOperand &MO = I->getOperand(j);
+ if (MO.isReg() && MO.isDef() && TRI->regsOverlap(MO.getReg(), Base))
+ return false;
+ }
+ }
+ return true;
+}
+
+bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
+ SmallVector<MachineInstr*, 4> &Ops,
+ unsigned Base, bool isLd,
+ DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
+ bool RetVal = false;
+
+ // Sort by offset (in reverse order).
+ std::sort(Ops.begin(), Ops.end(), OffsetCompare());
+
+ // The loads / stores of the same base are in order. Scan them from first to
+ // last and check for the followins:
+ // 1. Any def of base.
+ // 2. Any gaps.
+ while (Ops.size() > 1) {
+ unsigned FirstLoc = ~0U;
+ unsigned LastLoc = 0;
+ MachineInstr *FirstOp = 0;
+ MachineInstr *LastOp = 0;
+ int LastOffset = 0;
+ unsigned LastBytes = 0;
+ unsigned NumMove = 0;
+ for (int i = Ops.size() - 1; i >= 0; --i) {
+ MachineInstr *Op = Ops[i];
+ unsigned Loc = MI2LocMap[Op];
+ if (Loc <= FirstLoc) {
+ FirstLoc = Loc;
+ FirstOp = Op;
+ }
+ if (Loc >= LastLoc) {
+ LastLoc = Loc;
+ LastOp = Op;
+ }
+
+ int Offset = getMemoryOpOffset(Op);
+ unsigned Bytes = getLSMultipleTransferSize(Op);
+ if (LastBytes) {
+ if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
+ break;
+ }
+ LastOffset = Offset;
+ LastBytes = Bytes;
+ if (++NumMove == 4)
+ break;
+ }
+
+ if (NumMove <= 1)
+ Ops.pop_back();
+ else {
+ SmallPtrSet<MachineInstr*, 4> MoveOps;
+ for (int i = NumMove-1; i >= 0; --i)
+ MoveOps.insert(Ops[i]);
+
+ // Be conservative, if the instructions are too far apart, don't
+ // move them. We want to limit the increase of register pressure.
+ bool DoMove = (LastLoc - FirstLoc) < NumMove*4;
+ if (DoMove)
+ DoMove = IsSafeToMove(isLd, Base, FirstOp, LastOp, MoveOps, TRI);
+ if (!DoMove) {
+ for (unsigned i = 0; i != NumMove; ++i)
+ Ops.pop_back();
+ } else {
+ // This is the new location for the loads / stores.
+ MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
+ while (InsertPos != MBB->end() && MoveOps.count(InsertPos))
+ ++InsertPos;
+ for (unsigned i = 0; i != NumMove; ++i) {
+ MachineInstr *Op = Ops.back();
+ Ops.pop_back();
+ MBB->splice(InsertPos, MBB, Op);
+ }
+
+ NumLdStMoved += NumMove;
+ RetVal = true;
+ }
+ }
+ }
+
+ return RetVal;
+}
+
+bool
+ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
+ bool RetVal = false;
+
+ DenseMap<MachineInstr*, unsigned> MI2LocMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
+ SmallVector<unsigned, 4> LdBases;
+ SmallVector<unsigned, 4> StBases;
+
+ unsigned Loc = 0;
+ MachineBasicBlock::iterator MBBI = MBB->begin();
+ MachineBasicBlock::iterator E = MBB->end();
+ while (MBBI != E) {
+ for (; MBBI != E; ++MBBI) {
+ MachineInstr *MI = MBBI;
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.isCall() || TID.isTerminator()) {
+ // Stop at barriers.
+ ++MBBI;
+ break;
+ }
+
+ MI2LocMap[MI] = Loc++;
+ if (!isMemoryOp(MI))
+ continue;
+ unsigned PredReg = 0;
+ if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
+ continue;
+
+ int Opcode = MI->getOpcode();
+ bool isLd = Opcode == ARM::LDR ||
+ Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ unsigned Base = MI->getOperand(1).getReg();
+ int Offset = getMemoryOpOffset(MI);
+
+ bool StopHere = false;
+ if (isLd) {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2LdsMap.find(Base);
+ if (BI != Base2LdsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2LdsMap[Base] = MIs;
+ LdBases.push_back(Base);
+ }
+ } else {
+ DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
+ Base2StsMap.find(Base);
+ if (BI != Base2StsMap.end()) {
+ for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
+ if (Offset == getMemoryOpOffset(BI->second[i])) {
+ StopHere = true;
+ break;
+ }
+ }
+ if (!StopHere)
+ BI->second.push_back(MI);
+ } else {
+ SmallVector<MachineInstr*, 4> MIs;
+ MIs.push_back(MI);
+ Base2StsMap[Base] = MIs;
+ StBases.push_back(Base);
+ }
+ }
+
+ if (StopHere) {
+ // Found a duplicate (a base+offset combination that's seen earlier). Backtrack.
+ --Loc;
+ break;
+ }
+ }
+
+ // Re-schedule loads.
+ for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
+ unsigned Base = LdBases[i];
+ SmallVector<MachineInstr*, 4> &Lds = Base2LdsMap[Base];
+ if (Lds.size() > 1)
+ RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
+ }
+
+ // Re-schedule stores.
+ for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
+ unsigned Base = StBases[i];
+ SmallVector<MachineInstr*, 4> &Sts = Base2StsMap[Base];
+ if (Sts.size() > 1)
+ RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
+ }
+
+ if (MBBI != E) {
+ Base2LdsMap.clear();
+ Base2StsMap.clear();
+ LdBases.clear();
+ StBases.clear();
+ }
+ }
+
+ return RetVal;
+}
+
+
+/// createARMLoadStoreOptimizationPass - returns an instance of the load / store
+/// optimization pass.
+FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
+ if (PreAlloc)
+ return new ARMPreAllocLoadStoreOpt();
+ return new ARMLoadStoreOpt();
+}
Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp?rev=73291&r1=73290&r2=73291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.cpp Sat Jun 13 04:12:55 2009
@@ -23,6 +23,9 @@
#include "llvm/Target/TargetOptions.h"
using namespace llvm;
+static cl::opt<bool>
+EnablePreLdStOpti("arm-pre-alloc-loadstore-opti", cl::Hidden,
+ cl::desc("Enable pre-regalloc load store optimization pass"));
static cl::opt<bool> DisableLdStOpti("disable-arm-loadstore-opti", cl::Hidden,
cl::desc("Disable load store optimization pass"));
static cl::opt<bool> DisableIfConversion("disable-arm-if-conversion",cl::Hidden,
@@ -144,6 +147,16 @@
return false;
}
+bool ARMTargetMachine::addPreRegAlloc(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel) {
+ if (!EnablePreLdStOpti)
+ return false;
+ // FIXME: temporarily disabling load / store optimization pass for Thumb mode.
+ if (OptLevel != CodeGenOpt::None && !DisableLdStOpti && !Subtarget.isThumb())
+ PM.add(createARMLoadStoreOptimizationPass(true));
+ return true;
+}
+
bool ARMTargetMachine::addPreEmitPass(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// FIXME: temporarily disabling load / store optimization pass for Thumb mode.
Modified: llvm/trunk/lib/Target/ARM/ARMTargetMachine.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMTargetMachine.h?rev=73291&r1=73290&r2=73291&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMTargetMachine.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMTargetMachine.h Sat Jun 13 04:12:55 2009
@@ -71,6 +71,7 @@
// Pass Pipeline Configuration
virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
+ virtual bool addPreRegAlloc(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel);
virtual bool addAssemblyEmitter(PassManagerBase &PM,
CodeGenOpt::Level OptLevel,
Added: llvm/trunk/test/CodeGen/ARM/stm.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/stm.ll?rev=73291&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/stm.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/stm.ll Sat Jun 13 04:12:55 2009
@@ -0,0 +1,13 @@
+; RUN: llvm-as < %s | llc -mtriple=arm-apple-darwin -mattr=+v6,+vfp2 -arm-pre-alloc-loadstore-opti | grep stm | count 2
+
+@"\01LC" = internal constant [32 x i8] c"Boolean Not: %d %d %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[32 x i8]*> [#uses=1]
+@"\01LC1" = internal constant [26 x i8] c"Bitwise Not: %d %d %d %d\0A\00", section "__TEXT,__cstring,cstring_literals" ; <[26 x i8]*> [#uses=1]
+
+declare i32 @printf(i8* nocapture, ...) nounwind
+
+define i32 @main() nounwind {
+entry:
+ %0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([26 x i8]* @"\01LC1", i32 0, i32 0), i32 -2, i32 -3, i32 2, i32 -6) nounwind ; <i32> [#uses=0]
+ %1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr ([32 x i8]* @"\01LC", i32 0, i32 0), i32 0, i32 1, i32 0, i32 1, i32 0, i32 1) nounwind ; <i32> [#uses=0]
+ ret i32 0
+}
More information about the llvm-commits
mailing list