[llvm] r299379 - [CodeGenPrep] move aarch64-type-promotion to CGP
Jun Bum Lim via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 3 12:20:08 PDT 2017
Author: junbuml
Date: Mon Apr 3 14:20:07 2017
New Revision: 299379
URL: http://llvm.org/viewvc/llvm-project?rev=299379&view=rev
Log:
[CodeGenPrep] move aarch64-type-promotion to CGP
Summary:
Move the aarch64-type-promotion pass into the existing type promotion framework in CGP.
This change also supports forking sexts when a new sext is required for promotion.
Note that this change is based on D27853; I am sending it out early to provide a better idea of D27853.
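To give a better idea of the transformation, here is a minimal sketch (mirroring the example added to the optimizeExt() doc comment and the new aarch64-codegen-prepare-atp.ll test; the value names are only for exposition). The computation feeding the sext is promoted to i64 so that the whole GEP index arithmetic happens on the address type, and new sexts of the operands are forked when needed (see @promoteTwoOne/@promoteTwoTwo in the new test):

  ; before
  %a = add nsw i32 %b, 3
  %d = sext i32 %a to i64
  %e = getelementptr inbounds i64, i64* %base, i64 %d

  ; after
  %f = sext i32 %b to i64
  %a = add nsw i64 %f, 3
  %e = getelementptr inbounds i64, i64* %base, i64 %a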
Reviewers: jmolloy, mcrosier, javed.absar, qcolombet
Reviewed By: qcolombet
Subscribers: llvm-commits, aemerson, rengolin, mcrosier
Differential Revision: https://reviews.llvm.org/D28680
Added:
llvm/trunk/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
Modified:
llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfo.h Mon Apr 3 14:20:07 2017
@@ -526,6 +526,12 @@ public:
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
+ /// \return True if \p I should be considered for address type promotion.
+ /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
+ /// profitable without finding other extensions fed by the same input.
+ bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
+
/// \return The size of a cache line in bytes.
unsigned getCacheLineSize() const;
@@ -800,6 +806,8 @@ public:
Type *Ty) = 0;
virtual unsigned getNumberOfRegisters(bool Vector) = 0;
virtual unsigned getRegisterBitWidth(bool Vector) = 0;
+ virtual bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
virtual unsigned getCacheLineSize() = 0;
virtual unsigned getPrefetchDistance() = 0;
virtual unsigned getMinPrefetchStride() = 0;
@@ -1026,7 +1034,11 @@ public:
unsigned getRegisterBitWidth(bool Vector) override {
return Impl.getRegisterBitWidth(Vector);
}
-
+ bool shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
+ return Impl.shouldConsiderAddressTypePromotion(
+ I, AllowPromotionWithoutCommonHeader);
+ }
unsigned getCacheLineSize() override {
return Impl.getCacheLineSize();
}
Modified: llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h (original)
+++ llvm/trunk/include/llvm/Analysis/TargetTransformInfoImpl.h Mon Apr 3 14:20:07 2017
@@ -303,6 +303,13 @@ public:
unsigned getRegisterBitWidth(bool Vector) { return 32; }
+ bool
+ shouldConsiderAddressTypePromotion(const Instruction &I,
+ bool &AllowPromotionWithoutCommonHeader) {
+ AllowPromotionWithoutCommonHeader = false;
+ return false;
+ }
+
unsigned getCacheLineSize() { return 0; }
unsigned getPrefetchDistance() { return 0; }
Modified: llvm/trunk/lib/Analysis/TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/TargetTransformInfo.cpp?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/lib/Analysis/TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Analysis/TargetTransformInfo.cpp Mon Apr 3 14:20:07 2017
@@ -269,6 +269,12 @@ unsigned TargetTransformInfo::getRegiste
return TTIImpl->getRegisterBitWidth(Vector);
}
+bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
+ return TTIImpl->shouldConsiderAddressTypePromotion(
+ I, AllowPromotionWithoutCommonHeader);
+}
+
unsigned TargetTransformInfo::getCacheLineSize() const {
return TTIImpl->getCacheLineSize();
}
Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Mon Apr 3 14:20:07 2017
@@ -138,10 +138,17 @@ static cl::opt<bool> ForceSplitStore(
"force-split-store", cl::Hidden, cl::init(false),
cl::desc("Force store splitting no matter what the target query says."));
+static cl::opt<bool>
+EnableTypePromotionMerge("cgp-type-promotion-merge", cl::Hidden,
+ cl::desc("Enable merging of redundant sexts when one is dominating"
+ " the other."), cl::init(true));
+
namespace {
typedef SmallPtrSet<Instruction *, 16> SetOfInstrs;
typedef PointerIntPair<Type *, 1, bool> TypeIsSExt;
typedef DenseMap<Instruction *, TypeIsSExt> InstrToOrigTy;
+typedef SmallVector<Instruction *, 16> SExts;
+typedef DenseMap<Value *, SExts> ValueToSExts;
class TypePromotionTransaction;
class CodeGenPrepare : public FunctionPass {
@@ -170,6 +177,15 @@ class TypePromotionTransaction;
/// promotion for the current function.
InstrToOrigTy PromotedInsts;
+ /// Keep track of instructions removed during promotion.
+ SetOfInstrs RemovedInsts;
+
+ /// Keep track of sext chains based on their initial value.
+ DenseMap<Value *, Instruction *> SeenChainsForSExt;
+
+ /// Keep track of promoted SExts.
+ ValueToSExts ValToSExtendedUses;
+
/// True if CFG is modified in any way.
bool ModifiedDT;
@@ -211,7 +227,7 @@ class TypePromotionTransaction;
Type *AccessTy, unsigned AS);
bool optimizeInlineAsmInst(CallInst *CS);
bool optimizeCallInst(CallInst *CI, bool& ModifiedDT);
- bool moveExtToFormExtLoad(Instruction *&I);
+ bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *I);
bool optimizeSelectInst(SelectInst *SI);
@@ -226,6 +242,12 @@ class TypePromotionTransaction;
const SmallVectorImpl<Instruction *> &Exts,
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
unsigned CreatedInstsCost = 0);
+ bool mergeSExts(Function &F);
+ bool performAddressTypePromotion(
+ Instruction *&Inst,
+ bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
bool splitBranchCondition(Function &F);
bool simplifyOffsetableRelocate(Instruction &I);
bool splitIndirectCriticalEdges(Function &F);
@@ -310,6 +332,9 @@ bool CodeGenPrepare::runOnFunction(Funct
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ SeenChainsForSExt.clear();
+ ValToSExtendedUses.clear();
+ RemovedInsts.clear();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -319,6 +344,13 @@ bool CodeGenPrepare::runOnFunction(Funct
if (ModifiedDTOnIteration)
break;
}
+ if (EnableTypePromotionMerge && !ValToSExtendedUses.empty())
+ MadeChange |= mergeSExts(F);
+
+ // Really free removed instructions during promotion.
+ for (Instruction *I : RemovedInsts)
+ delete I;
+
EverMadeChange |= MadeChange;
}
@@ -2793,25 +2825,30 @@ class TypePromotionTransaction {
OperandsHider Hider;
/// Keep track of the uses replaced, if any.
UsesReplacer *Replacer;
+ /// Keep track of instructions removed.
+ SetOfInstrs &RemovedInsts;
public:
/// \brief Remove all references to \p Inst and optionally replace all its
/// uses with New.
+ /// \p RemovedInsts Keep track of the instructions removed by this Action.
/// \pre If !Inst->use_empty(), then New != nullptr
- InstructionRemover(Instruction *Inst, Value *New = nullptr)
+ InstructionRemover(Instruction *Inst, SetOfInstrs &RemovedInsts,
+ Value *New = nullptr)
: TypePromotionAction(Inst), Inserter(Inst), Hider(Inst),
- Replacer(nullptr) {
+ Replacer(nullptr), RemovedInsts(RemovedInsts) {
if (New)
Replacer = new UsesReplacer(Inst, New);
DEBUG(dbgs() << "Do: InstructionRemover: " << *Inst << "\n");
+ RemovedInsts.insert(Inst);
+ /// The instructions removed here will be freed after completing
+ /// optimizeBlock() for all blocks as we need to keep track of the
+ /// removed instructions during promotion.
Inst->removeFromParent();
}
~InstructionRemover() override { delete Replacer; }
- /// \brief Really remove the instruction.
- void commit() override { delete Inst; }
-
/// \brief Resurrect the instruction and reassign it to the proper uses if
/// new value was provided when build this action.
void undo() override {
@@ -2820,6 +2857,7 @@ class TypePromotionTransaction {
if (Replacer)
Replacer->undo();
Hider.undo();
+ RemovedInsts.erase(Inst);
}
};
@@ -2828,6 +2866,10 @@ public:
/// The restoration point is a pointer to an action instead of an iterator
/// because the iterator may be invalidated but not the pointer.
typedef const TypePromotionAction *ConstRestorationPt;
+
+ TypePromotionTransaction(SetOfInstrs &RemovedInsts)
+ : RemovedInsts(RemovedInsts) {}
+
/// Advocate every change made in that transaction.
void commit();
/// Undo all the changes made after the given point.
@@ -2859,6 +2901,7 @@ private:
/// The ordered list of actions made so far.
SmallVector<std::unique_ptr<TypePromotionAction>, 16> Actions;
typedef SmallVectorImpl<std::unique_ptr<TypePromotionAction>>::iterator CommitPt;
+ SetOfInstrs &RemovedInsts;
};
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
@@ -2870,7 +2913,8 @@ void TypePromotionTransaction::setOperan
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- make_unique<TypePromotionTransaction::InstructionRemover>(Inst, NewVal));
+ make_unique<TypePromotionTransaction::InstructionRemover>(Inst,
+ RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
@@ -4097,7 +4141,7 @@ bool CodeGenPrepare::optimizeMemoryInst(
bool IsNumUsesConsensusValid = false;
SmallVector<Instruction*, 16> AddrModeInsts;
ExtAddrMode AddrMode;
- TypePromotionTransaction TPT;
+ TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
while (!worklist.empty()) {
@@ -4492,20 +4536,6 @@ static bool hasSameExtUse(Value *Val, co
/// them.
///
/// \return true if some promotion happened, false otherwise.
-///
-/// Example:
-/// \code
-/// %ld = load i32* %addr
-/// %add = add nuw i32 %ld, 4
-/// %zext = zext i32 %add to i64
-/// \endcode
-/// =>
-/// \code
-/// %ld = load i32* %addr
-/// %zext = zext i32 %ld to i64
-/// %add = add nuw i64 %zext, 4
-/// \endcode
-/// Thanks to the promotion, we can match zext(load i32*) to i64.
bool CodeGenPrepare::tryToPromoteExts(
TypePromotionTransaction &TPT, const SmallVectorImpl<Instruction *> &Exts,
SmallVectorImpl<Instruction *> &ProfitablyMovedExts,
@@ -4601,6 +4631,46 @@ bool CodeGenPrepare::tryToPromoteExts(
return Promoted;
}
+/// Merge redundant sexts when one dominates the other.
+bool CodeGenPrepare::mergeSExts(Function &F) {
+ DominatorTree DT(F);
+ bool Changed = false;
+ for (auto &Entry : ValToSExtendedUses) {
+ SExts &Insts = Entry.second;
+ SExts CurPts;
+ for (Instruction *Inst : Insts) {
+ if (RemovedInsts.count(Inst) || !isa<SExtInst>(Inst) ||
+ Inst->getOperand(0) != Entry.first)
+ continue;
+ bool inserted = false;
+ for (auto &Pt : CurPts) {
+ if (DT.dominates(Inst, Pt)) {
+ Pt->replaceAllUsesWith(Inst);
+ RemovedInsts.insert(Pt);
+ Pt->removeFromParent();
+ Pt = Inst;
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!DT.dominates(Pt, Inst))
+ // Give up if we need to merge in a common dominator as the
+ // experiments show it is not profitable.
+ continue;
+ Inst->replaceAllUsesWith(Pt);
+ RemovedInsts.insert(Inst);
+ Inst->removeFromParent();
+ inserted = true;
+ Changed = true;
+ break;
+ }
+ if (!inserted)
+ CurPts.push_back(Inst);
+ }
+ }
+ return Changed;
+}
+
/// Return true, if an ext(load) can be formed from an extension in
/// \p MovedExts.
bool CodeGenPrepare::canFormExtLd(
@@ -4646,49 +4716,163 @@ bool CodeGenPrepare::canFormExtLd(
/// Move a zext or sext fed by a load into the same basic block as the load,
/// unless conditions are unfavorable. This allows SelectionDAG to fold the
/// extend into the load.
-/// \p I[in/out] the extension may be modified during the process if some
-/// promotions apply.
///
-bool CodeGenPrepare::moveExtToFormExtLoad(Instruction *&I) {
- // ExtLoad formation infrastructure requires TLI to be effective.
+/// E.g.,
+/// \code
+/// %ld = load i32* %addr
+/// %add = add nuw i32 %ld, 4
+/// %zext = zext i32 %add to i64
+/// \endcode
+/// =>
+/// \code
+/// %ld = load i32* %addr
+/// %zext = zext i32 %ld to i64
+/// %add = add nuw i64 %zext, 4
+/// \endcode
+/// Note that the promotion in %add to i64 is done in tryToPromoteExts(), which
+/// allows us to match zext(load i32*) to i64.
+///
+/// Also, try to promote the computations used to obtain a sign extended
+/// value used in memory accesses.
+/// E.g.,
+/// \code
+/// a = add nsw i32 b, 3
+/// d = sext i32 a to i64
+/// e = getelementptr ..., i64 d
+/// \endcode
+/// =>
+/// \code
+/// f = sext i32 b to i64
+/// a = add nsw i64 f, 3
+/// e = getelementptr ..., i64 a
+/// \endcode
+///
+/// \p Inst[in/out] the extension may be modified during the process if some
+/// promotions apply.
+bool CodeGenPrepare::optimizeExt(Instruction *&Inst) {
+ // ExtLoad formation and address type promotion infrastructure requires TLI to
+ // be effective.
if (!TLI)
return false;
- // Try to promote a chain of computation if it allows to form
- // an extended load.
- TypePromotionTransaction TPT;
+ bool AllowPromotionWithoutCommonHeader = false;
+ /// See if it is an interesting sext operation for the address type
+ /// promotion before trying to promote it, e.g., the ones with the right
+ /// type and used in memory accesses.
+ bool ATPConsiderable = TTI->shouldConsiderAddressTypePromotion(
+ *Inst, AllowPromotionWithoutCommonHeader);
+ TypePromotionTransaction TPT(RemovedInsts);
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
SmallVector<Instruction *, 1> Exts;
- SmallVector<Instruction *, 2> LastMovedExts;
- Exts.push_back(I);
+ SmallVector<Instruction *, 2> SpeculativelyMovedExts;
+ Exts.push_back(Inst);
- bool HasPromoted = tryToPromoteExts(TPT, Exts, LastMovedExts);
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, SpeculativelyMovedExts);
// Look for a load being extended.
LoadInst *LI = nullptr;
- Instruction *OldExt = I;
- if (!canFormExtLd(LastMovedExts, LI, I, HasPromoted)) {
- I = OldExt;
- TPT.rollback(LastKnownGood);
+ Instruction *ExtFedByLoad;
+
+ // Try to promote a chain of computation if it allows us to form an extended
+ // load.
+ if (canFormExtLd(SpeculativelyMovedExts, LI, ExtFedByLoad, HasPromoted)) {
+ assert(LI && ExtFedByLoad && "Expect a valid load and extension");
+ TPT.commit();
+ // Move the extend into the same block as the load
+ ExtFedByLoad->removeFromParent();
+ ExtFedByLoad->insertAfter(LI);
+ // CGP does not check if the zext would be speculatively executed when moved
+ // to the same basic block as the load. Preserving its original location
+ // would pessimize the debugging experience, as well as negatively impact
+ // the quality of sample pgo. We don't want to use "line 0" as that has a
+ // size cost in the line-table section and logically the zext can be seen as
+ // part of the load. Therefore we conservatively reuse the same debug
+ // location for the load and the zext.
+ ExtFedByLoad->setDebugLoc(LI->getDebugLoc());
+ ++NumExtsMoved;
+ Inst = ExtFedByLoad;
+ return true;
+ }
+
+ // Continue promoting SExts if the target reported them as considerable for
+ // address type promotion.
+ if (ATPConsiderable &&
+ performAddressTypePromotion(Inst, AllowPromotionWithoutCommonHeader,
+ HasPromoted, TPT, SpeculativelyMovedExts))
+ return true;
+
+ TPT.rollback(LastKnownGood);
+ return false;
+}
+
+// Perform address type promotion if doing so is profitable.
+// If AllowPromotionWithoutCommonHeader == false, we only promote when other
+// sext instructions sign extend the same initial value. However, if
+// AllowPromotionWithoutCommonHeader == true, promoting the extension is
+// expected to be profitable on its own.
+bool CodeGenPrepare::performAddressTypePromotion(
+ Instruction *&Inst, bool AllowPromotionWithoutCommonHeader,
+ bool HasPromoted, TypePromotionTransaction &TPT,
+ SmallVectorImpl<Instruction *> &SpeculativelyMovedExts) {
+ bool Promoted = false;
+ SmallPtrSet<Instruction *, 1> UnhandledExts;
+ bool AllSeenFirst = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ DenseMap<Value *, Instruction *>::iterator AlreadySeen =
+ SeenChainsForSExt.find(HeadOfChain);
+ // If there is an unhandled SExt which has the same header, try to promote
+ // it as well.
+ if (AlreadySeen != SeenChainsForSExt.end()) {
+ if (AlreadySeen->second != nullptr)
+ UnhandledExts.insert(AlreadySeen->second);
+ AllSeenFirst = false;
+ }
+ }
+
+ if (!AllSeenFirst || (AllowPromotionWithoutCommonHeader &&
+ SpeculativelyMovedExts.size() == 1)) {
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ // Update Inst as promotion happened.
+ Inst = SpeculativelyMovedExts.pop_back_val();
+ } else {
+ // This is the first chain visited from the header; keep the current chain
+ // as unhandled. Defer promoting it until we encounter another SExt
+ // chain derived from the same header.
+ for (auto I : SpeculativelyMovedExts) {
+ Value *HeadOfChain = I->getOperand(0);
+ SeenChainsForSExt[HeadOfChain] = Inst;
+ }
return false;
}
- // Move the extend into the same block as the load, so that SelectionDAG
- // can fold it.
- TPT.commit();
- I->removeFromParent();
- I->insertAfter(LI);
- // CGP does not check if the zext would be speculatively executed when moved
- // to the same basic block as the load. Preserving its original location would
- // pessimize the debugging experience, as well as negatively impact the
- // quality of sample pgo. We don't want to use "line 0" as that has a
- // size cost in the line-table section and logically the zext can be seen as
- // part of the load. Therefore we conservatively reuse the same debug location
- // for the load and the zext.
- I->setDebugLoc(LI->getDebugLoc());
- ++NumExtsMoved;
- return true;
+ if (!AllSeenFirst && !UnhandledExts.empty())
+ for (auto VisitedSExt : UnhandledExts) {
+ if (RemovedInsts.count(VisitedSExt))
+ continue;
+ TypePromotionTransaction TPT(RemovedInsts);
+ SmallVector<Instruction *, 1> Exts;
+ SmallVector<Instruction *, 2> Chains;
+ Exts.push_back(VisitedSExt);
+ bool HasPromoted = tryToPromoteExts(TPT, Exts, Chains);
+ TPT.commit();
+ if (HasPromoted)
+ Promoted = true;
+ for (auto I : Chains) {
+ Value *HeadOfChain = I->getOperand(0);
+ // Mark this as handled.
+ SeenChainsForSExt[HeadOfChain] = nullptr;
+ ValToSExtendedUses[HeadOfChain].push_back(I);
+ }
+ }
+ return Promoted;
}
bool CodeGenPrepare::optimizeExtUses(Instruction *I) {
@@ -5802,7 +5986,7 @@ bool CodeGenPrepare::optimizeInst(Instru
TargetLowering::TypeExpandInteger) {
return SinkCast(CI);
} else {
- bool MadeChange = moveExtToFormExtLoad(I);
+ bool MadeChange = optimizeExt(I);
return MadeChange | optimizeExtUses(I);
}
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Mon Apr 3 14:20:07 2017
@@ -118,7 +118,7 @@ EnableA53Fix835769("aarch64-fix-cortex-a
static cl::opt<bool>
EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
cl::desc("Enable the type promotion pass"),
- cl::init(true));
+ cl::init(false));
static cl::opt<bool>
EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Mon Apr 3 14:20:07 2017
@@ -626,6 +626,38 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(
return true;
}
+/// See if \p I should be considered for address type promotion. We check if
+/// \p I is a sext with the right type and used in memory accesses. If it is
+/// used in a "complex" getelementptr, we allow it to be promoted without
+/// finding other sext instructions that sign extended the same initial value.
+/// A getelementptr is considered "complex" if it has more than 2 operands.
+bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
+ const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
+ bool Considerable = false;
+ AllowPromotionWithoutCommonHeader = false;
+ if (!isa<SExtInst>(&I))
+ return false;
+ Type *ConsideredSExtType =
+ Type::getInt64Ty(I.getParent()->getParent()->getContext());
+ if (I.getType() != ConsideredSExtType)
+ return false;
+ // See if the sext is the one with the right type and used in at least one
+ // GetElementPtrInst.
+ for (const User *U : I.users()) {
+ if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
+ Considerable = true;
+ // A getelementptr is considered as "complex" if it has more than 2
+ // operands. We will promote a SExt used in such a complex GEP as we
+ // expect some computation to be merged if they are done on 64 bits.
+ if (GEPInst->getNumOperands() > 2) {
+ AllowPromotionWithoutCommonHeader = true;
+ break;
+ }
+ }
+ }
+ return Considerable;
+}
+
unsigned AArch64TTIImpl::getCacheLineSize() {
return ST->getCacheLineSize();
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.h Mon Apr 3 14:20:07 2017
@@ -121,6 +121,10 @@ public:
ArrayRef<unsigned> Indices, unsigned Alignment,
unsigned AddressSpace);
+ bool
+ shouldConsiderAddressTypePromotion(const Instruction &I,
+ bool &AllowPromotionWithoutCommonHeader);
+
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
Added: llvm/trunk/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll?rev=299379&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/aarch64-codegen-prepare-atp.ll Mon Apr 3 14:20:07 2017
@@ -0,0 +1,68 @@
+; RUN: opt -codegenprepare < %s -S | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.match_state = type { i64, i64 }
+
+; %add is also promoted by forking an extra sext.
+define void @promoteTwoOne(i32 %i, i32 %j, i64* %P1, i64* %P2 ) {
+; CHECK-LABEL: @promoteTwoOne
+; CHECK-LABEL: entry:
+; CHECK: %[[SEXT1:.*]] = sext i32 %i to i64
+; CHECK: %[[SEXT2:.*]] = sext i32 %j to i64
+; CHECK: %add = add nsw i64 %[[SEXT1]], %[[SEXT2]]
+entry:
+ %add = add nsw i32 %i, %j
+ %s = sext i32 %add to i64
+ %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
+ store i64 %s, i64* %addr1
+ %s2 = sext i32 %i to i64
+ %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
+ store i64 %s2, i64* %addr2
+ ret void
+}
+
+; Both %add1 and %add2 are promoted by forking extra sexts.
+define void @promoteTwoTwo(i32 %i, i32 %j, i32 %k, i64* %P1, i64* %P2) {
+; CHECK-LABEL: @promoteTwoTwo
+; CHECK-LABEL:entry:
+; CHECK: %[[SEXT1:.*]] = sext i32 %j to i64
+; CHECK: %[[SEXT2:.*]] = sext i32 %i to i64
+; CHECK: %add1 = add nsw i64 %[[SEXT1]], %[[SEXT2]]
+; CHECK: %[[SEXT3:.*]] = sext i32 %k to i64
+; CHECK: %add2 = add nsw i64 %[[SEXT1]], %[[SEXT3]]
+entry:
+ %add1 = add nsw i32 %j, %i
+ %s = sext i32 %add1 to i64
+ %addr1 = getelementptr inbounds i64, i64* %P1, i64 %s
+ store i64 %s, i64* %addr1
+ %add2 = add nsw i32 %j, %k
+ %s2 = sext i32 %add2 to i64
+ %addr2 = getelementptr inbounds i64, i64* %P2, i64 %s2
+ store i64 %s2, i64* %addr2
+ ret void
+}
+
+define i64 @promoteGEPSunk(i1 %cond, i64* %base, i32 %i) {
+; CHECK-LABEL: @promoteGEPSunk
+; CHECK-LABEL: entry:
+; CHECK: %[[SEXT:.*]] = sext i32 %i to i64
+; CHECK: %add = add nsw i64 %[[SEXT]], 1
+; CHECK: %add2 = add nsw i64 %[[SEXT]], 2
+entry:
+ %add = add nsw i32 %i, 1
+ %s = sext i32 %add to i64
+ %addr = getelementptr inbounds i64, i64* %base, i64 %s
+ %add2 = add nsw i32 %i, 2
+ %s2 = sext i32 %add2 to i64
+ %addr2 = getelementptr inbounds i64, i64* %base, i64 %s2
+ br i1 %cond, label %if.then, label %if.then2
+if.then:
+ %v = load i64, i64* %addr
+ %v2 = load i64, i64* %addr2
+ %r = add i64 %v, %v2
+ ret i64 %r
+if.then2:
+ ret i64 0;
+}
Modified: llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll?rev=299379&r1=299378&r2=299379&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-addr-type-promotion.ll Mon Apr 3 14:20:07 2017
@@ -10,14 +10,17 @@ define zeroext i8 @fullGtU(i32 %i1, i32
; CHECK: fullGtU
; CHECK: adrp [[PAGE:x[0-9]+]], _block at GOTPAGE
; CHECK: ldr [[ADDR:x[0-9]+]], {{\[}}[[PAGE]], _block at GOTPAGEOFF]
+; CHECK: sxtw [[I1:x[0-9]+]], w0
+; CHECK: sxtw [[I2:x[0-9]+]], w1
; CHECK-NEXT: ldr [[BLOCKBASE:x[0-9]+]], {{\[}}[[ADDR]]]
-; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], w0, sxtw]
-; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], w1, sxtw]
+; CHECK-NEXT: ldrb [[BLOCKVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I1]]]
+; CHECK-NEXT: ldrb [[BLOCKVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE]], [[I2]]]
+
; CHECK-NEXT: cmp [[BLOCKVAL1]], [[BLOCKVAL2]]
; CHECK-NEXT: b.ne
; Next BB
-; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], w1, sxtw
-; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], w0, sxtw
+; CHECK: add [[BLOCKBASE2:x[0-9]+]], [[BLOCKBASE]], [[I2]]
+; CHECK-NEXT: add [[BLOCKBASE1:x[0-9]+]], [[BLOCKBASE]], [[I1]]
; CHECK-NEXT: ldrb [[LOADEDVAL1:w[0-9]+]], {{\[}}[[BLOCKBASE1]], #1]
; CHECK-NEXT: ldrb [[LOADEDVAL2:w[0-9]+]], {{\[}}[[BLOCKBASE2]], #1]
; CHECK-NEXT: cmp [[LOADEDVAL1]], [[LOADEDVAL2]]