[llvm] r214349 - [FastISel][AArch64] Add branch folding support for the XALU intrinsics.
Juergen Ributzka
juergen at apple.com
Wed Jul 30 15:04:34 PDT 2014
Author: ributzka
Date: Wed Jul 30 17:04:34 2014
New Revision: 214349
URL: http://llvm.org/viewvc/llvm-project?rev=214349&view=rev
Log:
[FastISel][AArch64] Add branch folding support for the XALU intrinsics.
This improves code generation for the XALU intrinsics when the overflow
condition feeds a branch instruction.
This is related to <rdar://problem/17831117>.
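For reference, this is roughly the kind of IR pattern the new fold targets
(an illustrative sketch, not a test case from this commit; the function name
and values are made up): an overflow intrinsic whose overflow bit is
extracted and branched on in the same basic block.

  declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)

  define i32 @sadd_br(i32 %a, i32 %b) {
  entry:
    %res  = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b)
    %sum  = extractvalue { i32, i1 } %res, 0
    %obit = extractvalue { i32, i1 } %res, 1
    ; With this change FastISel can lower the branch to an "adds" followed by
    ; "b.vs" instead of materializing %obit into a register and testing it.
    br i1 %obit, label %overflow, label %cont

  overflow:
    ret i32 0

  cont:
    ret i32 %sum
  }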
Modified:
llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp?rev=214349&r1=214348&r2=214349&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64FastISel.cpp Wed Jul 30 17:04:34 2014
@@ -122,6 +122,9 @@ private:
bool IsMemCpySmall(uint64_t Len, unsigned Alignment);
bool TryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len,
unsigned Alignment);
+ bool foldXALUIntrinsic(AArch64CC::CondCode &CC, const Instruction *I,
+ const Value *Cond);
+
// Emit functions.
bool EmitCmp(Value *Src1Value, Value *Src2Value, bool isZExt);
bool EmitLoad(MVT VT, unsigned &ResultReg, Address Addr,
@@ -768,10 +771,11 @@ bool AArch64FastISel::SelectBranch(const
MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)];
MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)];
+ AArch64CC::CondCode CC = AArch64CC::NE;
if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) {
if (CI->hasOneUse() && (CI->getParent() == I->getParent())) {
// We may not handle every CC for now.
- AArch64CC::CondCode CC = getCompareCC(CI->getPredicate());
+ CC = getCompareCC(CI->getPredicate());
if (CC == AArch64CC::AL)
return false;
@@ -814,7 +818,6 @@ bool AArch64FastISel::SelectBranch(const
.addImm(0)
.addImm(0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
@@ -834,6 +837,21 @@ bool AArch64FastISel::SelectBranch(const
.addMBB(Target);
FuncInfo.MBB->addSuccessor(Target);
return true;
+ } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) {
+ // Fake request the condition, otherwise the intrinsic might be completely
+ // optimized away.
+ unsigned CondReg = getRegForValue(BI->getCondition());
+ if (!CondReg)
+ return false;
+
+ // Emit the branch.
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc))
+ .addImm(CC)
+ .addMBB(TBB);
+ FuncInfo.MBB->addSuccessor(TBB);
+
+ FastEmitBranch(FBB, DbgLoc);
+ return true;
}
unsigned CondReg = getRegForValue(BI->getCondition());
@@ -853,7 +871,6 @@ bool AArch64FastISel::SelectBranch(const
.addImm(0)
.addImm(0);
- unsigned CC = AArch64CC::NE;
if (FuncInfo.MBB->isLayoutSuccessor(TBB)) {
std::swap(TBB, FBB);
CC = AArch64CC::EQ;
@@ -1445,6 +1462,63 @@ bool AArch64FastISel::TryEmitSmallMemCpy
return true;
}
+/// \brief Check if it is possible to fold the condition from the XALU intrinsic
+/// into the user. The condition code will only be updated on success.
+bool AArch64FastISel::foldXALUIntrinsic(AArch64CC::CondCode &CC,
+ const Instruction *I,
+ const Value *Cond) {
+ if (!isa<ExtractValueInst>(Cond))
+ return false;
+
+ const auto *EV = cast<ExtractValueInst>(Cond);
+ if (!isa<IntrinsicInst>(EV->getAggregateOperand()))
+ return false;
+
+ const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand());
+ MVT RetVT;
+ const Function *Callee = II->getCalledFunction();
+ Type *RetTy =
+ cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U);
+ if (!isTypeLegal(RetTy, RetVT))
+ return false;
+
+ if (RetVT != MVT::i32 && RetVT != MVT::i64)
+ return false;
+
+ AArch64CC::CondCode TmpCC;
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::sadd_with_overflow:
+ case Intrinsic::ssub_with_overflow: TmpCC = AArch64CC::VS; break;
+ case Intrinsic::uadd_with_overflow: TmpCC = AArch64CC::HS; break;
+ case Intrinsic::usub_with_overflow: TmpCC = AArch64CC::LO; break;
+ case Intrinsic::smul_with_overflow:
+ case Intrinsic::umul_with_overflow: TmpCC = AArch64CC::NE; break;
+ }
+
+ // Check if both instructions are in the same basic block.
+ if (II->getParent() != I->getParent())
+ return false;
+
+ // Make sure nothing is in the way
+ BasicBlock::const_iterator Start = I;
+ BasicBlock::const_iterator End = II;
+ for (auto Itr = std::prev(Start); Itr != End; --Itr) {
+ // We only expect extractvalue instructions between the intrinsic and the
+ // instruction to be selected.
+ if (!isa<ExtractValueInst>(Itr))
+ return false;
+
+ // Check that the extractvalue operand comes from the intrinsic.
+ const auto *EVI = cast<ExtractValueInst>(Itr);
+ if (EVI->getAggregateOperand() != II)
+ return false;
+ }
+
+ CC = TmpCC;
+ return true;
+}
+
bool AArch64FastISel::FastLowerIntrinsicCall(const IntrinsicInst *II) {
// FIXME: Handle more intrinsics.
switch (II->getIntrinsicID()) {