[llvm] r262670 - [InstCombine] Combine A->B->A BitCast
Guozhi Wei via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 3 15:21:39 PST 2016
Author: carrot
Date: Thu Mar 3 17:21:38 2016
New Revision: 262670
URL: http://llvm.org/viewvc/llvm-project?rev=262670&view=rev
Log:
[InstCombine] Combine A->B->A BitCast
This patch enhances InstCombine to handle the following case:
A -> B bitcast
PHI
B -> A bitcast
Added:
llvm/trunk/test/Transforms/InstCombine/pr25342.ll
Modified:
llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp?rev=262670&r1=262669&r2=262670&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCasts.cpp Thu Mar 3 17:21:38 2016
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "InstCombineInternal.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
@@ -1786,6 +1787,103 @@ static Instruction *canonicalizeBitCastE
return ExtractElementInst::Create(NewBC, ExtElt->getIndexOperand());
}
+/// This function handles the following case:
+///
+/// A -> B cast
+/// PHI
+/// B -> A cast
+///
+/// All the related PHI nodes can be replaced by new PHI nodes with type A.
+/// Uses of \p CI become the new PHI for \p PN; returns nullptr if it cannot apply.
+Instruction *InstCombiner::optimizeBitCastFromPhi(CastInst &CI, PHINode *PN) {
+ Value *Src = CI.getOperand(0);
+ Type *SrcTy = Src->getType(); // Type B
+ Type *DestTy = CI.getType(); // Type A
+
+ SmallVector<PHINode *, 4> Worklist; // PHIs still to be scanned
+ SmallSetVector<PHINode *, 4> OldPhiNodes; // Every PHI discovered so far
+
+ // Find all of the A->B casts and PHI nodes.
+ // We need to inspect all related PHI nodes, but PHIs can be cyclic, so
+ // OldPhiNodes is used to track all known PHI nodes; before adding a new
+ // PHI to Worklist, it is checked against and added to OldPhiNodes first.
+ Worklist.push_back(PN);
+ OldPhiNodes.insert(PN);
+ while (!Worklist.empty()) {
+ auto *OldPN = Worklist.pop_back_val();
+ for (Value *IncValue : OldPN->incoming_values()) {
+ if (isa<Constant>(IncValue)) // Constants are bitcast when the PHIs are rebuilt.
+ continue;
+
+ if (isa<LoadInst>(IncValue)) {
+ if (IncValue->hasOneUse())
+ continue;
+ // If a LoadInst has more than one use, changing the type of the loaded
+ // value may create another bitcast.
+ return nullptr;
+ }
+
+ auto *PNode = dyn_cast<PHINode>(IncValue);
+ if (PNode) { // Queue only PHIs that were not already known.
+ if (OldPhiNodes.insert(PNode))
+ Worklist.push_back(PNode);
+ continue;
+ }
+
+ auto *BCI = dyn_cast<BitCastInst>(IncValue);
+ // We can't handle any other kind of incoming value.
+ if (!BCI)
+ return nullptr;
+
+ // Verify it's an A->B cast.
+ Type *TyA = BCI->getOperand(0)->getType();
+ Type *TyB = BCI->getType();
+ if (TyA != DestTy || TyB != SrcTy)
+ return nullptr;
+ }
+ }
+
+ // For each old PHI node, create a corresponding new PHI node with type A.
+ SmallDenseMap<PHINode *, PHINode *> NewPNodes;
+ for (auto *OldPN : OldPhiNodes) {
+ Builder->SetInsertPoint(OldPN);
+ PHINode *NewPN = Builder->CreatePHI(DestTy, OldPN->getNumOperands());
+ NewPNodes[OldPN] = NewPN;
+ }
+
+ // Fill in the operands of the new PHI nodes, one per old incoming edge.
+ for (auto *OldPN : OldPhiNodes) {
+ PHINode *NewPN = NewPNodes[OldPN];
+ for (unsigned j = 0, e = OldPN->getNumOperands(); j != e; ++j) {
+ Value *V = OldPN->getOperand(j);
+ Value *NewV = nullptr;
+ if (auto *C = dyn_cast<Constant>(V)) {
+ NewV = Builder->CreateBitCast(C, DestTy);
+ } else if (auto *LI = dyn_cast<LoadInst>(V)) { // Cast goes before the incoming block's terminator.
+ Builder->SetInsertPoint(OldPN->getIncomingBlock(j)->getTerminator());
+ NewV = Builder->CreateBitCast(LI, DestTy);
+ } else if (auto *BCI = dyn_cast<BitCastInst>(V)) {
+ NewV = BCI->getOperand(0); // Reuse the cast's type-A source directly.
+ } else if (auto *PrevPN = dyn_cast<PHINode>(V)) {
+ NewV = NewPNodes[PrevPN];
+ }
+ assert(NewV); // The scan above admitted only the four kinds handled here.
+ NewPN->addIncoming(NewV, OldPN->getIncomingBlock(j));
+ }
+ }
+
+ // If a store uses PN (type B) as operand 0, feed it the new PHI cast back to type B.
+ for (User *U : PN->users()) { // NOTE(review): body rewrites a use of PN mid-iteration; confirm no user is skipped.
+ auto *SI = dyn_cast<StoreInst>(U);
+ if (SI && SI->getOperand(0) == PN) {
+ Builder->SetInsertPoint(SI);
+ SI->setOperand(0, Builder->CreateBitCast(NewPNodes[PN], SrcTy));
+ }
+ }
+
+ return replaceInstUsesWith(CI, NewPNodes[PN]);
+}
+
Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
// If the operands are integer typed then apply the integer transforms,
// otherwise just apply the common ones.
@@ -1902,6 +2000,11 @@ Instruction *InstCombiner::visitBitCast(
}
}
+ // Handle the A->B->A cast, and there is an intervening PHI node.
+ if (PHINode *PN = dyn_cast<PHINode>(Src))
+ if (Instruction *I = optimizeBitCastFromPhi(CI, PN))
+ return I;
+
if (Instruction *I = canonicalizeBitCastExtElt(CI, *this, DL))
return I;
Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h?rev=262670&r1=262669&r2=262670&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineInternal.h Thu Mar 3 17:21:38 2016
@@ -391,6 +391,7 @@ private:
Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN);
Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask);
Instruction *foldCastedBitwiseLogic(BinaryOperator &I);
+ Instruction *optimizeBitCastFromPhi(CastInst &CI, PHINode *PN);
public:
/// \brief Inserts an instruction \p New before instruction \p Old
Added: llvm/trunk/test/Transforms/InstCombine/pr25342.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/pr25342.ll?rev=262670&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/pr25342.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/pr25342.ll Thu Mar 3 17:21:38 2016
@@ -0,0 +1,93 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%"struct.std::complex" = type { { float, float } }
+@dd = external global %"struct.std::complex", align 4
+@dd2 = external global %"struct.std::complex", align 4
+
+define void @_Z3fooi(i32 signext %n) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %ldd.sroa.0.0 = phi i32 [ 0, %entry ], [ %5, %for.body ] ; i32 PHI fed by a constant and a float->i32 cast
+ %ldd.sroa.6.0 = phi i32 [ 0, %entry ], [ %7, %for.body ] ; second i32 PHI of the same shape
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %0 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
+ %1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
+ %2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
+ %3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
+ %mul.i = fmul float %0, %2
+ %mul4.i = fmul float %1, %3
+ %sub.i = fsub float %mul.i, %mul4.i
+ %mul5.i = fmul float %1, %2
+ %mul6.i = fmul float %0, %3
+ %add.i4 = fadd float %mul5.i, %mul6.i
+ %4 = bitcast i32 %ldd.sroa.0.0 to float ; PHI value cast back to float
+ %add.i = fadd float %sub.i, %4
+ %5 = bitcast float %add.i to i32 ; float->i32 cast feeding the first PHI
+ %6 = bitcast i32 %ldd.sroa.6.0 to float ; PHI value cast back to float
+ %add4.i = fadd float %add.i4, %6
+ %7 = bitcast float %add4.i to i32 ; float->i32 cast feeding the second PHI
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; both i32 PHIs are stored below
+ store i32 %ldd.sroa.0.0, i32* bitcast (%"struct.std::complex"* @dd to i32*), align 4
+ store i32 %ldd.sroa.6.0, i32* bitcast (float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1) to i32*), align 4
+ ret void
+
+; CHECK: phi float
+; CHECK: store float
+; CHECK-NOT: bitcast
+}
+
+
+define void @multi_phi(i32 signext %n) {
+entry:
+ br label %for.cond
+
+for.cond:
+ %ldd.sroa.0.0 = phi i32 [ 0, %entry ], [ %9, %odd.bb ] ; i32 PHI whose back-edge value is another PHI (%9)
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %odd.bb ]
+ %cmp = icmp slt i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %0 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
+ %1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
+ %2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
+ %3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
+ %mul.i = fmul float %0, %2
+ %mul4.i = fmul float %1, %3
+ %sub.i = fsub float %mul.i, %mul4.i
+ %4 = bitcast i32 %ldd.sroa.0.0 to float ; PHI value cast back to float
+ %add.i = fadd float %sub.i, %4
+ %5 = bitcast float %add.i to i32 ; float->i32 cast; one incoming value of %9
+ %inc = add nsw i32 %i.0, 1
+ %bit0 = and i32 %inc, 1
+ %even = icmp slt i32 %bit0, 1
+ br i1 %even, label %even.bb, label %odd.bb
+
+even.bb:
+ %6 = bitcast i32 %5 to float
+ %7 = fadd float %sub.i, %6
+ %8 = bitcast float %7 to i32 ; float->i32 cast; the other incoming value of %9
+ br label %odd.bb
+
+odd.bb:
+ %9 = phi i32 [ %5, %for.body ], [ %8, %even.bb ] ; i32 PHI that in turn feeds %ldd.sroa.0.0
+ br label %for.cond
+
+for.end:
+ store i32 %ldd.sroa.0.0, i32* bitcast (%"struct.std::complex"* @dd to i32*), align 4
+ ret void
+
+; CHECK-LABEL: @multi_phi(
+; CHECK: phi float
+; CHECK: store float
+; CHECK-NOT: bitcast
+}
More information about the llvm-commits
mailing list