[llvm] r270113 - Recommit r255691 since PR26509 has been fixed.
Wei Mi via llvm-commits
llvm-commits at lists.llvm.org
Thu May 19 15:02:00 PDT 2016
Ok, will do that. Thanks.
Wei.
On Thu, May 19, 2016 at 2:36 PM, Quentin Colombet <qcolombet at apple.com> wrote:
> Hi Wei,
>
> For future reference, it is nice to repeat the original commit message.
> That avoids digging into logs.
>
> Cheers,
> -Quentin
>> On May 19, 2016, at 1:38 PM, Wei Mi via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>>
>> Author: wmi
>> Date: Thu May 19 15:38:03 2016
>> New Revision: 270113
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=270113&view=rev
>> Log:
>> Recommit r255691 since PR26509 has been fixed.
>>
>> Added:
>> llvm/trunk/test/Transforms/LoopVectorize/X86/reg-usage.ll
>> Modified:
>> llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>> llvm/trunk/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
>>
>> Modified: llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp?rev=270113&r1=270112&r2=270113&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp (original)
>> +++ llvm/trunk/lib/Transforms/Vectorize/LoopVectorize.cpp Thu May 19 15:38:03 2016
>> @@ -1518,15 +1518,14 @@ private:
>> /// different operations.
>> class LoopVectorizationCostModel {
>> public:
>> - LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
>> - LoopVectorizationLegality *Legal,
>> + LoopVectorizationCostModel(Loop *L, PredicatedScalarEvolution &PSE,
>> + LoopInfo *LI, LoopVectorizationLegality *Legal,
>> const TargetTransformInfo &TTI,
>> const TargetLibraryInfo *TLI, DemandedBits *DB,
>> AssumptionCache *AC, const Function *F,
>> - const LoopVectorizeHints *Hints,
>> - SmallPtrSetImpl<const Value *> &ValuesToIgnore)
>> - : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
>> - TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {}
>> + const LoopVectorizeHints *Hints)
>> + : TheLoop(L), PSE(PSE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB),
>> + AC(AC), TheFunction(F), Hints(Hints) {}
>>
>> /// Information about vectorization costs
>> struct VectorizationFactor {
>> @@ -1573,6 +1572,9 @@ public:
>> /// given vectorization factors.
>> SmallVector<RegisterUsage, 8> calculateRegisterUsage(ArrayRef<unsigned> VFs);
>>
>> + /// Collect values we want to ignore in the cost model.
>> + void collectValuesToIgnore();
>> +
>> private:
>> /// The vectorization cost is a combination of the cost itself and a boolean
>> /// indicating whether any of the contributing operations will actually
>> @@ -1617,8 +1619,8 @@ public:
>>
>> /// The loop that we evaluate.
>> Loop *TheLoop;
>> - /// Scev analysis.
>> - ScalarEvolution *SE;
>> + /// Predicated scalar evolution analysis.
>> + PredicatedScalarEvolution &PSE;
>> /// Loop Info analysis.
>> LoopInfo *LI;
>> /// Vectorization legality.
>> @@ -1627,13 +1629,17 @@ public:
>> const TargetTransformInfo &TTI;
>> /// Target Library Info.
>> const TargetLibraryInfo *TLI;
>> - /// Demanded bits analysis
>> + /// Demanded bits analysis.
>> DemandedBits *DB;
>> + /// Assumption cache.
>> + AssumptionCache *AC;
>> const Function *TheFunction;
>> - // Loop Vectorize Hint.
>> + /// Loop Vectorize Hint.
>> const LoopVectorizeHints *Hints;
>> - // Values to ignore in the cost model.
>> - const SmallPtrSetImpl<const Value *> &ValuesToIgnore;
>> + /// Values to ignore in the cost model.
>> + SmallPtrSet<const Value *, 16> ValuesToIgnore;
>> + /// Values to ignore in the cost model when VF > 1.
>> + SmallPtrSet<const Value *, 16> VecValuesToIgnore;
>> };
>>
>> /// \brief This holds vectorization requirements that must be verified late in
>> @@ -1881,19 +1887,10 @@ struct LoopVectorize : public FunctionPa
>> return false;
>> }
>>
>> - // Collect values we want to ignore in the cost model. This includes
>> - // type-promoting instructions we identified during reduction detection.
>> - SmallPtrSet<const Value *, 32> ValuesToIgnore;
>> - CodeMetrics::collectEphemeralValues(L, AC, ValuesToIgnore);
>> - for (auto &Reduction : *LVL.getReductionVars()) {
>> - RecurrenceDescriptor &RedDes = Reduction.second;
>> - SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
>> - ValuesToIgnore.insert(Casts.begin(), Casts.end());
>> - }
>> -
>> // Use the cost model.
>> - LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC,
>> - F, &Hints, ValuesToIgnore);
>> + LoopVectorizationCostModel CM(L, PSE, LI, &LVL, *TTI, TLI, DB, AC, F,
>> + &Hints);
>> + CM.collectValuesToIgnore();
>>
>> // Check the function attributes to find out if this function should be
>> // optimized for size.
>> @@ -5190,7 +5187,7 @@ LoopVectorizationCostModel::selectVector
>> }
>>
>> // Find the trip count.
>> - unsigned TC = SE->getSmallConstantTripCount(TheLoop);
>> + unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
>> DEBUG(dbgs() << "LV: Found trip count: " << TC << '\n');
>>
>> MinBWs = computeMinimumValueSizes(TheLoop->getBlocks(), *DB, &TTI);
>> @@ -5409,7 +5406,7 @@ unsigned LoopVectorizationCostModel::sel
>> return 1;
>>
>> // Do not interleave loops with a relatively small trip count.
>> - unsigned TC = SE->getSmallConstantTripCount(TheLoop);
>> + unsigned TC = PSE.getSE()->getSmallConstantTripCount(TheLoop);
>> if (TC > 1 && TC < TinyTripCountInterleaveThreshold)
>> return 1;
>>
>> @@ -5639,15 +5636,15 @@ LoopVectorizationCostModel::calculateReg
>> if (!Ends.count(I))
>> continue;
>>
>> - // Skip ignored values.
>> - if (ValuesToIgnore.count(I))
>> - continue;
>> -
>> // Remove all of the instructions that end at this location.
>> InstrList &List = TransposeEnds[i];
>> for (unsigned int j = 0, e = List.size(); j < e; ++j)
>> OpenIntervals.erase(List[j]);
>>
>> + // Skip ignored values.
>> + if (ValuesToIgnore.count(I))
>> + continue;
>> +
>> // For each VF find the maximum usage of registers.
>> for (unsigned j = 0, e = VFs.size(); j < e; ++j) {
>> if (VFs[j] == 1) {
>> @@ -5657,8 +5654,12 @@ LoopVectorizationCostModel::calculateReg
>>
>> // Count the number of live intervals.
>> unsigned RegUsage = 0;
>> - for (auto Inst : OpenIntervals)
>> + for (auto Inst : OpenIntervals) {
>> + // Skip ignored values for VF > 1.
>> + if (VecValuesToIgnore.count(Inst))
>> + continue;
>> RegUsage += GetRegUsage(Inst->getType(), VFs[j]);
>> + }
>> MaxUsages[j] = std::max(MaxUsages[j], RegUsage);
>> }
>>
>> @@ -5830,6 +5831,7 @@ unsigned LoopVectorizationCostModel::get
>> if (VF > 1 && MinBWs.count(I))
>> RetTy = IntegerType::get(RetTy->getContext(), MinBWs[I]);
>> VectorTy = ToVectorTy(RetTy, VF);
>> + auto SE = PSE.getSE();
>>
>> // TODO: We need to estimate the cost of intrinsic calls.
>> switch (I->getOpcode()) {
>> @@ -6158,6 +6160,79 @@ bool LoopVectorizationCostModel::isConse
>> return false;
>> }
>>
>> +void LoopVectorizationCostModel::collectValuesToIgnore() {
>> + // Ignore ephemeral values.
>> + CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
>> +
>> + // Ignore type-promoting instructions we identified during reduction
>> + // detection.
>> + for (auto &Reduction : *Legal->getReductionVars()) {
>> + RecurrenceDescriptor &RedDes = Reduction.second;
>> + SmallPtrSetImpl<Instruction *> &Casts = RedDes.getCastInsts();
>> + VecValuesToIgnore.insert(Casts.begin(), Casts.end());
>> + }
>> +
>> + // Ignore induction phis that are only used in either GetElementPtr or ICmp
>> + // instruction to exit loop. Induction variables usually have large types and
>> + // can have big impact when estimating register usage.
>> + // This is for when VF > 1.
>> + for (auto &Induction : *Legal->getInductionVars()) {
>> + auto *PN = Induction.first;
>> + auto *UpdateV = PN->getIncomingValueForBlock(TheLoop->getLoopLatch());
>> +
>> + // Check that the PHI is only used by the induction increment (UpdateV) or
>> + // by GEPs. Then check that UpdateV is only used by a compare instruction or
>> + // the loop header PHI.
>> + // FIXME: Need precise def-use analysis to determine if this induction
>> + // variable will be vectorized.
>> + if (std::all_of(PN->user_begin(), PN->user_end(),
>> + [&](const User *U) -> bool {
>> + return U == UpdateV || isa<GetElementPtrInst>(U);
>> + }) &&
>> + std::all_of(UpdateV->user_begin(), UpdateV->user_end(),
>> + [&](const User *U) -> bool {
>> + return U == PN || isa<ICmpInst>(U);
>> + })) {
>> + VecValuesToIgnore.insert(PN);
>> + VecValuesToIgnore.insert(UpdateV);
>> + }
>> + }
>> +
>> + // Ignore instructions that will not be vectorized.
>> + // This is for when VF > 1.
>> + for (auto bb = TheLoop->block_begin(), be = TheLoop->block_end(); bb != be;
>> + ++bb) {
>> + for (auto &Inst : **bb) {
>> + switch (Inst.getOpcode())
>> + case Instruction::GetElementPtr: {
>> + // Ignore GEP if its last operand is an induction variable so that it is
>> + // a consecutive load/store and won't be vectorized as scatter/gather
>> + // pattern.
>> +
>> + GetElementPtrInst *Gep = cast<GetElementPtrInst>(&Inst);
>> + unsigned NumOperands = Gep->getNumOperands();
>> + unsigned InductionOperand = getGEPInductionOperand(Gep);
>> + bool GepToIgnore = true;
>> +
>> + // Check that all of the gep indices are uniform except for the
>> + // induction operand.
>> + for (unsigned i = 0; i != NumOperands; ++i) {
>> + if (i != InductionOperand &&
>> + !PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)),
>> + TheLoop)) {
>> + GepToIgnore = false;
>> + break;
>> + }
>> + }
>> +
>> + if (GepToIgnore)
>> + VecValuesToIgnore.insert(&Inst);
>> + break;
>> + }
>> + }
>> + }
>> +}
>> +
>> void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr,
>> bool IfPredicateStore) {
>> assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
>>
>> Added: llvm/trunk/test/Transforms/LoopVectorize/X86/reg-usage.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/reg-usage.ll?rev=270113&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopVectorize/X86/reg-usage.ll (added)
>> +++ llvm/trunk/test/Transforms/LoopVectorize/X86/reg-usage.ll Thu May 19 15:38:03 2016
>> @@ -0,0 +1,71 @@
>> +; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -S 2>&1 | FileCheck %s
>> +; REQUIRES: asserts
>> +
>> +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
>> +target triple = "x86_64-unknown-linux-gnu"
>> +
>> + at a = global [1024 x i8] zeroinitializer, align 16
>> + at b = global [1024 x i8] zeroinitializer, align 16
>> +
>> +define i32 @foo() {
>> +; This function has a loop of SAD pattern. Here we check when VF = 16 the
>> +; register usage doesn't exceed 16.
>> +;
>> +; CHECK-LABEL: foo
>> +; CHECK: LV(REG): VF = 4
>> +; CHECK-NEXT: LV(REG): Found max usage: 4
>> +; CHECK: LV(REG): VF = 8
>> +; CHECK-NEXT: LV(REG): Found max usage: 7
>> +; CHECK: LV(REG): VF = 16
>> +; CHECK-NEXT: LV(REG): Found max usage: 13
>> +
>> +entry:
>> + br label %for.body
>> +
>> +for.cond.cleanup:
>> + %add.lcssa = phi i32 [ %add, %for.body ]
>> + ret i32 %add.lcssa
>> +
>> +for.body:
>> + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
>> + %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
>> + %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
>> + %0 = load i8, i8* %arrayidx, align 1
>> + %conv = zext i8 %0 to i32
>> + %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
>> + %1 = load i8, i8* %arrayidx2, align 1
>> + %conv3 = zext i8 %1 to i32
>> + %sub = sub nsw i32 %conv, %conv3
>> + %ispos = icmp sgt i32 %sub, -1
>> + %neg = sub nsw i32 0, %sub
>> + %2 = select i1 %ispos, i32 %sub, i32 %neg
>> + %add = add nsw i32 %2, %s.015
>> + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
>> + %exitcond = icmp eq i64 %indvars.iv.next, 1024
>> + br i1 %exitcond, label %for.cond.cleanup, label %for.body
>> +}
>> +
>> +define i64 @bar(i64* nocapture %a) {
>> +; CHECK-LABEL: bar
>> +; CHECK: LV(REG): VF = 2
>> +; CHECK: LV(REG): Found max usage: 4
>> +;
>> +entry:
>> + br label %for.body
>> +
>> +for.cond.cleanup:
>> + %add2.lcssa = phi i64 [ %add2, %for.body ]
>> + ret i64 %add2.lcssa
>> +
>> +for.body:
>> + %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
>> + %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
>> + %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
>> + %0 = load i64, i64* %arrayidx, align 8
>> + %add = add nsw i64 %0, %i.012
>> + store i64 %add, i64* %arrayidx, align 8
>> + %add2 = add nsw i64 %add, %s.011
>> + %inc = add nuw nsw i64 %i.012, 1
>> + %exitcond = icmp eq i64 %inc, 1024
>> + br i1 %exitcond, label %for.cond.cleanup, label %for.body
>> +}
>>
>> Modified: llvm/trunk/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll?rev=270113&r1=270112&r2=270113&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll (original)
>> +++ llvm/trunk/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll Thu May 19 15:38:03 2016
>> @@ -16,7 +16,7 @@ target triple = "x86_64-unknown-linux-gn
>> ; -vectorizer-maximize-bandwidth is indicated.
>> ;
>> ; CHECK-label: foo
>> -; CHECK: LV: Selecting VF: 16.
>> +; CHECK: LV: Selecting VF: 32.
>> define void @foo() {
>> entry:
>> br label %for.body
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list