[llvm] a3c964a - [LV] Fix recording of BranchTakenCount for FoldTail

Ayal Zaks via llvm-commits llvm-commits at lists.llvm.org
Sun Apr 26 10:16:51 PDT 2020


Author: Ayal Zaks
Date: 2020-04-26T20:13:10+03:00
New Revision: a3c964a278b434a587f447dac144bb7d92227dab

URL: https://github.com/llvm/llvm-project/commit/a3c964a278b434a587f447dac144bb7d92227dab
DIFF: https://github.com/llvm/llvm-project/commit/a3c964a278b434a587f447dac144bb7d92227dab.diff

LOG: [LV] Fix recording of BranchTakenCount for FoldTail

When folding tail, branch taken count is computed during initial VPlan execution
and recorded to be used by the compare computing the loop's mask. This recording
should directly set the State, instead of reusing Value2VPValue mapping which
serves original Values present prior to vectorization.
The branch taken count may be a constant Value, which may be used elsewhere in
the loop; trying to employ Value2VPValue for both leads to the issue reported in
https://reviews.llvm.org/D76992#inline-721028

Differential Revision: https://reviews.llvm.org/D78847

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/VPlan.cpp
    llvm/lib/Transforms/Vectorize/VPlan.h
    llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
    llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index b094a6f33fa4..1a38090bede8 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -442,7 +442,9 @@ void VPlan::execute(VPTransformState *State) {
     IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
     auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
                                    "trip.count.minus.1");
-    Value2VPValue[TCMO] = BackedgeTakenCount;
+    Value *VTCMO = Builder.CreateVectorSplat(State->VF, TCMO, "broadcast");
+    for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part)
+      State->set(BackedgeTakenCount, VTCMO, Part);
   }
 
   // 0. Set the reverse mapping from VPValues to Values for code generation.

diff  --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 25afb09129ff..1a112d35d959 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1544,10 +1544,9 @@ class VPlan {
     if (Entry)
       VPBlockBase::deleteCFG(Entry);
     for (auto &MapEntry : Value2VPValue)
-      if (MapEntry.second != BackedgeTakenCount)
-        delete MapEntry.second;
+      delete MapEntry.second;
     if (BackedgeTakenCount)
-      delete BackedgeTakenCount; // Delete once, if in Value2VPValue or not.
+      delete BackedgeTakenCount;
     for (VPValue *Def : VPExternalDefs)
       delete Def;
     for (VPValue *CBV : VPCBVs)

diff  --git a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
index 27250dd59a53..ca468b785fda 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll
@@ -202,10 +202,10 @@ for.end:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
 ; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
-; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
+; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
+; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; ENABLED_MASKED_STRIDED:       vector.body:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -300,10 +300,10 @@ for.end:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[N_RND_UP:%.*]] = add i32 [[N]], 7
 ; ENABLED_MASKED_STRIDED-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
 ; ENABLED_MASKED_STRIDED-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[N]], -1
-; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
-; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> undef, i32 [[TRIP_COUNT_MINUS_1]], i32 0
 ; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> undef, <8 x i32> zeroinitializer
+; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i32> undef, i32 [[CONV]], i32 0
+; ENABLED_MASKED_STRIDED-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT]], <8 x i32> undef, <8 x i32> zeroinitializer
 ; ENABLED_MASKED_STRIDED-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; ENABLED_MASKED_STRIDED:       vector.body:
 ; ENABLED_MASKED_STRIDED-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

diff  --git a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
index b51cb56045bf..a090b15bda09 100644
--- a/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
+++ b/llvm/test/Transforms/LoopVectorize/tail-folding-counting-down.ll
@@ -39,3 +39,36 @@ while.end.loopexit:
 while.end:
   ret void
 }
+
+; Make sure a loop is successfully vectorized with fold-tail when the backedge
+; taken count is constant and used inside the loop. Issue revealed by D76992.
+;
+define void @reuse_const_btc(i8* %A) optsize {
+; CHECK-LABEL: @reuse_const_btc
+; CHECK: {{%.*}} = icmp ule <4 x i32> {{%.*}}, <i32 13, i32 13, i32 13, i32 13>
+; CHECK: {{%.*}} = select <4 x i1> {{%.*}}, <4 x i32> <i32 12, i32 12, i32 12, i32 12>, <4 x i32> <i32 13, i32 13, i32 13, i32 13>
+;
+entry:
+  br label %loop
+
+loop:
+  %riv = phi i32 [ 13, %entry ], [ %rivMinus1, %merge ]
+  %sub = sub nuw nsw i32 20, %riv
+  %arrayidx = getelementptr inbounds i8, i8* %A, i32 %sub
+  %cond0 = icmp eq i32 %riv, 7
+  br i1 %cond0, label %then, label %else
+then:
+  br label %merge
+else:
+  br label %merge
+merge:
+  %blend = phi i32 [ 13, %then ], [ 12, %else ]
+  %trunc = trunc i32 %blend to i8
+  store i8 %trunc, i8* %arrayidx, align 1
+  %rivMinus1 = add nuw nsw i32 %riv, -1
+  %cond = icmp eq i32 %riv, 0
+  br i1 %cond, label %exit, label %loop
+
+exit:
+  ret void
+}


        


More information about the llvm-commits mailing list