[llvm] 1439ef1 - [LoopVectorize] Classify pointer induction updates as scalar only if they have one use

Tue Oct 12 05:33:08 PDT 2021

Author: Kerry McLaughlin
Date: 2021-10-12T13:24:49+01:00
New Revision: 1439ef1a3f05c708bf5a57c4623cf895d7268413

URL: https://github.com/llvm/llvm-project/commit/1439ef1a3f05c708bf5a57c4623cf895d7268413
DIFF: https://github.com/llvm/llvm-project/commit/1439ef1a3f05c708bf5a57c4623cf895d7268413.diff

LOG: [LoopVectorize] Classify pointer induction updates as scalar only if they have one use

collectLoopScalars collects pointer induction updates in ScalarPtrs, assuming
that the instruction will be scalar after vectorization. This may crash later
in VPReplicateRecipe::execute() if there there is another user of the instruction
other than the Phi node which needs to be widened.

This changes collectLoopScalars so that if there are any other users of
Update other than a Phi node, it is not added to ScalarPtrs.

Reviewed By: david-arm, fhahn

Differential Revision: https://reviews.llvm.org/D111294

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index d6b4dce3834fb..acc85ceb693eb 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5130,8 +5130,14 @@ void LoopVectorizationCostModel::collectLoopScalars(ElementCount VF) {
 
       Instruction *Update = cast<Instruction>(
           cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
-      ScalarPtrs.insert(Update);
-      return;
+
+      // If there is more than one user of Update (Ptr), we shouldn't assume it
+      // will be scalar after vectorisation as other users of the instruction
+      // may require widening. Otherwise, add it to ScalarPtrs.
+      if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
+        ScalarPtrs.insert(Update);
+        return;
+      }
     }
     // We only care about bitcast and getelementptr instructions contained in
     // the loop.

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
index 62271b89f6fb1..c2a7f3edea0c9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -116,6 +116,51 @@ exit:                            ; preds = %loop.body
   ret void
 }
 
+define void @pointer_induction(i8* noalias %start, i64 %N) {
+; CHECK-LABEL: @pointer_induction(
+; CHECK:       vector.ph:
+; CHECK:         [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8*> poison, i8* [[START:%.*]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[START]], <vscale x 2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[START]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[NEXT_GEP]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 2 x i8*> [[TMP13]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %ptr.phi = phi i8* [ %ptr.phi.next, %for.body ], [ %start, %entry ]
+  %index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
+  %index_nxt = add i64 %index, 1
+  %0 = load i8, i8* %ptr.phi, align 1
+  %ptr.phi.next = getelementptr inbounds i8, i8* %ptr.phi, i64 1
+  %cmp.i.not = icmp eq i8* %ptr.phi.next, %start
+  %cmp = icmp ult i64 %index, %N
+  br i1 %cmp, label %for.body, label %end, !llvm.loop !0
+
+end:
+  ret void
+}
 
 attributes #0 = {"target-features"="+sve"}