[PATCH] D111294: [LoopVectorize] Classify pointer induction updates as scalar only if they have one use

Kerry McLaughlin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 8 07:01:06 PDT 2021


kmclaughlin updated this revision to Diff 378200.
kmclaughlin marked 3 inline comments as done.
kmclaughlin retitled this revision from "[LoopVectorize] Fix crash in VPReplicateRecipe::execute() for scalable vectors" to "[LoopVectorize] Classify pointer induction updates as scalar only if they have one use".
kmclaughlin edited the summary of this revision.
kmclaughlin added a comment.

- Return early from collectLoopScalars only if Update has a single use. Removed the line that inserted Update into PossibleNonScalarPtrs when this is not the case; the code below the `if (isScalarPtrInduction...` block now handles the other cases instead.
- Return void from `@pointer_induction` test


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D111294/new/

https://reviews.llvm.org/D111294

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll


Index: llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
===================================================================
--- llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
+++ llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll
@@ -116,6 +116,55 @@
   ret void
 }
 
+define void @pointer_induction([64 x i8]* noalias %buffer, i64 %N) {
+; CHECK-LABEL: @pointer_induction(
+; CHECK:       entry:
+; CHECK:         [[BUFFER_START:%.*]] = getelementptr inbounds [64 x i8], [64 x i8]* [[BUFFER:%.*]], i64 0, i64 0
+; CHECK:       vector.ph:
+; CHECK:         [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i8*> poison, i8* [[BUFFER_START]], i32 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i8*> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i8*> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP5:%.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[INDEX1]], i32 0
+; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = add <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 0, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer), [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT]], [[TMP6]]
+; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr i8, i8* [[BUFFER_START]], <vscale x 2 x i64> [[TMP7]]
+; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr i8, i8* [[BUFFER_START]], i64 [[TMP8]]
+; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX1]], 1
+; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr i8, i8* [[BUFFER_START]], i64 [[TMP9]]
+; CHECK-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX1]], 0
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr i8, i8* [[NEXT_GEP3]], i32 0
+; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i8* [[TMP11]] to <vscale x 2 x i8>*
+; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 2 x i8>, <vscale x 2 x i8>* [[TMP12]], align 1
+; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i8, <vscale x 2 x i8*> [[NEXT_GEP]], i64 1
+; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq <vscale x 2 x i8*> [[TMP13]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT:    [[TMP16:%.*]] = mul i64 [[TMP15]], 2
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+;
+entry:
+  %buffer.start = getelementptr inbounds [64 x i8], [64 x i8]* %buffer, i64 0, i64 0
+  %add.ptr.i = getelementptr inbounds [64 x i8], [64 x i8]* %buffer, i64 64, i64 64
+  br label %for.body
+
+for.body:
+  %first.sroa = phi i8* [ %incdec.ptr, %for.body ], [ %buffer.start, %entry ]
+  %index = phi i64 [ %index_nxt, %for.body ], [ 0, %entry ]
+  %index_nxt = add i64 %index, 1
+  %0 = load i8, i8* %first.sroa, align 1
+  %incdec.ptr = getelementptr inbounds i8, i8* %first.sroa, i64 1
+  %cmp.i.not = icmp eq i8* %incdec.ptr, %buffer.start
+  %cmp = icmp ult i64 %index, %N
+  br i1 %cmp, label %for.body, label %end, !llvm.loop !0
+
+end:
+  ret void
+}
 
 attributes #0 = {"target-features"="+sve"}
 
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5128,8 +5128,14 @@
 
       Instruction *Update = cast<Instruction>(
           cast<PHINode>(Ptr)->getIncomingValueForBlock(Latch));
-      ScalarPtrs.insert(Update);
-      return;
+
+      // If Update has users other than Ptr, we shouldn't assume it will be
+      // scalar after vectorisation, as those other users may require
+      // widening. Otherwise, add it to ScalarPtrs.
+      if (Update->hasOneUse() && cast<Value>(*Update->user_begin()) == Ptr) {
+        ScalarPtrs.insert(Update);
+        return;
+      }
     }
     // We only care about bitcast and getelementptr instructions contained in
     // the loop.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D111294.378200.patch
Type: text/x-patch
Size: 4522 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20211008/85d2040d/attachment-0001.bin>


More information about the llvm-commits mailing list