[PATCH] D105824: [LV] Avoid scalable vectorization for loops containing alloca

Kerry McLaughlin via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 14 10:46:26 PDT 2021


kmclaughlin updated this revision to Diff 358664.
kmclaughlin marked 4 inline comments as done.
kmclaughlin added a comment.

Changes to scalable-alloca.ll:

- Added a loop hint to force vectorization with a scalable VF
- Rebased the patch and added CHECK-REMARKS lines for the "invalid costs" remarks emitted for the loop (the remarks come from D105806 <https://reviews.llvm.org/D105806>)
- Passed `i32** %vla` to the @alloca function


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105824/new/

https://reviews.llvm.org/D105824

Files:
  llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
  llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll


Index: llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/AArch64/scalable-alloca.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -loop-vectorize -mattr=+sve -mtriple aarch64-unknown-linux-gnu -scalable-vectorization=preferred -pass-remarks-analysis=loop-vectorize < %s 2>%t | FileCheck %s
+; RUN: FileCheck %s --check-prefix=CHECK-REMARKS < %t
+
+; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2): alloca
+; CHECK-REMARKS: Instruction with invalid costs prevented vectorization at VF=(vscale x 1): store
+define void @alloca(i32** %vla, i64 %N) {
+; CHECK-LABEL: @alloca(
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP0:%.*]] = alloca i32, align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = alloca i32, align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32*, i32** [[VLA:%.*]], i64 [[INDUCTION]]
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32*, i32** [[VLA]], i64 [[INDUCTION1]]
+; CHECK-NEXT:    store i32* [[TMP0]], i32** [[TMP2]], align 8
+; CHECK-NEXT:    store i32* [[TMP1]], i32** [[TMP3]], align 8
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ {{.*}}, %middle.block ], [ 0, %entry ]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %for.body ], [ [[BC_RESUME_VAL]], %scalar.ph ]
+; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca i32, align 16
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32*, i32** [[VLA]], i64 [[IV]]
+; CHECK-NEXT:    store i32* [[ALLOCA]], i32** [[ARRAYIDX]], align 8
+; CHECK:       for.end:
+; CHECK-NEXT:    call void @foo(i32** nonnull [[VLA]])
+; CHECK-NEXT:    ret void
+;
+
+; CHECK-NOT: <vscale x
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ %iv.next, %for.body ], [ 0, %entry ]
+  %alloca = alloca i32, align 16
+  %arrayidx = getelementptr inbounds i32*, i32** %vla, i64 %iv
+  store i32* %alloca, i32** %arrayidx, align 8
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+  call void @foo(i32** nonnull %vla)
+  ret void
+}
+
+declare void @foo(i32**)
+
+!0 = !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
Index: llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -7892,6 +7892,13 @@
   }
   case Instruction::ExtractValue:
     return TTI.getInstructionCost(I, TTI::TCK_RecipThroughput);
+  case Instruction::Alloca:
+    // We cannot easily widen alloca to a scalable alloca, as
+    // the result would need to be a vector of pointers.
+    // Return an Invalid cost if the VF is Scalable.
+    if (VF.isScalable())
+      return InstructionCost::getInvalid();
+    LLVM_FALLTHROUGH;
   default:
     // This opcode is unknown. Assume that it is the same as 'mul'.
     return TTI.getArithmeticInstrCost(Instruction::Mul, VectorTy, CostKind);


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D105824.358664.patch
Type: text/x-patch
Size: 3402 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210714/8baa0fc4/attachment.bin>


More information about the llvm-commits mailing list