[PATCH] D97278: [VPlan] Support to widen call intructions in VPlan native path

Tue Feb 23 06:03:01 PST 2021

Kazhuu created this revision.
Herald added subscribers: tschuett, psnobl, rogfer01, bollu, hiraditya.
Kazhuu requested review of this revision.
Herald added subscribers: llvm-commits, vkmr.
Herald added a project: LLVM.

Add support to widen call instructions in VPlan native path by using a correct recipe when such instructions are encountered. This is already used by inner loop vectorizer.

Previously call instructions got handled by wrong recipes and resulted in unreachable instruction errors like this one: https://bugs.llvm.org/show_bug.cgi?id=48139.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D97278

Files:
  llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
  llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll


Index: llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll
===================================================================

--- /dev/null
+++ llvm/test/Transforms/LoopVectorize/vplan-widen-call-instruction.ll
@@ -0,0 +1,73 @@
+; RUN: opt -loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s
+
+; Test that VPlan native path is able to widen call intructions like
+; llvm.sqrt.* intrincis calls.
+
+declare double @llvm.sqrt.f64(double %0)
+define void @widen_call_instruction(double* noalias nocapture readonly %a.in, double* noalias nocapture readonly %b.in, double* noalias nocapture %c.out) {
+; CHECK-LABEL: @widen_call_instruction(
+
+; CHECK: vector.body:
+; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ]
+; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ]
+; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, double* %a.in, <4 x i64> %[[VEC_INDEX]]
+; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %[[A_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, double* %b.in, <4 x i64> %[[VEC_INDEX]]
+; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %[[B_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
+; CHECK-NEXT: %[[B_SQRT:.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %[[MASKED_GATHER2]])
+; CHECK-NEXT: br label %[[FOR2_HEADER:.*]]
+
+; CHECK: [[FOR2_HEADER]]:
+; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ zeroinitializer, %vector.body ], [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ]
+; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[MASKED_GATHER1]], %vector.body ], [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ]
+; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[B_SQRT]], %[[REDUCTION]]
+; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], <i32 1, i32 1, i32 1, i32 1>
+; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], <i32 10000, i32 10000, i32 10000, i32 10000>
+; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0
+; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}}
+
+; CHECK: [[FOR1_LATCH]]:
+; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ]
+; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, double* %c.out, <4 x i64> %[[VEC_INDEX]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %[[REDUCTION]], <4 x double*> %[[C_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], <i64 1000, i64 1000, i64 1000, i64 1000>
+; CHECK-NEXT: %{{.*}} = extractelement <4 x i1> %[[VEC_PTR]], i32 0
+; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add i64 %[[FOR1_INDEX]], 4
+; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], <i64 4, i64 4, i64 4, i64 4>
+; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body
+
+entry:
+  br label %for1.header
+
+for1.header:
+  %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ]
+  %a.ptr = getelementptr inbounds double, double* %a.in, i64 %indvar1
+  %a = load double, double* %a.ptr, align 8
+  %b.ptr = getelementptr inbounds double, double* %b.in, i64 %indvar1
+  %b = load double, double* %b.ptr, align 8
+  %b.sqrt = call double @llvm.sqrt.f64(double %b)
+  br label %for2.header
+
+for2.header:
+  %indvar2 = phi i32 [ 0, %for1.header ], [ %indvar21, %for2.header ]
+  %a.reduction = phi double [ %a, %for1.header ], [ %a.reduction1, %for2.header ]
+  %a.reduction1 = fadd double %b.sqrt, %a.reduction
+  %indvar21 = add nuw nsw i32 %indvar2, 1
+  %for2.cond = icmp eq i32 %indvar21, 10000
+  br i1 %for2.cond, label %for1.latch, label %for2.header
+
+for1.latch:
+  %c.ptr = getelementptr inbounds double, double* %c.out, i64 %indvar1
+  store double %a.reduction1, double* %c.ptr, align 8
+  %indvar11 = add nuw nsw i64 %indvar1, 1
+  %for1.cond = icmp eq i64 %indvar11, 1000
+  br i1 %for1.cond, label %exit, label %for1.header, !llvm.loop !0
+
+exit:
+  ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
Index: llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
===================================================================
--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -71,6 +71,9 @@
         } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
           NewRecipe = new VPWidenGEPRecipe(
               GEP, Plan->mapToVPValues(GEP->operands()), OrigLoop);
+        } else if (CallInst *CI = dyn_cast<CallInst>(Inst)) {
+          NewRecipe = new VPWidenCallRecipe(
+              *CI, Plan->mapToVPValues(CI->arg_operands()));
         } else {
           NewRecipe =
               new VPWidenRecipe(*Inst, Plan->mapToVPValues(Inst->operands()));


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D97278.325760.patch
Type: text/x-patch
Size: 5347 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210223/f70d6cb1/attachment.bin>