[llvm] 43a5c75 - Revert "[LoopVectorize] Add support for replication of more intrinsics with scalable vectors"
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 6 01:48:22 PDT 2021
Author: David Sherwood
Date: 2021-08-06T09:48:16+01:00
New Revision: 43a5c750d1830e50b4c1b256aea82126caba6b2e
URL: https://github.com/llvm/llvm-project/commit/43a5c750d1830e50b4c1b256aea82126caba6b2e
DIFF: https://github.com/llvm/llvm-project/commit/43a5c750d1830e50b4c1b256aea82126caba6b2e.diff
LOG: Revert "[LoopVectorize] Add support for replication of more intrinsics with scalable vectors"
This reverts commit 95800da914938129083df2fa0165c1901909c273.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/assume.ll
Removed:
llvm/test/Transforms/LoopVectorize/scalable-assume.ll
llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 375079efbb967..e208e2c77e3cd 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5410,21 +5410,6 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
// lane 0 demanded or b) are uses which demand only lane 0 of their operand.
for (auto *BB : TheLoop->blocks())
for (auto &I : *BB) {
- if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(&I)) {
- switch (II->getIntrinsicID()) {
- case Intrinsic::sideeffect:
- case Intrinsic::experimental_noalias_scope_decl:
- case Intrinsic::assume:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- if (TheLoop->hasLoopInvariantOperands(&I))
- addToWorklistIfAllowed(&I);
- LLVM_FALLTHROUGH;
- default:
- break;
- }
- }
-
// ExtractValue instructions must be uniform, because the operands are
// known to be loop-invariant.
if (auto *EVI = dyn_cast<ExtractValueInst>(&I)) {
@@ -8950,37 +8935,6 @@ VPBasicBlock *VPRecipeBuilder::handleReplication(
bool IsPredicated = LoopVectorizationPlanner::getDecisionAndClampRange(
[&](ElementCount VF) { return CM.isPredicatedInst(I); }, Range);
- // Even if the instruction is not marked as uniform, there are certain
- // intrinsic calls that can be effectively treated as such, so we check for
- // them here. Conservatively, we only do this for scalable vectors, since
- // for fixed-width VFs we can always fall back on full scalarization.
- if (!IsUniform && Range.Start.isScalable() && isa<IntrinsicInst>(I)) {
- switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
- case Intrinsic::assume:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- // For scalable vectors if one of the operands is variant then we still
- // want to mark as uniform, which will generate one instruction for just
- // the first lane of the vector. We can't scalarize the call in the same
- // way as for fixed-width vectors because we don't know how many lanes
- // there are.
- //
- // The reasons for doing it this way for scalable vectors are:
- // 1. For the assume intrinsic generating the instruction for the first
- // lane is still be better than not generating any at all. For
- // example, the input may be a splat across all lanes.
- // 2. For the lifetime start/end intrinsics the pointer operand only
- // does anything useful when the input comes from a stack object,
- // which suggests it should always be uniform. For non-stack objects
- // the effect is to poison the object, which still allows us to
- // remove the call.
- IsUniform = true;
- break;
- default:
- break;
- }
- }
-
auto *Recipe = new VPReplicateRecipe(I, Plan->mapToVPValues(I->operands()),
IsUniform, IsPredicated);
setRecipe(I, Recipe);
diff --git a/llvm/test/Transforms/LoopVectorize/assume.ll b/llvm/test/Transforms/LoopVectorize/assume.ll
index b1cb79efa2245..10cb67fd4e6a0 100644
--- a/llvm/test/Transforms/LoopVectorize/assume.ll
+++ b/llvm/test/Transforms/LoopVectorize/assume.ll
@@ -49,8 +49,12 @@ define void @test2(%struct.data* nocapture readonly %d) {
; CHECK: vector.body:
; CHECK: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
; CHECK: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
+; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
; CHECK: for.body:
entry:
%b = getelementptr inbounds %struct.data, %struct.data* %d, i64 0, i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll b/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
deleted file mode 100644
index 808cb70b59999..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/scalable-assume.ll
+++ /dev/null
@@ -1,111 +0,0 @@
-; RUN: opt < %s -scalable-vectorization=on -force-target-supports-scalable-vectors=true -loop-vectorize -force-vector-width=2 -force-vector-interleave=2 -S | FileCheck %s
-
-define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) {
-; CHECK-LABEL: @test1(
-; CHECK: vector.body:
-; CHECK: [[FCMP1:%.*]] = fcmp ogt <vscale x 2 x float>
-; CHECK-NEXT: [[FCMP2:%.*]] = fcmp ogt <vscale x 2 x float>
-; CHECK-NEXT: [[FCMP1L0:%.*]] = extractelement <vscale x 2 x i1> [[FCMP1]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP1L0]])
-; CHECK-NEXT: [[FCMP2L0:%.*]] = extractelement <vscale x 2 x i1> [[FCMP2]], i32 0
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[FCMP2L0]])
-entry:
- br label %for.body
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
- %0 = load float, float* %arrayidx, align 4
- %cmp1 = fcmp ogt float %0, 1.000000e+02
- tail call void @llvm.assume(i1 %cmp1)
- %add = fadd float %0, 1.000000e+00
- %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
- store float %add, float* %arrayidx5, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv, 1599
- br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
-
-for.end: ; preds = %for.body
- ret void
-}
-
-declare void @llvm.assume(i1) #0
-
-attributes #0 = { nounwind willreturn }
-
-%struct.data = type { float*, float* }
-
-define void @test2(float *%a, float *%b) {
-; CHECK-LABEL: @test2(
-; CHECK: entry:
-; CHECK: [[MASKCOND:%.*]] = icmp eq i64 %ptrint1, 0
-; CHECK: [[MASKCOND4:%.*]] = icmp eq i64 %ptrint2, 0
-; CHECK: vector.body:
-; CHECK: tail call void @llvm.assume(i1 [[MASKCOND]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]])
-; CHECK: tail call void @llvm.assume(i1 [[MASKCOND4]])
-; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]])
-entry:
- %ptrint1 = ptrtoint float* %a to i64
- %maskcond = icmp eq i64 %ptrint1, 0
- %ptrint2 = ptrtoint float* %b to i64
- %maskcond4 = icmp eq i64 %ptrint2, 0
- br label %for.body
-
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- tail call void @llvm.assume(i1 %maskcond)
- %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
- %0 = load float, float* %arrayidx, align 4
- %add = fadd float %0, 1.000000e+00
- tail call void @llvm.assume(i1 %maskcond4)
- %arrayidx5 = getelementptr inbounds float, float* %b, i64 %indvars.iv
- store float %add, float* %arrayidx5, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv, 1599
- br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
-
-for.end: ; preds = %for.body
- ret void
-}
-
-; Test case for PR43620. Make sure we can vectorize with predication in presence
-; of assume calls. For now, check that we drop all assumes in predicated blocks
-; in the vector body.
-define void @predicated_assume(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %n) {
-; Check that the vector.body does not contain any assumes.
-; CHECK-LABEL: @predicated_assume(
-; CHECK: vector.body:
-; CHECK-NOT: llvm.assume
-; CHECK: for.body:
-entry:
- br label %for.body
-
-for.body: ; preds = %for.body.preheader, %if.end5
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end5 ]
- %cmp1 = icmp ult i64 %indvars.iv, 495616
- br i1 %cmp1, label %if.end5, label %if.else
-
-if.else: ; preds = %for.body
- %cmp2 = icmp ult i64 %indvars.iv, 991232
- tail call void @llvm.assume(i1 %cmp2)
- br label %if.end5
-
-if.end5: ; preds = %for.body, %if.else
- %x.0 = phi float [ 4.200000e+01, %if.else ], [ 2.300000e+01, %for.body ]
- %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
- %0 = load float, float* %arrayidx, align 4
- %mul = fmul float %x.0, %0
- %arrayidx7 = getelementptr inbounds float, float* %b, i64 %indvars.iv
- store float %mul, float* %arrayidx7, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %cmp = icmp eq i64 %indvars.iv.next, %n
- br i1 %cmp, label %for.cond.cleanup, label %for.body, !llvm.loop !0
-
-for.cond.cleanup: ; preds = %if.end5, %entry
- ret void
-}
-
-!0 = distinct !{!0, !1}
-!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll b/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
deleted file mode 100644
index eeb5efe5c8de9..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/scalable-lifetime.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: opt -S -scalable-vectorization=on -force-target-supports-scalable-vectors=true -loop-vectorize -force-vector-width=2 -force-vector-interleave=1 < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-; Make sure we can vectorize loops which contain lifetime markers.
-
-define void @test(i32 *%d) {
-; CHECK-LABEL: @test(
-; CHECK: entry:
-; CHECK: [[ALLOCA:%.*]] = alloca [1024 x i32], align 16
-; CHECK-NEXT: [[BC:%.*]] = bitcast [1024 x i32]* [[ALLOCA]] to i8*
-; CHECK: vector.body:
-; CHECK: call void @llvm.lifetime.end.p0i8(i64 4096, i8* [[BC]])
-; CHECK: store <vscale x 2 x i32>
-; CHECK: call void @llvm.lifetime.start.p0i8(i64 4096, i8* [[BC]])
-
-entry:
- %arr = alloca [1024 x i32], align 16
- %0 = bitcast [1024 x i32]* %arr to i8*
- call void @llvm.lifetime.start.p0i8(i64 4096, i8* %0) #1
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- call void @llvm.lifetime.end.p0i8(i64 4096, i8* %0) #1
- %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
- %1 = load i32, i32* %arrayidx, align 8
- store i32 100, i32* %arrayidx, align 8
- call void @llvm.lifetime.start.p0i8(i64 4096, i8* %0) #1
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp ne i32 %lftr.wideiv, 128
- br i1 %exitcond, label %for.body, label %for.end, !llvm.loop !0
-
-for.end:
- call void @llvm.lifetime.end.p0i8(i64 4096, i8* %0) #1
- ret void
-}
-
-; CHECK-LABEL: @testloopvariant(
-; CHECK: entry:
-; CHECK: [[ALLOCA:%.*]] = alloca [1024 x i32], align 16
-; CHECK: vector.ph:
-; CHECK: [[TMP1:%.*]] = insertelement <vscale x 2 x [1024 x i32]*> poison, [1024 x i32]* %arr, i32 0
-; CHECK-NEXT: [[SPLAT_ALLOCA:%.*]] = shufflevector <vscale x 2 x [1024 x i32]*> [[TMP1]], <vscale x 2 x [1024 x i32]*> poison, <vscale x 2 x i32> zeroinitializer
-; CHECK: vector.body:
-; CHECK: [[BC_ALLOCA:%.*]] = bitcast <vscale x 2 x [1024 x i32]*> [[SPLAT_ALLOCA]] to <vscale x 2 x i8*>
-; CHECK-NEXT: [[ONE_LIFETIME:%.*]] = extractelement <vscale x 2 x i8*> [[BC_ALLOCA]], i32 0
-; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4096, i8* [[ONE_LIFETIME]])
-; CHECK: store <vscale x 2 x i32>
-; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4096, i8* [[ONE_LIFETIME]])
-
-define void @testloopvariant(i32 *%d) {
-entry:
- %arr = alloca [1024 x i32], align 16
- br label %for.body
-
-for.body:
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %0 = getelementptr [1024 x i32], [1024 x i32]* %arr, i32 0, i64 %indvars.iv
- %1 = bitcast [1024 x i32]* %arr to i8*
- call void @llvm.lifetime.end.p0i8(i64 4096, i8* %1) #1
- %arrayidx = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
- %2 = load i32, i32* %arrayidx, align 8
- store i32 100, i32* %arrayidx, align 8
- call void @llvm.lifetime.start.p0i8(i64 4096, i8* %1) #1
- %indvars.iv.next = add i64 %indvars.iv, 1
- %lftr.wideiv = trunc i64 %indvars.iv.next to i32
- %exitcond = icmp ne i32 %lftr.wideiv, 128
- br i1 %exitcond, label %for.body, label %for.end, !llvm.loop !0
-
-for.end:
- ret void
-}
-
-declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
-
-declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
-
-!0 = distinct !{!0, !1}
-!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll b/llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll
deleted file mode 100644
index 813dfbaa40b51..0000000000000
--- a/llvm/test/Transforms/LoopVectorize/scalable-noalias-scope-decl.ll
+++ /dev/null
@@ -1,127 +0,0 @@
-; RUN: opt < %s -scalable-vectorization=on -force-target-supports-scalable-vectors=true -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s
-
-define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) {
-entry:
- br label %for.body
-
-; CHECK-LABEL: @test1
-; CHECK: vector.body:
-; CHECK: @llvm.experimental.noalias.scope.decl
-; CHECK-NOT: @llvm.experimental.noalias.scope.decl
-; CHECK: for.body:
-; CHECK: @llvm.experimental.noalias.scope.decl
-; CHECK-NOT: @llvm.experimental.noalias.scope.decl
-; CHECK: ret void
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
- %0 = load float, float* %arrayidx, align 4
- %cmp1 = fcmp ogt float %0, 1.000000e+02
- tail call void @llvm.experimental.noalias.scope.decl(metadata !0)
- %add = fadd float %0, 1.000000e+00
- %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
- store float %add, float* %arrayidx5, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv, 1599
- br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
-
-for.end: ; preds = %for.body
- ret void
-}
-
-declare void @llvm.experimental.noalias.scope.decl(metadata)
-
-%struct.data = type { float*, float* }
-
-define void @test2(float* %a, float* %b) {
-; CHECK-LABEL: @test2
-; CHECK: vector.body:
-; CHECK: @llvm.experimental.noalias.scope.decl(metadata [[SCOPE0_LIST:!.*]])
-; CHECK: @llvm.experimental.noalias.scope.decl(metadata [[SCOPE4_LIST:!.*]])
-; CHECK-NOT: @llvm.experimental.noalias.scope.decl
-; CHECK: for.body:
-; CHECK: @llvm.experimental.noalias.scope.decl(metadata [[SCOPE0_LIST]])
-; CHECK: @llvm.experimental.noalias.scope.decl(metadata [[SCOPE4_LIST]])
-; CHECK-NOT: @llvm.experimental.noalias.scope.decl
-; CHECK: ret void
-entry:
- %ptrint = ptrtoint float* %b to i64
- %maskcond = icmp eq i64 %ptrint, 0
- %ptrint2 = ptrtoint float* %a to i64
- %maskcond4 = icmp eq i64 %ptrint2, 0
- br label %for.body
-
-for.body: ; preds = %for.body, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
- tail call void @llvm.experimental.noalias.scope.decl(metadata !0)
- %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
- %0 = load float, float* %arrayidx, align 4
- %add = fadd float %0, 1.000000e+00
- tail call void @llvm.experimental.noalias.scope.decl(metadata !4)
- %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
- store float %add, float* %arrayidx5, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %exitcond = icmp eq i64 %indvars.iv, 1599
- br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !5
-
-for.end: ; preds = %for.body
- ret void
-}
-
-define void @predicated_noalias_scope_decl(float* noalias nocapture readonly %a, float* noalias nocapture %b, i64 %n) {
-
-; Check that the vector.body still contains a llvm.experimental.noalias.scope.decl
-
-; CHECK-LABEL: @predicated_noalias_scope_decl(
-; CHECK: vector.body:
-; CHECK: call void @llvm.experimental.noalias.scope.decl
-; CHECK-NOT: @llvm.experimental.noalias.scope.decl
-; CHECK: scalar.ph:
-; CHECK-NOT: @llvm.experimental.noalias.scope.decl
-; CHECK: if.else:
-; CHECK: call void @llvm.experimental.noalias.scope.decl
-; CHECK-NOT: @llvm.experimental.noalias.scope.decl
-; CHECK: }
-
-entry:
- br label %for.body
-
-for.body: ; preds = %entry, %if.end5
- %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %if.end5 ]
- %cmp1 = icmp ult i64 %indvars.iv, 495616
- br i1 %cmp1, label %if.end5, label %if.else
-
-if.else: ; preds = %for.body
- %cmp2 = icmp ult i64 %indvars.iv, 991232
- tail call void @llvm.experimental.noalias.scope.decl(metadata !0)
- br label %if.end5
-
-if.end5: ; preds = %for.body, %if.else
- %x.0 = phi float [ 4.200000e+01, %if.else ], [ 2.300000e+01, %for.body ]
- %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
- %0 = load float, float* %arrayidx, align 4
- %mul = fmul float %x.0, %0
- %arrayidx7 = getelementptr inbounds float, float* %b, i64 %indvars.iv
- store float %mul, float* %arrayidx7, align 4
- %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
- %cmp = icmp eq i64 %indvars.iv.next, %n
- br i1 %cmp, label %for.cond.cleanup, label %for.body, !llvm.loop !5
-
-for.cond.cleanup: ; preds = %if.end5
- ret void
-}
-
-!0 = !{ !1 }
-!1 = distinct !{ !1, !2 }
-!2 = distinct !{ !2 }
-!3 = distinct !{ !3, !2 }
-!4 = !{ !3 }
-!5 = distinct !{!5, !6}
-!6 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
-
-; CHECK: [[SCOPE0_LIST]] = !{[[SCOPE0:!.*]]}
-; CHECK: [[SCOPE0]] = distinct !{[[SCOPE0]], [[SCOPE0_DOM:!.*]]}
-; CHECK: [[SCOPE0_DOM]] = distinct !{[[SCOPE0_DOM]]}
-; CHECK: [[SCOPE4_LIST]] = !{[[SCOPE4:!.*]]}
-; CHECK: [[SCOPE4]] = distinct !{[[SCOPE4]], [[SCOPE0_DOM]]}
More information about the llvm-commits mailing list