[llvm] ef198cd - [SVE] Remove usage of getMaxVScale for AArch64, in favour of IR Attribute
Dylan Fleming via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 17 06:43:27 PDT 2021
Author: Dylan Fleming
Date: 2021-08-17T14:42:47+01:00
New Revision: ef198cd99e6bac3a2e87adb6c8a18fb461056fa6
URL: https://github.com/llvm/llvm-project/commit/ef198cd99e6bac3a2e87adb6c8a18fb461056fa6
DIFF: https://github.com/llvm/llvm-project/commit/ef198cd99e6bac3a2e87adb6c8a18fb461056fa6.diff
LOG: [SVE] Remove usage of getMaxVScale for AArch64, in favour of IR Attribute
Removed the AArch64 implementation of the getMaxVScale interface, replacing
it with the vscale_range(min, max) IR attribute.
Reviewed By: paulwalker-arm
Differential Revision: https://reviews.llvm.org/D106277
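
For illustration, a minimal sketch of the IR this produces (the function is
hypothetical; the attribute values follow the tests updated below): with
-msve-vector-bits=512 the frontend computes vscale = 512/128 = 4, while bare
+sve only pins the architectural range.

; -msve-vector-bits=512: vscale is fixed, so min == max == 4
define void @func() #0 { ret void }
attributes #0 = { vscale_range(4,4) }

; +sve with no fixed vector width: only the architectural cap is known
; attributes #0 = { vscale_range(0,16) }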
Added:
Modified:
clang/include/clang/Basic/TargetInfo.h
clang/lib/Basic/Targets/AArch64.cpp
clang/lib/Basic/Targets/AArch64.h
clang/lib/CodeGen/CodeGenFunction.cpp
clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h
index c7a57a7dba9a8..21289b0dfd04c 100644
--- a/clang/include/clang/Basic/TargetInfo.h
+++ b/clang/include/clang/Basic/TargetInfo.h
@@ -871,6 +871,11 @@ class TargetInfo : public virtual TransferrableTargetInfo,
/// across the current set of primary and secondary targets.
virtual ArrayRef<Builtin::Info> getTargetBuiltins() const = 0;
+ /// Returns the target-specific min and max values for the vscale_range attribute.
+ virtual Optional<std::pair<unsigned, unsigned>>
+ getVScaleRange(const LangOptions &LangOpts) const {
+ return None;
+ }
/// The __builtin_clz* and __builtin_ctz* built-in
/// functions are specified to have undefined results for zero inputs, but
/// on targets that support these operations in a way that provides
diff --git a/clang/lib/Basic/Targets/AArch64.cpp b/clang/lib/Basic/Targets/AArch64.cpp
index e163ebfa2348b..2b5bf34a7b23f 100644
--- a/clang/lib/Basic/Targets/AArch64.cpp
+++ b/clang/lib/Basic/Targets/AArch64.cpp
@@ -424,6 +424,17 @@ ArrayRef<Builtin::Info> AArch64TargetInfo::getTargetBuiltins() const {
Builtin::FirstTSBuiltin);
}
+Optional<std::pair<unsigned, unsigned>>
+AArch64TargetInfo::getVScaleRange(const LangOptions &LangOpts) const {
+ if (LangOpts.ArmSveVectorBits) {
+ unsigned VScale = LangOpts.ArmSveVectorBits / 128;
+ return std::pair<unsigned, unsigned>(VScale, VScale);
+ }
+ if (hasFeature("sve"))
+ return std::pair<unsigned, unsigned>(0, 16);
+ return None;
+}
+
bool AArch64TargetInfo::hasFeature(StringRef Feature) const {
return Feature == "aarch64" || Feature == "arm64" || Feature == "arm" ||
(Feature == "neon" && (FPU & NeonMode)) ||
diff --git a/clang/lib/Basic/Targets/AArch64.h b/clang/lib/Basic/Targets/AArch64.h
index 46882a808336b..12830348fb453 100644
--- a/clang/lib/Basic/Targets/AArch64.h
+++ b/clang/lib/Basic/Targets/AArch64.h
@@ -96,6 +96,9 @@ class LLVM_LIBRARY_VISIBILITY AArch64TargetInfo : public TargetInfo {
ArrayRef<Builtin::Info> getTargetBuiltins() const override;
+ Optional<std::pair<unsigned, unsigned>>
+ getVScaleRange(const LangOptions &LangOpts) const override;
+
bool hasFeature(StringRef Feature) const override;
bool handleTargetFeatures(std::vector<std::string> &Features,
DiagnosticsEngine &Diags) override;
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index f5eed8572daa3..dca42045325df 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -484,11 +484,13 @@ void CodeGenFunction::FinishFunction(SourceLocation EndLoc) {
// function.
CurFn->addFnAttr("min-legal-vector-width", llvm::utostr(LargestVectorWidth));
- // Add vscale attribute if appropriate.
- if (getLangOpts().ArmSveVectorBits) {
- unsigned VScale = getLangOpts().ArmSveVectorBits / 128;
- CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(getLLVMContext(),
- VScale, VScale));
+ // Add vscale_range attribute if appropriate.
+ Optional<std::pair<unsigned, unsigned>> VScaleRange =
+ getContext().getTargetInfo().getVScaleRange(getLangOpts());
+ if (VScaleRange) {
+ CurFn->addFnAttr(llvm::Attribute::getWithVScaleRangeArgs(
+ getLLVMContext(), VScaleRange.getValue().first,
+ VScaleRange.getValue().second));
}
// If we generated an unreachable return block, delete it now.
diff --git a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
index 84541f9cb12db..eb5c4f31044cf 100644
--- a/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
+++ b/clang/test/CodeGen/arm-sve-vector-bits-vscale-range.c
@@ -3,10 +3,13 @@
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=512 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=512
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=1024 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=1024
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=2048 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=2048
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=128 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=128
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=256 -S -emit-llvm -o - %s | FileCheck %s -D#VBITS=256
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -msve-vector-bits=scalable -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -S -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-NONE
// CHECK-LABEL: @func() #0
// CHECK: attributes #0 = { {{.*}} vscale_range([[#div(VBITS,128)]],[[#div(VBITS,128)]]) {{.*}} }
-// CHECK-NONE-NOT: vscale_range
+// CHECK-NONE: attributes #0 = { {{.*}} vscale_range(0,16) {{.*}} }
void func() {}
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 882f1c01664fc..1c20dddfbf4b9 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1589,7 +1589,7 @@ InstructionCost AArch64TTIImpl::getGatherScatterOpCost(
ElementCount LegalVF = LT.second.getVectorElementCount();
InstructionCost MemOpCost =
getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind, I);
- return LT.first * MemOpCost * getMaxNumElements(LegalVF);
+ return LT.first * MemOpCost * getMaxNumElements(LegalVF, I->getFunction());
}
bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 5369eb7b7e828..5c095048ba0a3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -125,22 +125,25 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return ST->getMinVectorRegisterBitWidth();
}
- Optional<unsigned> getMaxVScale() const {
- if (ST->hasSVE())
- return AArch64::SVEMaxBitsPerVector / AArch64::SVEBitsPerBlock;
- return BaseT::getMaxVScale();
- }
/// Try to return an estimate cost factor that can be used as a multiplier
/// when scalarizing an operation for a vector with ElementCount \p VF.
/// For scalable vectors this currently takes the most pessimistic view based
/// upon the maximum possible value for vscale.
- unsigned getMaxNumElements(ElementCount VF) const {
+ unsigned getMaxNumElements(ElementCount VF,
+ const Function *F = nullptr) const {
if (!VF.isScalable())
return VF.getFixedValue();
- Optional<unsigned> MaxNumVScale = getMaxVScale();
- assert(MaxNumVScale && "Expected valid max vscale value");
- return *MaxNumVScale * VF.getKnownMinValue();
+
+ unsigned MaxNumVScale = 16;
+ if (F && F->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned VScaleMax =
+ F->getFnAttribute(Attribute::VScaleRange).getVScaleRangeArgs().second;
+ if (VScaleMax > 0)
+ MaxNumVScale = VScaleMax;
+ }
+
+ return MaxNumVScale * VF.getKnownMinValue();
}
unsigned getMaxInterleaveFactor(unsigned VF);
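
This hunk and the LoopVectorize change below query the attribute with the
same pattern; a self-contained sketch of it (the helper name
getVScaleRangeMax is hypothetical, the Attribute API calls are the ones used
in the diff):

#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"

// Read the upper bound of vscale_range(min, max) from F, falling back to
// the SVE architectural maximum (2048 / 128 == 16) when the attribute is
// absent or its max is 0, which encodes "unbounded".
static unsigned getVScaleRangeMax(const llvm::Function *F) {
  unsigned MaxVScale = 16;
  if (F && F->hasFnAttribute(llvm::Attribute::VScaleRange)) {
    unsigned AttrMax = F->getFnAttribute(llvm::Attribute::VScaleRange)
                           .getVScaleRangeArgs()
                           .second;
    if (AttrMax > 0)
      MaxVScale = AttrMax;
  }
  return MaxVScale;
}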
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b842d15fe1874..00416efb03253 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -5636,6 +5636,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
// Limit MaxScalableVF by the maximum safe dependence distance.
Optional<unsigned> MaxVScale = TTI.getMaxVScale();
+ if (!MaxVScale && TheFunction->hasFnAttribute(Attribute::VScaleRange)) {
+ unsigned VScaleMax = TheFunction->getFnAttribute(Attribute::VScaleRange)
+ .getVScaleRangeArgs()
+ .second;
+ if (VScaleMax > 0)
+ MaxVScale = VScaleMax;
+ }
MaxScalableVF = ElementCount::getScalable(
MaxVScale ? (MaxSafeElements / MaxVScale.getValue()) : 0);
if (!MaxScalableVF)
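
To make the clamping concrete, a worked example with numbers taken from the
scalable-vectorization.ll test below (vscale_range(0, 16) gives MaxVScale =
16; everything else follows from the code above):

// test1: i32 loop with a dependence distance of 64 elements, so
// MaxSafeElements == 64:
//   MaxScalableVF = ElementCount::getScalable(64 / 16)  ->  vscale x 4
// test3: dependence distance of 16 elements:
//   MaxScalableVF = ElementCount::getScalable(16 / 16)  ->  vscale x 1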
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
index 302c191d4fc1d..866e038f14544 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-gather.ll
@@ -2,7 +2,7 @@
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
-define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_gathers(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
; CHECK-LABEL: 'masked_gathers'
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %res.nxv4i32 = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %res.nxv8i32 = call <vscale x 8 x i32> @llvm.masked.gather.nxv8i32.nxv8p0i32
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
index 503e6ce5e104f..a5fa33277b79e 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-scatter.ll
@@ -2,7 +2,7 @@
; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
-define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) {
+define void @masked_scatters(<vscale x 4 x i1> %nxv4i1mask, <vscale x 8 x i1> %nxv8i1mask, <4 x i1> %v4i1mask, <1 x i1> %v1i1mask, <vscale x 1 x i1> %nxv1i1mask) vscale_range(0, 16) {
; CHECK-LABEL: 'masked_scatters'
; CHECK-NEXT: Cost Model: Found an estimated cost of 64 for instruction: call void @llvm.masked.scatter.nxv4i32.nxv4p0i32
; CHECK-NEXT: Cost Model: Found an estimated cost of 128 for instruction: call void @llvm.masked.scatter.nxv8i32.nxv8p0i32
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
index 1669f4aa476ea..f9065a6126574 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -18,7 +18,7 @@
; return a;
; }
;
-define i32 @PR33613(double* %b, double %j, i32 %d) {
+define i32 @PR33613(double* %b, double %j, i32 %d) #0 {
; CHECK-VF4UF2-LABEL: @PR33613
; CHECK-VF4UF2: vector.body
; CHECK-VF4UF2: %[[VEC_RECUR:.*]] = phi <vscale x 4 x double> [ {{.*}}, %vector.ph ], [ {{.*}}, %vector.body ]
@@ -66,7 +66,7 @@ for.body:
; }
;
; Check that the sext sank after the load in the vector loop.
-define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
+define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) #0 {
; CHECK-VF4UF1-LABEL: @PR34711
; CHECK-VF4UF1: vector.body
; CHECK-VF4UF1: %[[VEC_RECUR:.*]] = phi <vscale x 4 x i16> [ %vector.recur.init, %vector.ph ], [ %[[MGATHER:.*]], %vector.body ]
@@ -100,5 +100,6 @@ for.end:
ret void
}
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
index 007670324fadc..cba948ed1dae0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-strict-fadd.ll
@@ -4,7 +4,7 @@
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -force-ordered-reductions=true -hints-allow-reordering=true -S 2>%t | FileCheck %s --check-prefix=CHECK-UNORDERED
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple aarch64-unknown-linux-gnu -mattr=+sve -hints-allow-reordering=false -S 2>%t | FileCheck %s --check-prefix=CHECK-NOT-VECTORIZED
-define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -49,7 +49,7 @@ for.end:
ret float %add
}
-define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) {
+define float @fadd_strict_unroll(float* noalias nocapture readonly %a, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_unroll
; CHECK-ORDERED: vector.body:
; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX4:.*]], %vector.body ]
@@ -113,7 +113,7 @@ for.end:
ret float %add
}
-define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @fadd_strict_interleave(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_strict_interleave
; CHECK-ORDERED: entry
; CHECK-ORDERED: %[[ARRAYIDX:.*]] = getelementptr inbounds float, float* %a, i64 1
@@ -206,7 +206,7 @@ for.end:
ret void
}
-define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_of_sum(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_of_sum
; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI1:.*]] = phi float [ 0.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -268,7 +268,7 @@ for.end: ; preds = %for.body, %entry
ret float %res
}
-define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define float @fadd_conditional(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_conditional
; CHECK-ORDERED: vector.body
; CHECK-ORDERED: %[[VEC_PHI:.*]] = phi float [ 1.000000e+00, %vector.ph ], [ %[[RDX:.*]], %vector.body ]
@@ -343,7 +343,7 @@ for.end:
}
; Negative test - loop contains multiple fadds which we cannot safely reorder
-define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) {
+define float @fadd_multiple(float* noalias nocapture %a, float* noalias nocapture %b, i64 %n) #0 {
; CHECK-ORDERED-LABEL: @fadd_multiple
; CHECK-ORDERED-NOT: vector.body
@@ -390,6 +390,7 @@ for.end: ; preds = %for.body
ret float %rdx
}
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !3, !6, !8}
!1 = distinct !{!1, !3, !7, !8}
!2 = distinct !{!2, !4, !6, !8}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
index 1d9d9d8545408..8d53ae5a0b5d9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll
@@ -7,7 +7,7 @@
; Test that the MaxVF for the following loop, that has no dependence distances,
; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
; (maximized bandwidth for i8 in the loop).
-define void @test0(i32* %a, i8* %b, i32* %c) {
+define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -40,7 +40,7 @@ exit:
; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as (maxvscale = 16) * 4.
-define void @test1(i32* %a, i8* %b) {
+define void @test1(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -74,7 +74,7 @@ exit:
; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as (maxvscale = 16) * 2.
-define void @test2(i32* %a, i8* %b) {
+define void @test2(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -108,7 +108,7 @@ exit:
; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as (maxvscale = 16) * 1.
-define void @test3(i32* %a, i8* %b) {
+define void @test3(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -142,7 +142,7 @@ exit:
; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
@@ -172,3 +172,5 @@ loop:
exit:
ret void
}
+
+attributes #0 = { vscale_range(0, 16) }
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
index a04b3c759e9b0..246dcd2370880 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-vf-hint.ll
@@ -45,7 +45,7 @@ target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test1
; CHECK: <4 x i32>
-define void @test1(i32* %a, i32* %b) {
+define void @test1(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -90,7 +90,7 @@ exit:
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test2
; CHECK: <4 x i32>
-define void @test2(i32* %a, i32* %b) {
+define void @test2(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -138,7 +138,7 @@ exit:
; CHECK-DBG: LV: Using user VF vscale x 2.
; CHECK-LABEL: @test3
; CHECK: <vscale x 2 x i32>
-define void @test3(i32* %a, i32* %b) {
+define void @test3(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -190,7 +190,7 @@ exit:
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test4
; CHECK: <4 x i32>
-define void @test4(i32* %a, i32* %b) {
+define void @test4(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -238,7 +238,7 @@ exit:
; CHECK-DBG: LV: Using user VF vscale x 4
; CHECK-LABEL: @test5
; CHECK: <vscale x 4 x i32>
-define void @test5(i32* %a, i32* %b) {
+define void @test5(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -289,7 +289,7 @@ exit:
; CHECK-DBG: Selecting VF: vscale x 4.
; CHECK-LABEL: @test6
; CHECK: <vscale x 4 x i32>
-define void @test6(i32* %a, i32* %b) {
+define void @test6(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -322,7 +322,7 @@ exit:
; CHECK-NO-SVE-LABEL: @test_no_sve
; CHECK-NO-SVE: <4 x i32>
; CHECK-NO-SVE-NOT: <vscale x 4 x i32>
-define void @test_no_sve(i32* %a, i32* %b) {
+define void @test_no_sve(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -356,7 +356,7 @@ exit:
; CHECK-DBG: LV: Selecting VF: 4.
; CHECK-LABEL: @test_no_max_vscale
; CHECK: <4 x i32>
-define void @test_no_max_vscale(i32* %a, i32* %b) {
+define void @test_no_max_vscale(i32* %a, i32* %b) #0 {
entry:
br label %loop
@@ -378,6 +378,7 @@ exit:
ret void
}
+attributes #0 = { vscale_range(0, 16) }
!21 = !{!21, !22, !23}
!22 = !{!"llvm.loop.vectorize.width", i32 4}
!23 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
index 3054f3a6ac971..bc083a2bc870c 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-cond-inv-loads.ll
@@ -1,6 +1,6 @@
; RUN: opt -loop-vectorize -scalable-vectorization=on -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -o - | FileCheck %s
-define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_i32i32i16(i32* noalias nocapture %a, i32* noalias nocapture readonly %cond, i16* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @cond_inv_load_i32i32i16
; CHECK: vector.ph:
; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x i16*> poison, i16* %inv, i32 0
@@ -39,7 +39,7 @@ exit: ; preds = %for.inc
ret void
}
-define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) {
+define void @cond_inv_load_f64f64f64(double* noalias nocapture %a, double* noalias nocapture readonly %cond, double* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @cond_inv_load_f64f64f64
; CHECK: vector.ph:
; CHECK: %[[INVINS:.*]] = insertelement <vscale x 4 x double*> poison, double* %inv, i32 0
@@ -76,7 +76,7 @@ exit: ; preds = %for.inc
ret void
}
-define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) {
+define void @invariant_load_cond(i32* noalias nocapture %a, i32* nocapture readonly %b, i32* nocapture readonly %cond, i64 %n) #0 {
; CHECK-LABEL: @invariant_load_cond
; CHECK: vector.body
; CHECK: %[[GEP:.*]] = getelementptr inbounds i32, i32* %b, i64 42
@@ -117,6 +117,7 @@ for.end:
ret void
}
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
index 4fbad7ab4e247..a2760c79a838e 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll
@@ -1,6 +1,6 @@
; RUN: opt -loop-vectorize -dce -instcombine -mtriple aarch64-linux-gnu -mattr=+sve -S %s -scalable-vectorization=preferred -force-target-instruction-cost=1 -o - | FileCheck %s
-define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) {
+define void @gather_nxv4i32_ind64(float* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, float* noalias nocapture %c, i64 %n) #0 {
; CHECK-LABEL: @gather_nxv4i32_ind64
; CHECK: vector.body:
; CHECK: %[[IND:.*]] = load <vscale x 4 x i64>, <vscale x 4 x i64>*
@@ -29,7 +29,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
; NOTE: I deliberately chose '%b' as an array of i32 indices, since the
; additional 'sext' in the for.body loop exposes additional code paths
; during vectorisation.
-define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) {
+define void @scatter_nxv4i32_ind32(float* noalias nocapture %a, i32* noalias nocapture readonly %b, float* noalias nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @scatter_nxv4i32_ind32
; CHECK: vector.body:
; CHECK: %[[VALS:.*]] = load <vscale x 4 x float>
@@ -57,7 +57,7 @@ for.cond.cleanup: ; preds = %for.body, %entry
ret void
}
-define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) {
+define void @scatter_inv_nxv4i32(i32* noalias nocapture %inv, i32* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-LABEL: @scatter_inv_nxv4i32
; CHECK: vector.ph:
; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -89,7 +89,7 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
ret void
}
-define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) {
+define void @gather_inv_nxv4i32(i32* noalias nocapture %a, i32* noalias nocapture readonly %inv, i64 %n) #0 {
; CHECK-LABEL: @gather_inv_nxv4i32
; CHECK: vector.ph:
; CHECK: %[[INS:.*]] = insertelement <vscale x 4 x i32*> poison, i32* %inv, i32 0
@@ -124,7 +124,7 @@ for.cond.cleanup: ; preds = %for.inc, %entry
-define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) {
+define void @gather_nxv4i32_ind64_stride2(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i64 %n) #0 {
; CHECK-LABEL: @gather_nxv4i32_ind64_stride2
; CHECK: vector.body:
; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -153,6 +153,8 @@ for.cond.cleanup: ; preds = %for.cond.cleanup.lo
ret void
}
+attributes #0 = { vscale_range(0, 16) }
+
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
index 0e02af631d205..b534171274047 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-inv-store.ll
@@ -59,7 +59,7 @@ for.end: ; preds = %for.inc, %entry
ret void
}
-attributes #0 = { "target-features"="+neon,+sve" }
+attributes #0 = { "target-features"="+neon,+sve" vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
index 8327e09063b68..23eb2d0b0aba0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-large-strides.ll
@@ -1,6 +1,6 @@
; RUN: opt -mtriple aarch64-linux-gnu -mattr=+sve -loop-vectorize -scalable-vectorization=on -dce -instcombine -S <%s | FileCheck %s
-define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) {
+define void @stride7_i32(i32* noalias nocapture %dst, i64 %n) #0 {
; CHECK-LABEL: @stride7_i32(
; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 4 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -27,7 +27,7 @@ for.end: ; preds = %for.end.loopexit, %
ret void
}
-define void @stride7_f64(double* noalias nocapture %dst, i64 %n) {
+define void @stride7_f64(double* noalias nocapture %dst, i64 %n) #0 {
; CHECK-LABEL: @stride7_f64(
; CHECK: vector.body
; CHECK: %[[VEC_IND:.*]] = phi <vscale x 2 x i64> [ %{{.*}}, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -55,7 +55,7 @@ for.end: ; preds = %for.end.loopexit, %
}
-define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) {
+define void @cond_stride7_f64(double* noalias nocapture %dst, i64* noalias nocapture readonly %cond, i64 %n) #0 {
; CHECK-LABEL: @cond_stride7_f64(
; CHECK: vector.body
; CHECK: %[[MASK:.*]] = icmp ne <vscale x 2 x i64>
@@ -90,7 +90,7 @@ for.end: ; preds = %for.end.loopexit, %
ret void
}
-
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
index 1aef842b297fb..0221c890a6e1b 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-strict-fadd-cost.ll
@@ -49,7 +49,7 @@ for.end:
ret double %add
}
-attributes #0 = { "target-features"="+sve" }
+attributes #0 = { "target-features"="+sve" vscale_range(0, 16) }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
index afa2bd093c273..1881801ec2579 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll
@@ -12,7 +12,7 @@
; that we can use gather instructions with the correct offsets, taking
; vscale into account.
-define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) {
+define void @widen_ptr_phi_unrolled(i32* noalias nocapture %a, i32* noalias nocapture %b, i32* nocapture readonly %c, i64 %n) #0 {
; CHECK-LABEL: @widen_ptr_phi_unrolled(
; CHECK: vector.body:
; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi i32* [ %c, %vector.ph ], [ %[[PTR_IND:.*]], %vector.body ]
@@ -122,7 +122,7 @@ for.cond.cleanup: ; preds = %for.body
; because it is stored to memory.
;
-define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) {
+define i32 @pointer_iv_mixed(i32* noalias %a, i32** noalias %b, i64 %n) #0 {
; CHECK-LABEL: @pointer_iv_mixed(
; CHECK: vector.body
; CHECK: %[[IDX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
@@ -170,7 +170,7 @@ for.end:
ret i32 %tmp5
}
-
+attributes #0 = { vscale_range(0, 16) }
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 4}