[llvm] a751240 - [LV] Prevent vectorization with unsupported element types.
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 6 05:33:40 PDT 2021
Author: Kerry McLaughlin
Date: 2021-07-06T13:06:21+01:00
New Revision: a7512401e5a2cc7a1d0805fc4daf0c808e9d502d
URL: https://github.com/llvm/llvm-project/commit/a7512401e5a2cc7a1d0805fc4daf0c808e9d502d
DIFF: https://github.com/llvm/llvm-project/commit/a7512401e5a2cc7a1d0805fc4daf0c808e9d502d.diff
LOG: [LV] Prevent vectorization with unsupported element types.
This patch adds a TTI function, isElementTypeLegalForScalableVector, to query
whether it is possible to vectorize a given element type. This is called from
LoopVectorizationCostModel::getMaxLegalScalableVF to reject scalable
vectorization if any of the element types used by instructions in the loop are
unsupported, e.g.:
  int foo(__int128_t* ptr, int N)
    #pragma clang loop vectorize_width(4, scalable)
    for (int i=0; i<N; ++i)
      ptr[i] = ptr[i] + 42;
This example currently crashes if we attempt to vectorize since i128 is not a
supported type for scalable vectorization.
Reviewed By: sdesmalen, david-arm
Differential Revision: https://reviews.llvm.org/D102253
Added:
llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
Modified:
llvm/include/llvm/Analysis/TargetTransformInfo.h
llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
llvm/lib/Analysis/TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index c222593c9347..a9b103b54576 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1327,6 +1327,9 @@ class TargetTransformInfo {
bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const;
+ /// \returns True if the given type is supported for scalable vectors
+ bool isElementTypeLegalForScalableVector(Type *Ty) const;
+
/// \returns The new vector factor value if the target doesn't support \p
/// SizeInBytes loads or has a better vector factor.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
@@ -1710,6 +1713,7 @@ class TargetTransformInfo::Concept {
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc,
ElementCount VF) const = 0;
+ virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const = 0;
@@ -2261,6 +2265,9 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
ElementCount VF) const override {
return Impl.isLegalToVectorizeReduction(RdxDesc, VF);
}
+ bool isElementTypeLegalForScalableVector(Type *Ty) const override {
+ return Impl.isElementTypeLegalForScalableVector(Ty);
+ }
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const override {
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index 657e8d81aa73..eff4a7cdb85b 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -721,6 +721,8 @@ class TargetTransformInfoImplBase {
return true;
}
+ bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; }
+
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
VectorType *VecTy) const {
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 05894bf69546..3b89a9807863 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1003,6 +1003,10 @@ bool TargetTransformInfo::isLegalToVectorizeReduction(
return TTIImpl->isLegalToVectorizeReduction(RdxDesc, VF);
}
+bool TargetTransformInfo::isElementTypeLegalForScalableVector(Type *Ty) const {
+ return TTIImpl->isElementTypeLegalForScalableVector(Ty);
+}
+
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
unsigned LoadSize,
unsigned ChainSizeInBytes,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index cd1a8e6e4359..6005852f1710 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1736,7 +1736,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
return true;
Type *Ty = RdxDesc.getRecurrenceType();
- if (Ty->isBFloatTy() || !isLegalElementTypeForSVE(Ty))
+ if (Ty->isBFloatTy() || !isElementTypeLegalForScalableVector(Ty))
return false;
switch (RdxDesc.getRecurrenceKind()) {
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 646d1db73e22..83956fbbe05b 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -208,7 +208,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info);
- bool isLegalElementTypeForSVE(Type *Ty) const {
+ bool isElementTypeLegalForScalableVector(Type *Ty) const {
if (Ty->isPointerTy())
return true;
@@ -218,7 +218,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())
return true;
- if (Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
+ if (Ty->isIntegerTy(1) || Ty->isIntegerTy(8) || Ty->isIntegerTy(16) ||
Ty->isIntegerTy(32) || Ty->isIntegerTy(64))
return true;
@@ -233,7 +233,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
if (isa<FixedVectorType>(DataType) && !ST->useSVEForFixedLengthVectors())
return false; // Fall back to scalarization of masked operations.
- return isLegalElementTypeForSVE(DataType->getScalarType());
+ return !DataType->getScalarType()->isIntegerTy(1) &&
+ isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedLoad(Type *DataType, Align Alignment) {
@@ -254,7 +255,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
DataTypeFVTy->getNumElements() < 2))
return false;
- return isLegalElementTypeForSVE(DataType->getScalarType());
+ return !DataType->getScalarType()->isIntegerTy(1) &&
+ isElementTypeLegalForScalableVector(DataType->getScalarType());
}
bool isLegalMaskedGather(Type *DataType, Align Alignment) const {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index f3e5b020e8b6..038822f7063e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1519,7 +1519,7 @@ class LoopVectorizationCostModel {
/// Returns true if the target machine supports all of the reduction
/// variables found for the given VF.
- bool canVectorizeReductions(ElementCount VF) {
+ bool canVectorizeReductions(ElementCount VF) const {
return (all_of(Legal->getReductionVars(), [&](auto &Reduction) -> bool {
const RecurrenceDescriptor &RdxDesc = Reduction.second;
return TTI.isLegalToVectorizeReduction(RdxDesc, VF);
@@ -5677,12 +5677,13 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
auto MaxScalableVF = ElementCount::getScalable(
std::numeric_limits<ElementCount::ScalarTy>::max());
- // Disable scalable vectorization if the loop contains unsupported reductions.
// Test that the loop-vectorizer can legalize all operations for this MaxVF.
// FIXME: While for scalable vectors this is currently sufficient, this should
// be replaced by a more detailed mechanism that filters out specific VFs,
// instead of invalidating vectorization for a whole set of VFs based on the
// MaxVF.
+
+ // Disable scalable vectorization if the loop contains unsupported reductions.
if (!canVectorizeReductions(MaxScalableVF)) {
reportVectorizationInfo(
"Scalable vectorization not supported for the reduction "
@@ -5691,6 +5692,18 @@ LoopVectorizationCostModel::getMaxLegalScalableVF(unsigned MaxSafeElements) {
return ElementCount::getScalable(0);
}
+ // Disable scalable vectorization if the loop contains any instructions
+ // with element types not supported for scalable vectors.
+ if (any_of(ElementTypesInLoop, [&](Type *Ty) {
+ return !Ty->isVoidTy() &&
+ !this->TTI.isElementTypeLegalForScalableVector(Ty);
+ })) {
+ reportVectorizationInfo("Scalable vectorization is not supported "
+ "for all element types found in this loop.",
+ "ScalableVFUnfeasible", ORE, TheLoop);
+ return ElementCount::getScalable(0);
+ }
+
if (Legal->isSafeForAnyVectorWidth())
return MaxScalableVF;
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
index e45c535a49ab..21b8afddb1a9 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/scalable-reductions.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -pass-remarks=loop-vectorize -pass-remarks-analysis=loop-vectorize -pass-remarks-missed=loop-vectorize -mtriple aarch64-unknown-linux-gnu -mattr=+sve,+bf16 -S -scalable-vectorization=on 2>%t | FileCheck %s -check-prefix=CHECK
; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARK
; Reduction can be vectorized
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
new file mode 100644
index 000000000000..b13cd9cff9db
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-illegal-type.ll
@@ -0,0 +1,106 @@
+; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mattr=+sve -force-vector-width=4 -pass-remarks-analysis=loop-vectorize -S 2>%t | FileCheck %s
+; RUN: cat %t | FileCheck %s -check-prefix=CHECK-REMARKS
+target triple = "aarch64-linux-gnu"
+
+; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
+define dso_local void @loop_sve_i128(i128* nocapture %ptr, i64 %N) {
+; CHECK-LABEL: @loop_sve_i128
+; CHECK: vector.body
+; CHECK: %[[LOAD1:.*]] = load i128, i128* {{.*}}
+; CHECK-NEXT: %[[LOAD2:.*]] = load i128, i128* {{.*}}
+; CHECK-NEXT: %[[ADD1:.*]] = add nsw i128 %[[LOAD1]], 42
+; CHECK-NEXT: %[[ADD2:.*]] = add nsw i128 %[[LOAD2]], 42
+; CHECK-NEXT: store i128 %[[ADD1]], i128* {{.*}}
+; CHECK-NEXT: store i128 %[[ADD2]], i128* {{.*}}
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
+ %0 = load i128, i128* %arrayidx, align 16
+ %add = add nsw i128 %0, 42
+ store i128 %add, i128* %arrayidx, align 16
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+ ret void
+}
+
+; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
+define dso_local void @loop_sve_f128(fp128* nocapture %ptr, i64 %N) {
+; CHECK-LABEL: @loop_sve_f128
+; CHECK: vector.body
+; CHECK: %[[LOAD1:.*]] = load fp128, fp128*
+; CHECK-NEXT: %[[LOAD2:.*]] = load fp128, fp128*
+; CHECK-NEXT: %[[FSUB1:.*]] = fsub fp128 %[[LOAD1]], 0xL00000000000000008000000000000000
+; CHECK-NEXT: %[[FSUB2:.*]] = fsub fp128 %[[LOAD2]], 0xL00000000000000008000000000000000
+; CHECK-NEXT: store fp128 %[[FSUB1]], fp128* {{.*}}
+; CHECK-NEXT: store fp128 %[[FSUB2]], fp128* {{.*}}
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds fp128, fp128* %ptr, i64 %iv
+ %0 = load fp128, fp128* %arrayidx, align 16
+ %add = fsub fp128 %0, 0xL00000000000000008000000000000000
+ store fp128 %add, fp128* %arrayidx, align 16
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+ ret void
+}
+
+; CHECK-REMARKS: Scalable vectorization is not supported for all element types found in this loop
+define dso_local void @loop_invariant_sve_i128(i128* nocapture %ptr, i128 %val, i64 %N) {
+; CHECK-LABEL: @loop_invariant_sve_i128
+; CHECK: vector.body
+; CHECK: %[[GEP1:.*]] = getelementptr inbounds i128, i128* %ptr
+; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i128, i128* %ptr
+; CHECK-NEXT: store i128 %val, i128* %[[GEP1]]
+; CHECK-NEXT: store i128 %val, i128* %[[GEP2]]
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
+ store i128 %val, i128* %arrayidx, align 16
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
+
+for.end:
+ ret void
+}
+
+define dso_local void @loop_fixed_width_i128(i128* nocapture %ptr, i64 %N) {
+; CHECK-LABEL: @loop_fixed_width_i128
+; CHECK: load <4 x i128>, <4 x i128>*
+; CHECK: add nsw <4 x i128> {{.*}}, <i128 42, i128 42, i128 42, i128 42>
+; CHECK: store <4 x i128> {{.*}} <4 x i128>*
+; CHECK-NOT: vscale
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i128, i128* %ptr, i64 %iv
+ %0 = load i128, i128* %arrayidx, align 16
+ %add = add nsw i128 %0, 42
+ store i128 %add, i128* %arrayidx, align 16
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, %N
+ br i1 %exitcond.not, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
More information about the llvm-commits mailing list