[llvm] 37f4ccb - [AArch64]Add memory op cost model for SVE
Caroline Concatto via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 11 04:55:08 PST 2020
Author: Caroline Concatto
Date: 2020-11-11T12:49:19Z
New Revision: 37f4ccb27545ca28a52a1a1c21cbccee03044d04
URL: https://github.com/llvm/llvm-project/commit/37f4ccb27545ca28a52a1a1c21cbccee03044d04
DIFF: https://github.com/llvm/llvm-project/commit/37f4ccb27545ca28a52a1a1c21cbccee03044d04.diff
LOG: [AArch64]Add memory op cost model for SVE
This patch adds/fixes the memory op cost model for SVE with fixed-width
vectors.
Differential Revision: https://reviews.llvm.org/D90950
Added:
llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/lib/Target/AArch64/AArch64Subtarget.h
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 5d30b5fd0e90..15c67b438b32 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -269,7 +269,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::nxv8bf16, &AArch64::ZPRRegClass);
}
- if (useSVEForFixedLengthVectors()) {
+ if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addRegisterClass(VT, &AArch64::ZPRRegClass);
@@ -1085,7 +1085,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
- if (useSVEForFixedLengthVectors()) {
+ if (Subtarget->useSVEForFixedLengthVectors()) {
for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
if (useSVEForFixedLengthVectorVT(VT))
addTypeForFixedLengthSVE(VT);
@@ -4140,14 +4140,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
}
}
-bool AArch64TargetLowering::useSVEForFixedLengthVectors() const {
- // Prefer NEON unless larger SVE registers are available.
- return Subtarget->hasSVE() && Subtarget->getMinSVEVectorSizeInBits() >= 256;
+bool AArch64TargetLowering::mergeStoresAfterLegalization(EVT VT) const {
+ return !Subtarget->useSVEForFixedLengthVectors();
}
bool AArch64TargetLowering::useSVEForFixedLengthVectorVT(
EVT VT, bool OverrideNEON) const {
- if (!useSVEForFixedLengthVectors())
+ if (!Subtarget->useSVEForFixedLengthVectors())
return false;
if (!VT.isFixedLengthVector())
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index bfc83a9a34b9..47248b948203 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -773,9 +773,7 @@ class AArch64TargetLowering : public TargetLowering {
/// illegal as the original, thus leading to an infinite legalisation loop.
/// NOTE: Once BUILD_VECTOR is legal or can be custom lowered for all legal
/// vector types this override can be removed.
- bool mergeStoresAfterLegalization(EVT VT) const override {
- return !useSVEForFixedLengthVectors();
- }
+ bool mergeStoresAfterLegalization(EVT VT) const override;
private:
/// Keep a pointer to the AArch64Subtarget around so that we can
@@ -1008,7 +1006,6 @@ class AArch64TargetLowering : public TargetLowering {
bool shouldLocalize(const MachineInstr &MI,
const TargetTransformInfo *TTI) const override;
- bool useSVEForFixedLengthVectors() const;
// Normally SVE is only used for byte size vectors that do not fit within a
// NEON vector. This changes when OverrideNEON is true, allowing SVE to be
// used for 64bit and 128bit vectors as well.
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index fdf979beed9b..b4d71acadf67 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -368,3 +368,8 @@ unsigned AArch64Subtarget::getMinSVEVectorSizeInBits() const {
return (SVEVectorBitsMin / 128) * 128;
return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
}
+
+bool AArch64Subtarget::useSVEForFixedLengthVectors() const {
+ // Prefer NEON unless larger SVE registers are available.
+ return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 67c682ca433a..4eb484364d58 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -555,6 +555,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
// implied by the architecture.
unsigned getMaxSVEVectorSizeInBits() const;
unsigned getMinSVEVectorSizeInBits() const;
+ bool useSVEForFixedLengthVectors() const;
};
} // End llvm namespace
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 595f403c66a7..4f7ebff2b44f 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -751,6 +751,10 @@ AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
return Options;
}
+bool AArch64TTIImpl::useNeonVector(const Type *Ty) const {
+ return isa<FixedVectorType>(Ty) && !ST->useSVEForFixedLengthVectors();
+}
+
int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
MaybeAlign Alignment, unsigned AddressSpace,
TTI::TargetCostKind CostKind,
@@ -778,7 +782,7 @@ int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
return LT.first * 2 * AmortizationCost;
}
- if (Ty->isVectorTy() &&
+ if (useNeonVector(Ty) &&
cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
unsigned ProfitableNumElements;
if (Opcode == Instruction::Store)
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a624f8bfd745..baf11cd7fa23 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -147,6 +147,7 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
+ bool useNeonVector(const Type *Ty) const;
int getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
unsigned AddressSpace,
diff --git a/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
new file mode 100644
index 000000000000..3a4e0f080a49
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/mem-op-cost-model.ll
@@ -0,0 +1,88 @@
+; Check memory cost model action for fixed vector SVE and Neon
+; Vector bits size lower than 256 bits end up assuming Neon cost model
+; CHECK-NEON has same performance as CHECK-SVE-128
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+neon < %s | FileCheck %s --check-prefix=CHECK-NEON
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s --check-prefix=CHECK-SVE-128
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s --check-prefix=CHECK-SVE-256
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s --check-prefix=CHECK-SVE-512
+
+define <16 x i8> @load16(<16 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load16':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ %out = load <16 x i8>, <16 x i8>* %ptr
+ ret <16 x i8> %out
+}
+
+define void @store16(<16 x i8>* %ptr, <16 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store16':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ store <16 x i8> %val, <16 x i8>* %ptr
+ ret void
+}
+
+define <8 x i8> @load8(<8 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load8':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ %out = load <8 x i8>, <8 x i8>* %ptr
+ ret <8 x i8> %out
+}
+
+define void @store8(<8 x i8>* %ptr, <8 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store8':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ store <8 x i8> %val, <8 x i8>* %ptr
+ ret void
+}
+
+define <4 x i8> @load4(<4 x i8>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load4':
+; CHECK-NEON: Cost Model: Found an estimated cost of 64 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 64 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ %out = load <4 x i8>, <4 x i8>* %ptr
+ ret <4 x i8> %out
+}
+
+define void @store4(<4 x i8>* %ptr, <4 x i8> %val) {
+; CHECK: 'Cost Model Analysis' for function 'store4':
+; CHECK-NEON: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ store <4 x i8> %val, <4 x i8>* %ptr
+ ret void
+}
+
+define <16 x i16> @load_256(<16 x i16>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load_256':
+; CHECK-NEON: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ %out = load <16 x i16>, <16 x i16>* %ptr
+ ret <16 x i16> %out
+}
+
+define <8 x i64> @load_512(<8 x i64>* %ptr) {
+; CHECK: 'Cost Model Analysis' for function 'load_512':
+; CHECK-NEON: Cost Model: Found an estimated cost of 4 for instruction:
+; CHECK-SVE-128: Cost Model: Found an estimated cost of 4 for instruction:
+; CHECK-SVE-256: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-SVE-512: Cost Model: Found an estimated cost of 1 for instruction:
+ %out = load <8 x i64>, <8 x i64>* %ptr
+ ret <8 x i64> %out
+}
diff --git a/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll b/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
new file mode 100644
index 000000000000..1a7b262ae627
--- /dev/null
+++ b/llvm/test/Analysis/CostModel/AArch64/scalable-mem-op-cost-model.ll
@@ -0,0 +1,51 @@
+; Checks if the memory cost model does not break when using scalable vectors
+
+; RUN: opt -cost-model -analyze -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s
+
+define <vscale x 8 x i8> @load-sve-8(<vscale x 8 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-8':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+ %retval = load <vscale x 8 x i8>, <vscale x 8 x i8>* %ptr
+ ret <vscale x 8 x i8> %retval
+}
+
+define void @store-sve-8(<vscale x 8 x i8>* %ptr, <vscale x 8 x i8> %val) {
+; CHECK-LABEL: 'store-sve-8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+ store <vscale x 8 x i8> %val, <vscale x 8 x i8>* %ptr
+ ret void
+}
+
+define <vscale x 16 x i8> @load-sve-16(<vscale x 16 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-16':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+ %retval = load <vscale x 16 x i8>, <vscale x 16 x i8>* %ptr
+ ret <vscale x 16 x i8> %retval
+}
+
+define void @store-sve-16(<vscale x 16 x i8>* %ptr, <vscale x 16 x i8> %val) {
+; CHECK-LABEL: 'store-sve-16'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+ store <vscale x 16 x i8> %val, <vscale x 16 x i8>* %ptr
+ ret void
+}
+
+define <vscale x 32 x i8> @load-sve-32(<vscale x 32 x i8>* %ptr) {
+; CHECK-LABEL: 'load-sve-32':
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+ %retval = load <vscale x 32 x i8>, <vscale x 32 x i8>* %ptr
+ ret <vscale x 32 x i8> %retval
+}
+
+define void @store-sve-32(<vscale x 32 x i8>* %ptr, <vscale x 32 x i8> %val) {
+; CHECK-LABEL: 'store-sve-32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction:
+; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction:
+ store <vscale x 32 x i8> %val, <vscale x 32 x i8>* %ptr
+ ret void
+}
More information about the llvm-commits
mailing list