[llvm] 079c488 - [TTI][AArch64] Cost model insertelement and indexed LD1 instructions
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 9 08:28:38 PST 2023
Author: Sjoerd Meijer
Date: 2023-02-09T16:28:11Z
New Revision: 079c488c66056f7bf22c30ee280489f6c43b7658
URL: https://github.com/llvm/llvm-project/commit/079c488c66056f7bf22c30ee280489f6c43b7658
DIFF: https://github.com/llvm/llvm-project/commit/079c488c66056f7bf22c30ee280489f6c43b7658.diff
LOG: [TTI][AArch64] Cost model insertelement and indexed LD1 instructions
An indexed LD1 instruction, or "ASIMD load, 1 element, one lane, B/H/S"
instruction that loads a value and inserts an element into a vector is
an expensive instruction. It has a latency of 8 on modern cores. We
generate an indexed LD1 when an insertelement instruction has a load as an
operand and this patch is recognising and makes indexed LD1 more expensive.
Differential Revision: https://reviews.llvm.org/D141602
Added:
Modified:
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c6e9e0550117e..06e5589616c94 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2184,7 +2184,8 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
return 0;
}
-InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
+InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(const Instruction *I,
+ Type *Val,
unsigned Index,
bool HasRealUse) {
assert(Val->isVectorTy() && "This must be a vector type");
@@ -2210,14 +2211,21 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
// needed. So it has non-zero cost.
// - For the rest of cases (virtual instruction or element type is float),
// consider the instruction free.
- //
+ if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
+ return 0;
+
+ // This is recognising a LD1 single-element structure to one lane of one
+ // register instruction. I.e., if this is an `insertelement` instruction,
+ // and its second operand is a load, then we will generate a LD1, which
+ // are expensive instructions.
+ if (I && dyn_cast<LoadInst>(I->getOperand(1)))
+ return ST->getVectorInsertExtractBaseCost() + 1;
+
// FIXME:
// If the extract-element and insert-element instructions could be
// simplified away (e.g., could be combined into users by looking at use-def
// context), they have no cost. This is not done in the first place for
// compile-time considerations.
- if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
- return 0;
}
// All other insert/extracts cost this much.
@@ -2228,14 +2236,14 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index, Value *Op0,
Value *Op1) {
- return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
+ return getVectorInstrCostHelper(nullptr, Val, Index, false /* HasRealUse */);
}
InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
Type *Val,
TTI::TargetCostKind CostKind,
unsigned Index) {
- return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */);
+ return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
}
InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a987c8ef6a7fa..fa518605dc8e4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -65,8 +65,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
// 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
// indicates whether the vector instruction is available in the input IR or
// just imaginary in vectorizer passes.
- InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
- bool HasRealUse);
+ InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val,
+ unsigned Index, bool HasRealUse);
public:
explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)
diff --git a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
index 844b23dc52ba9..870b6631af242 100644
--- a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
@@ -108,12 +108,12 @@ define void @vectorInstrCost() {
define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) {
; KRYO-LABEL: 'LD1_B'
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
-; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
;
; NEO-LABEL: 'LD1_B'
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
-; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
;
entry:
@@ -125,12 +125,12 @@ entry:
define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) {
; KRYO-LABEL: 'LD1_H'
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
-; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
;
; NEO-LABEL: 'LD1_H'
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
-; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
;
entry:
@@ -142,12 +142,12 @@ entry:
define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) {
; KRYO-LABEL: 'LD1_W'
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
-; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
;
; NEO-LABEL: 'LD1_W'
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
-; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
;
entry:
@@ -159,12 +159,12 @@ entry:
define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) {
; KRYO-LABEL: 'LD1_X'
; KRYO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
-; KRYO-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; KRYO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
; KRYO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
;
; NEO-LABEL: 'LD1_X'
; NEO-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
-; NEO-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; NEO-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
; NEO-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
;
entry:
More information about the llvm-commits
mailing list