[llvm] 079c488 - [TTI][AArch64] Cost model insertelement and indexed LD1 instructions

Sjoerd Meijer via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 9 08:28:38 PST 2023


Author: Sjoerd Meijer
Date: 2023-02-09T16:28:11Z
New Revision: 079c488c66056f7bf22c30ee280489f6c43b7658

URL: https://github.com/llvm/llvm-project/commit/079c488c66056f7bf22c30ee280489f6c43b7658
DIFF: https://github.com/llvm/llvm-project/commit/079c488c66056f7bf22c30ee280489f6c43b7658.diff

LOG: [TTI][AArch64] Cost model insertelement and indexed LD1 instructions

An indexed LD1 instruction, or "ASIMD load, 1 element, one lane, B/H/S"
instruction that loads a value and inserts an element into a vector is
an expensive instruction. It has a latency of 8 on modern cores. We
generate an indexed LD1 when an insertelement instruction has a load as an
operand and this patch is recognising and makes indexed LD1 more expensive.

Differential Revision: https://reviews.llvm.org/D141602

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
    llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
    llvm/test/Analysis/CostModel/AArch64/insert-extract.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c6e9e0550117e..06e5589616c94 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -2184,7 +2184,8 @@ InstructionCost AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
   return 0;
 }
 
-InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
+InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(const Instruction *I,
+                                                         Type *Val,
                                                          unsigned Index,
                                                          bool HasRealUse) {
   assert(Val->isVectorTy() && "This must be a vector type");
@@ -2210,14 +2211,21 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(Type *Val,
     // needed. So it has non-zero cost.
     // - For the rest of cases (virtual instruction or element type is float),
     // consider the instruction free.
-    //
+    if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
+      return 0;
+
+    // This is recognising a LD1 single-element structure to one lane of one
+    // register instruction. I.e., if this is an `insertelement` instruction,
+    // and its second operand is a load, then we will generate a LD1, which
+    // are expensive instructions.
+    if (I && dyn_cast<LoadInst>(I->getOperand(1)))
+      return ST->getVectorInsertExtractBaseCost() + 1;
+
     // FIXME:
     // If the extract-element and insert-element instructions could be
     // simplified away (e.g., could be combined into users by looking at use-def
     // context), they have no cost. This is not done in the first place for
     // compile-time considerations.
-    if (Index == 0 && (!HasRealUse || !Val->getScalarType()->isIntegerTy()))
-      return 0;
   }
 
   // All other insert/extracts cost this much.
@@ -2228,14 +2236,14 @@ InstructionCost AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
                                                    TTI::TargetCostKind CostKind,
                                                    unsigned Index, Value *Op0,
                                                    Value *Op1) {
-  return getVectorInstrCostHelper(Val, Index, false /* HasRealUse */);
+  return getVectorInstrCostHelper(nullptr, Val, Index, false /* HasRealUse */);
 }
 
 InstructionCost AArch64TTIImpl::getVectorInstrCost(const Instruction &I,
                                                    Type *Val,
                                                    TTI::TargetCostKind CostKind,
                                                    unsigned Index) {
-  return getVectorInstrCostHelper(Val, Index, true /* HasRealUse */);
+  return getVectorInstrCostHelper(&I, Val, Index, true /* HasRealUse */);
 }
 
 InstructionCost AArch64TTIImpl::getArithmeticInstrCost(

diff  --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a987c8ef6a7fa..fa518605dc8e4 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -65,8 +65,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
   // 'Val' and 'Index' are forwarded from 'getVectorInstrCost'; 'HasRealUse'
   // indicates whether the vector instruction is available in the input IR or
   // just imaginary in vectorizer passes.
-  InstructionCost getVectorInstrCostHelper(Type *Val, unsigned Index,
-                                           bool HasRealUse);
+  InstructionCost getVectorInstrCostHelper(const Instruction *I, Type *Val,
+                                           unsigned Index, bool HasRealUse);
 
 public:
   explicit AArch64TTIImpl(const AArch64TargetMachine *TM, const Function &F)

diff  --git a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
index 844b23dc52ba9..870b6631af242 100644
--- a/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/insert-extract.ll
@@ -108,12 +108,12 @@ define void @vectorInstrCost() {
 define <8 x i8> @LD1_B(<8 x i8> %vec, ptr noundef %i) {
 ; KRYO-LABEL: 'LD1_B'
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
 ;
 ; NEO-LABEL: 'LD1_B'
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i8, ptr %i, align 1
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
+; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <8 x i8> %vec, i8 %v1, i32 1
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i8> %v2
 ;
 entry:
@@ -125,12 +125,12 @@ entry:
 define <4 x i16> @LD1_H(<4 x i16> %vec, ptr noundef %i) {
 ; KRYO-LABEL: 'LD1_H'
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
 ;
 ; NEO-LABEL: 'LD1_H'
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i16, ptr %i, align 2
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
+; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i16> %vec, i16 %v1, i32 2
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i16> %v2
 ;
 entry:
@@ -142,12 +142,12 @@ entry:
 define <4 x i32> @LD1_W(<4 x i32> %vec, ptr noundef %i) {
 ; KRYO-LABEL: 'LD1_W'
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
 ;
 ; NEO-LABEL: 'LD1_W'
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i32, ptr %i, align 4
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
+; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <4 x i32> %vec, i32 %v1, i32 3
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %v2
 ;
 entry:
@@ -159,12 +159,12 @@ entry:
 define <2 x i64> @LD1_X(<2 x i64> %vec, ptr noundef %i) {
 ; KRYO-LABEL: 'LD1_X'
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
-; KRYO-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; KRYO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
 ; KRYO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
 ;
 ; NEO-LABEL: 'LD1_X'
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v1 = load i64, ptr %i, align 8
-; NEO-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
+; NEO-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2 = insertelement <2 x i64> %vec, i64 %v1, i32 0
 ; NEO-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %v2
 ;
 entry:


        


More information about the llvm-commits mailing list