[llvm] [PowerPC] adjust cost for vector insert/extract with non const index (PR #79092)

Chen Zheng via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 18 17:31:36 PST 2024


https://github.com/chenzheng1030 updated https://github.com/llvm/llvm-project/pull/79092

>From 60b6553cdf1470b621542614a2f1b0d7ac71ce49 Mon Sep 17 00:00:00 2001
From: Chen Zheng <czhengsz at cn.ibm.com>
Date: Mon, 22 Jan 2024 23:11:21 -0500
Subject: [PATCH 1/3] [PowerPC] adjust cost for vector insert/extract with non
 const index

---
 .../Target/PowerPC/PPCTargetTransformInfo.cpp | 56 +++++++++++--------
 .../CostModel/PowerPC/insert_extract.ll       | 36 ++++++------
 2 files changed, 51 insertions(+), 41 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 062b53e24a0d79..4768d0a1ba693e 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -697,34 +697,44 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
 
     return Cost;
 
-  } else if (Val->getScalarType()->isIntegerTy() && Index != -1U) {
+  } else if (Val->getScalarType()->isIntegerTy()) {
     unsigned EltSize = Val->getScalarSizeInBits();
     // Computing on 1 bit values requires extra mask or compare operations.
     unsigned MaskCost = VecMaskCost && EltSize == 1 ? 1 : 0;
     if (ST->hasP9Altivec()) {
-      if (ISD == ISD::INSERT_VECTOR_ELT)
-        // A move-to VSR and a permute/insert.  Assume vector operation cost
-        // for both (cost will be 2x on P9).
-        return 2 * CostFactor;
-
-      // It's an extract.  Maybe we can do a cheap move-from VSR.
-      unsigned EltSize = Val->getScalarSizeInBits();
-      if (EltSize == 64) {
-        unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
-        if (Index == MfvsrdIndex)
-          return 1;
-      } else if (EltSize == 32) {
-        unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
-        if (Index == MfvsrwzIndex)
-          return 1;
-      }
-
-      // We need a vector extract (or mfvsrld).  Assume vector operation cost.
-      // The cost of the load constant for a vector extract is disregarded
-      // (invariant, easily schedulable).
-      return CostFactor + MaskCost;
+      // P10 has vxform insert which can handle non const index. The MaskCost is
+      // for masking the index.
+      // P9 has insert for const index. A move-to VSR and a permute/insert.
+      // Assume vector operation cost for both (cost will be 2x on P9).
+      if (ISD == ISD::INSERT_VECTOR_ELT) {
+        if (ST->isISA3_1())
+          return CostFactor + MaskCost;
+        else if (Index != -1U)
+          return 2 * CostFactor;
+      } else if (ISD == ISD::EXTRACT_VECTOR_ELT) {
+        // It's an extract.  Maybe we can do a cheap move-from VSR.
+        unsigned EltSize = Val->getScalarSizeInBits();
+        if (EltSize == 64) {
+          // FIXME: no need to worry about endian, P9 has both mfvsrd/mfvsrld.
+          unsigned MfvsrdIndex = ST->isLittleEndian() ? 1 : 0;
+          if (Index == MfvsrdIndex)
+            return 1;
+        } else if (EltSize == 32) {
+          unsigned MfvsrwzIndex = ST->isLittleEndian() ? 2 : 1;
+          if (Index == MfvsrwzIndex)
+            return 1;
+
+          // For other indexs like non const, P9 has vxform extract. The
+          // MaskCost is for masking the index.
+          return CostFactor + MaskCost;
+        }
 
-    } else if (ST->hasDirectMove()) {
+        // We need a vector extract (or mfvsrld).  Assume vector operation cost.
+        // The cost of the load constant for a vector extract is disregarded
+        // (invariant, easily schedulable).
+        return CostFactor + MaskCost;
+      }
+    } else if (ST->hasDirectMove() && Index != -1U) {
       // Assume permute has standard cost.
       // Assume move-to/move-from VSR have 2x standard cost.
       if (ISD == ISD::INSERT_VECTOR_ELT)
diff --git a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
index 607d15790b5f31..512c83f0182377 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -27,7 +27,7 @@ define i32 @insert(i32 %arg) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
 ; CHECK-P10-LABEL: 'insert'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <4 x i32> undef, i32 %arg, i32 0
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
 ;
   %x = insertelement <4 x i32> undef, i32 %arg, i32 0
@@ -109,7 +109,7 @@ define void @test4xi32(<4 x i32> %v1, i32 %x1) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
 ; CHECK-P10-LABEL: 'test4xi32'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret void
 ;
   %v2 = insertelement <4 x i32> %v1, i32 %x1, i32 2
@@ -239,7 +239,7 @@ define <2 x i64> @insert_i64_x(<2 x i64> %dest, i64 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i64_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %x
 ;
   %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
@@ -264,7 +264,7 @@ define <4 x i32> @insert_i32_x(<4 x i32> %dest, i32 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i32_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
 ;
   %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
@@ -289,7 +289,7 @@ define  <8 x i16> @insert_i16_x(<8 x i16> %dest, i16 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i16_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %x
 ;
   %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
@@ -314,7 +314,7 @@ define  <16 x i8> @insert_i8_x(<16 x i8> %dest, i8 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i8_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %x
 ;
   %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
@@ -331,15 +331,15 @@ define i64 @extract_i64_x(<2 x i64> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i64_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i64_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i64_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
   %x = extractelement <2 x i64> %arg, i32 %idx
@@ -356,15 +356,15 @@ define i32 @extract_i32_x(<4 x i32> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i32_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i32_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i32_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
   %x = extractelement <4 x i32> %arg, i32 %idx
@@ -381,15 +381,15 @@ define i16 @extract_i16_x(<8 x i16> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i16_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i16_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i16_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
   %x = extractelement <8 x i16> %arg, i32 %idx
@@ -406,15 +406,15 @@ define i8 @extract_i8_x(<16 x i8> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i8_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i8_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i8_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
   %x = extractelement <16 x i8> %arg, i32 %idx

>From 735ccb474fb387afcc6218c6b4affa113443cc91 Mon Sep 17 00:00:00 2001
From: Chen Zheng <czhengsz at cn.ibm.com>
Date: Thu, 25 Jan 2024 08:09:29 -0500
Subject: [PATCH 2/3] address comments

---
 .../Target/PowerPC/PPCTargetTransformInfo.cpp | 16 ++++++----
 .../CostModel/PowerPC/insert_extract.ll       | 32 +++++++++----------
 2 files changed, 25 insertions(+), 23 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 4768d0a1ba693e..ddb1bf3f78dd66 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -700,15 +700,17 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
   } else if (Val->getScalarType()->isIntegerTy()) {
     unsigned EltSize = Val->getScalarSizeInBits();
     // Computing on 1 bit values requires extra mask or compare operations.
-    unsigned MaskCost = VecMaskCost && EltSize == 1 ? 1 : 0;
+    unsigned MaskCostForOneBitSize = (VecMaskCost && EltSize == 1) ? 1 : 0;
+    // Computing on non const index requires extra mask or compare operations.
+    unsigned MaskCostForIdx = (Index != -1U) ? 0 : 1;
     if (ST->hasP9Altivec()) {
       // P10 has vxform insert which can handle non const index. The MaskCost is
       // for masking the index.
       // P9 has insert for const index. A move-to VSR and a permute/insert.
       // Assume vector operation cost for both (cost will be 2x on P9).
       if (ISD == ISD::INSERT_VECTOR_ELT) {
-        if (ST->isISA3_1())
-          return CostFactor + MaskCost;
+        if (ST->hasP10Vector())
+          return CostFactor + MaskCostForIdx;
         else if (Index != -1U)
           return 2 * CostFactor;
       } else if (ISD == ISD::EXTRACT_VECTOR_ELT) {
@@ -725,21 +727,21 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
             return 1;
 
           // For other indexs like non const, P9 has vxform extract. The
-          // MaskCost is for masking the index.
-          return CostFactor + MaskCost;
+          // MaskCostForIdx is for masking the index.
+          return CostFactor + MaskCostForIdx;
         }
 
         // We need a vector extract (or mfvsrld).  Assume vector operation cost.
         // The cost of the load constant for a vector extract is disregarded
         // (invariant, easily schedulable).
-        return CostFactor + MaskCost;
+        return CostFactor + MaskCostForOneBitSize + MaskCostForIdx;
       }
     } else if (ST->hasDirectMove() && Index != -1U) {
       // Assume permute has standard cost.
       // Assume move-to/move-from VSR have 2x standard cost.
       if (ISD == ISD::INSERT_VECTOR_ELT)
         return 3;
-      return 3 + MaskCost;
+      return 3 + MaskCostForOneBitSize;
     }
   }
 
diff --git a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
index 512c83f0182377..fda6a4ae51fc2c 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/insert_extract.ll
@@ -239,7 +239,7 @@ define <2 x i64> @insert_i64_x(<2 x i64> %dest, i64 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i64_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <2 x i64> %x
 ;
   %x = insertelement <2 x i64> %dest, i64 %arg, i32 %idx
@@ -264,7 +264,7 @@ define <4 x i32> @insert_i32_x(<4 x i32> %dest, i32 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i32_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <4 x i32> %x
 ;
   %x = insertelement <4 x i32> %dest, i32 %arg, i32 %idx
@@ -289,7 +289,7 @@ define  <8 x i16> @insert_i16_x(<8 x i16> %dest, i16 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i16_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <8 x i16> %x
 ;
   %x = insertelement <8 x i16> %dest, i16 %arg, i32 %idx
@@ -314,7 +314,7 @@ define  <16 x i8> @insert_i8_x(<16 x i8> %dest, i8 %arg, i32 %idx) {
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %x
 ;
 ; CHECK-P10-LABEL: 'insert_i8_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret <16 x i8> %x
 ;
   %x = insertelement <16 x i8> %dest, i8 %arg, i32 %idx
@@ -331,15 +331,15 @@ define i64 @extract_i64_x(<2 x i64> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i64_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i64_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i64_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <2 x i64> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i64 %x
 ;
   %x = extractelement <2 x i64> %arg, i32 %idx
@@ -356,15 +356,15 @@ define i32 @extract_i32_x(<4 x i32> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i32_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i32_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i32_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <4 x i32> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i32 %x
 ;
   %x = extractelement <4 x i32> %arg, i32 %idx
@@ -381,15 +381,15 @@ define i16 @extract_i16_x(<8 x i16> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i16_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i16_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i16_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <8 x i16> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i16 %x
 ;
   %x = extractelement <8 x i16> %arg, i32 %idx
@@ -406,15 +406,15 @@ define i8 @extract_i8_x(<16 x i8> %arg, i32 %idx) {
 ; CHECK-P8LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
 ; CHECK-P9BE-LABEL: 'extract_i8_x'
-; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
+; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
 ; CHECK-P9BE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
 ; CHECK-P9LE-LABEL: 'extract_i8_x'
-; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
+; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
 ; CHECK-P9LE-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
 ; CHECK-P10-LABEL: 'extract_i8_x'
-; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
+; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %x = extractelement <16 x i8> %arg, i32 %idx
 ; CHECK-P10-NEXT:  Cost Model: Found an estimated cost of 0 for instruction: ret i8 %x
 ;
   %x = extractelement <16 x i8> %arg, i32 %idx

>From 58da0d2c4059ba32343b08d5cc528215c5f13283 Mon Sep 17 00:00:00 2001
From: Chen Zheng <czhengsz at cn.ibm.com>
Date: Sun, 18 Feb 2024 20:31:18 -0500
Subject: [PATCH 3/3] address the nit

---
 llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ddb1bf3f78dd66..63214e3da79fde 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -704,8 +704,8 @@ InstructionCost PPCTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
     // Computing on non const index requires extra mask or compare operations.
     unsigned MaskCostForIdx = (Index != -1U) ? 0 : 1;
     if (ST->hasP9Altivec()) {
-      // P10 has vxform insert which can handle non const index. The MaskCost is
-      // for masking the index.
+      // P10 has vxform insert which can handle non const index. The
+      // MaskCostForIdx is for masking the index.
       // P9 has insert for const index. A move-to VSR and a permute/insert.
       // Assume vector operation cost for both (cost will be 2x on P9).
       if (ISD == ISD::INSERT_VECTOR_ELT) {



More information about the llvm-commits mailing list