[llvm] 06c8210 - update P7 32-bit partial vector load cost (#108261)

via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 3 09:28:47 PDT 2024


Author: RolandF77
Date: 2024-10-03T12:28:43-04:00
New Revision: 06c8210a67a47a3693e7c8932e8ace0302bd1eb7

URL: https://github.com/llvm/llvm-project/commit/06c8210a67a47a3693e7c8932e8ace0302bd1eb7
DIFF: https://github.com/llvm/llvm-project/commit/06c8210a67a47a3693e7c8932e8ace0302bd1eb7.diff

LOG: update P7 32-bit partial vector load cost (#108261)

Update the PowerPC cost model to reflect the codegen change that uses lfiwzx
for 32-bit partial vector loads on pwr7, introduced in
https://github.com/llvm/llvm-project/pull/104507.

Added: 
    

Modified: 
    llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
    llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index ec3d3dbc8f6aa4..33ad30351c51c0 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -800,13 +800,19 @@ InstructionCost PPCTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
   // PPCTargetLowering can't compute the cost appropriately. So here we
   // explicitly check this case. There are also corresponding store
   // instructions.
-  unsigned MemBytes = Src->getPrimitiveSizeInBits();
-  if (ST->hasVSX() && IsAltivecType &&
-      (MemBytes == 64 || (ST->hasP8Vector() && MemBytes == 32)))
-    return 1;
+  unsigned MemBits = Src->getPrimitiveSizeInBits();
+  unsigned SrcBytes = LT.second.getStoreSize();
+  if (ST->hasVSX() && IsAltivecType) {
+    if (MemBits == 64 || (ST->hasP8Vector() && MemBits == 32))
+      return 1;
+
+    // Use lfiwax/xxspltw
+    Align AlignBytes = Alignment ? *Alignment : Align(1);
+    if (Opcode == Instruction::Load && MemBits == 32 && AlignBytes < SrcBytes)
+      return 2;
+  }
 
   // Aligned loads and stores are easy.
-  unsigned SrcBytes = LT.second.getStoreSize();
   if (!SrcBytes || !Alignment || *Alignment >= SrcBytes)
     return Cost;
 

diff  --git a/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll b/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll
index 54cafa0ae59f39..17bcdd4d3f44cf 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/vsr_load_32_64.ll
@@ -1,15 +1,16 @@
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck -DCOST32=1 %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -mattr=+vsx | FileCheck -DCOST32=2 %s
 target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
 target triple = "powerpc64-unknown-linux-gnu"
 
 define i32 @loads(i32 %arg) {
-  ; CHECK: cost of 1 {{.*}} load
+  ; CHECK: cost of [[COST32]] {{.*}} load
   load <4 x i8>, ptr undef, align 1
 
   ; CHECK: cost of 1 {{.*}} load
   load <8 x i8>, ptr undef, align 1
 
-  ; CHECK: cost of 1 {{.*}} load
+  ; CHECK: cost of [[COST32]] {{.*}} load
   load <2 x i16>, ptr undef, align 2
 
   ; CHECK: cost of 1 {{.*}} load


        


More information about the llvm-commits mailing list