[llvm] [PowerPC] Change default for auto gen stxvp for cpu=future (PR #142826)

Lei Huang via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 4 14:15:01 PDT 2025


https://github.com/lei137 updated https://github.com/llvm/llvm-project/pull/142826

>From 739664920d30fa4b695ba0ba59c666914266e321 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 4 Jun 2025 13:01:16 -0500
Subject: [PATCH 1/2] [PowerPC] For cpu=future allow auto generation of stxvp
 instructions by default

---
 llvm/lib/Target/PowerPC/PPC.td                 |  3 ++-
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp    | 13 +++++++++----
 llvm/test/CodeGen/PowerPC/dmr-spill.ll         |  6 +++---
 llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll |  2 --
 4 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 6b058d1a74772..fd850faf7b2fb 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -482,7 +482,8 @@ def ProcessorFeatures {
   // Future
   // For future CPU we assume that all of the existing features from Power11
   // still exist with the exception of those we know are Power11 specific.
-  list<SubtargetFeature> FutureAdditionalFeatures = [FeatureISAFuture];
+  list<SubtargetFeature> FutureAdditionalFeatures = [DirectivePwrFuture,
+                                                     FeatureISAFuture];
   list<SubtargetFeature> FutureSpecificFeatures = [];
   list<SubtargetFeature> FutureInheritableFeatures =
     !listconcat(P11InheritableFeatures, FutureAdditionalFeatures);
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0c2a506005604..94e95953363db 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1476,7 +1476,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   setMinFunctionAlignment(Align(4));
 
-  switch (Subtarget.getCPUDirective()) {
+  auto CPUDirective = Subtarget.getCPUDirective();
+  switch (CPUDirective) {
   default: break;
   case PPC::DIR_970:
   case PPC::DIR_A2:
@@ -1508,15 +1509,14 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   // The Freescale cores do better with aggressive inlining of memcpy and
   // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
-  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
-      Subtarget.getCPUDirective() == PPC::DIR_E5500) {
+  if (CPUDirective == PPC::DIR_E500mc || CPUDirective == PPC::DIR_E5500) {
     MaxStoresPerMemset = 32;
     MaxStoresPerMemsetOptSize = 16;
     MaxStoresPerMemcpy = 32;
     MaxStoresPerMemcpyOptSize = 8;
     MaxStoresPerMemmove = 32;
     MaxStoresPerMemmoveOptSize = 8;
-  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
+  } else if (CPUDirective == PPC::DIR_A2) {
     // The A2 also benefits from (very) aggressive inlining of memcpy and
     // friends. The overhead of a the function call, even when warm, can be
     // over one hundred cycles.
@@ -1529,6 +1529,11 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
     MaxLoadsPerMemcmpOptSize = 4;
   }
 
+  // Enable generation of STXVP instructions by default for mcpu=future.
+  if (CPUDirective == PPC::DIR_PWR_FUTURE &&
+      !DisableAutoPairedVecSt.getNumOccurrences())
+    DisableAutoPairedVecSt = false;
+
   IsStrictFPEnabled = true;
 
   // Let the subtarget (CPU) decide if a predictable select is more expensive
diff --git a/llvm/test/CodeGen/PowerPC/dmr-spill.ll b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
index b224643a6dd9f..c1b01cd2d3fd5 100644
--- a/llvm/test/CodeGen/PowerPC/dmr-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/dmr-spill.ll
@@ -1,12 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN:   -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-aix \
-; RUN:   -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN:   -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-aix \
-; RUN:   -disable-auto-paired-vec-st=false -ppc-asm-full-reg-names \
+; RUN:   -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr -mcpu=future < %s | FileCheck %s --check-prefix=AIX32
 
 declare <1024 x i1> @llvm.ppc.mma.dmxvbf16gerx2pp(<1024 x i1>, <256 x i1>, <16 x i8>)
diff --git a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
index c2c8a42c402a2..8dd17abb26347 100644
--- a/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
+++ b/llvm/test/CodeGen/PowerPC/mmaplus-acc-spill.ll
@@ -1,11 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; This test is a copy of mma-acc-spill.ll except that it uses mcpu=future.
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN:   -disable-auto-paired-vec-st=false \
 ; RUN:   -mcpu=future -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
-; RUN:   -disable-auto-paired-vec-st=false \
 ; RUN:   -mcpu=future -ppc-asm-full-reg-names \
 ; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
 

>From 4eb195872aa081e2fd1339131a0319d3940617b1 Mon Sep 17 00:00:00 2001
From: Lei Huang <lei at ca.ibm.com>
Date: Wed, 4 Jun 2025 16:14:49 -0500
Subject: [PATCH 2/2] address review comment

---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 94e95953363db..3ea9b0b11ce89 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1531,7 +1531,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   // Enable generation of STXVP instructions by default for mcpu=future.
   if (CPUDirective == PPC::DIR_PWR_FUTURE &&
-      !DisableAutoPairedVecSt.getNumOccurrences())
+      DisableAutoPairedVecSt.getNumOccurrences() == 0)
     DisableAutoPairedVecSt = false;
 
   IsStrictFPEnabled = true;



More information about the llvm-commits mailing list