[clang] [llvm] [PowerPC] Add support for -mcpu=pwr11 / -mtune=pwr11 (PR #99511)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jul 19 13:10:17 PDT 2024
https://github.com/azhan92 updated https://github.com/llvm/llvm-project/pull/99511
>From 52100e3378c86eddb9a4757f8f3e175804a6dc76 Mon Sep 17 00:00:00 2001
From: Alison Zhang <alisonzhang at ibm.com>
Date: Thu, 18 Jul 2024 10:27:40 -0400
Subject: [PATCH 1/7] Add support for -mcpu=pwr11 / -mtune=pwr11
---
clang/lib/Basic/Targets/PPC.cpp | 39 ++++++++++++-------
clang/lib/Basic/Targets/PPC.h | 19 ++++++---
clang/lib/Driver/ToolChains/Arch/PPC.cpp | 3 ++
clang/test/Misc/target-invalid-cpu-note.c | 2 +-
clang/test/Preprocessor/init-ppc64.c | 20 ++++++++++
llvm/lib/Target/PowerPC/PPC.td | 21 ++++++++--
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1 +
llvm/lib/Target/PowerPC/PPCSubtarget.h | 1 +
.../Target/PowerPC/PPCTargetTransformInfo.cpp | 4 +-
llvm/lib/TargetParser/Host.cpp | 1 +
10 files changed, 86 insertions(+), 25 deletions(-)
diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp
index 4ba4a49311d36..9ff54083c923b 100644
--- a/clang/lib/Basic/Targets/PPC.cpp
+++ b/clang/lib/Basic/Targets/PPC.cpp
@@ -385,6 +385,8 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("_ARCH_PWR9");
if (ArchDefs & ArchDefinePwr10)
Builder.defineMacro("_ARCH_PWR10");
+ if (ArchDefs & ArchDefinePwr11)
+ Builder.defineMacro("_ARCH_PWR11");
if (ArchDefs & ArchDefineA2)
Builder.defineMacro("_ARCH_A2");
if (ArchDefs & ArchDefineE500)
@@ -622,10 +624,17 @@ bool PPCTargetInfo::initFeatureMap(
addP10SpecificFeatures(Features);
}
- // Future CPU should include all of the features of Power 10 as well as any
+ // Power11 includes all the same features as Power10 plus any features
+ // specific to the Power11 core.
+ if (CPU == "pwr11" || CPU == "power11") {
+ initFeatureMap(Features, Diags, "pwr10", FeaturesVec);
+ addP11SpecificFeatures(Features);
+ }
+
+ // Future CPU should include all of the features of Power 11 as well as any
// additional features (yet to be determined) specific to it.
if (CPU == "future") {
- initFeatureMap(Features, Diags, "pwr10", FeaturesVec);
+ initFeatureMap(Features, Diags, "pwr11", FeaturesVec);
addFutureSpecificFeatures(Features);
}
@@ -696,6 +705,10 @@ void PPCTargetInfo::addP10SpecificFeatures(
Features["isa-v31-instructions"] = true;
}
+// Add any Power11 specific features.
+void PPCTargetInfo::addP11SpecificFeatures(
+ llvm::StringMap<bool> &Features) const {}
+
// Add features specific to the "Future" CPU.
void PPCTargetInfo::addFutureSpecificFeatures(
llvm::StringMap<bool> &Features) const {}
@@ -870,17 +883,17 @@ ArrayRef<TargetInfo::AddlRegName> PPCTargetInfo::getGCCAddlRegNames() const {
}
static constexpr llvm::StringLiteral ValidCPUNames[] = {
- {"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
- {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
- {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
- {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
- {"g5"}, {"a2"}, {"e500"}, {"e500mc"}, {"e5500"},
- {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, {"power5"},
- {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, {"pwr6"},
- {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, {"power8"},
- {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, {"pwr10"},
- {"powerpc"}, {"ppc"}, {"ppc32"}, {"powerpc64"}, {"ppc64"},
- {"powerpc64le"}, {"ppc64le"}, {"future"}};
+ {"generic"}, {"440"}, {"450"}, {"601"}, {"602"},
+ {"603"}, {"603e"}, {"603ev"}, {"604"}, {"604e"},
+ {"620"}, {"630"}, {"g3"}, {"7400"}, {"g4"},
+ {"7450"}, {"g4+"}, {"750"}, {"8548"}, {"970"},
+ {"g5"}, {"a2"}, {"e500"}, {"e500mc"}, {"e5500"},
+ {"power3"}, {"pwr3"}, {"power4"}, {"pwr4"}, {"power5"},
+ {"pwr5"}, {"power5x"}, {"pwr5x"}, {"power6"}, {"pwr6"},
+ {"power6x"}, {"pwr6x"}, {"power7"}, {"pwr7"}, {"power8"},
+ {"pwr8"}, {"power9"}, {"pwr9"}, {"power10"}, {"pwr10"},
+ {"power11"}, {"pwr11"}, {"powerpc"}, {"ppc"}, {"ppc32"},
+ {"powerpc64"}, {"ppc64"}, {"powerpc64le"}, {"ppc64le"}, {"future"}};
bool PPCTargetInfo::isValidCPUName(StringRef Name) const {
return llvm::is_contained(ValidCPUNames, Name);
diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h
index b15ab6fbcf492..6d5d8dd54d013 100644
--- a/clang/lib/Basic/Targets/PPC.h
+++ b/clang/lib/Basic/Targets/PPC.h
@@ -44,8 +44,9 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
ArchDefinePwr8 = 1 << 12,
ArchDefinePwr9 = 1 << 13,
ArchDefinePwr10 = 1 << 14,
- ArchDefineFuture = 1 << 15,
- ArchDefineA2 = 1 << 16,
+ ArchDefinePwr11 = 1 << 15,
+ ArchDefineFuture = 1 << 16,
+ ArchDefineA2 = 1 << 17,
ArchDefineE500 = 1 << 18
} ArchDefineTypes;
@@ -166,11 +167,16 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
ArchDefinePwr7 | ArchDefinePwr6 | ArchDefinePwr5x |
ArchDefinePwr5 | ArchDefinePwr4 | ArchDefinePpcgr |
ArchDefinePpcsq)
+ .Cases("power11", "pwr11",
+ ArchDefinePwr11 | ArchDefinePwr10 | ArchDefinePwr9 |
+ ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 |
+ ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 |
+ ArchDefinePpcgr | ArchDefinePpcsq)
.Case("future",
- ArchDefineFuture | ArchDefinePwr10 | ArchDefinePwr9 |
- ArchDefinePwr8 | ArchDefinePwr7 | ArchDefinePwr6 |
- ArchDefinePwr5x | ArchDefinePwr5 | ArchDefinePwr4 |
- ArchDefinePpcgr | ArchDefinePpcsq)
+ ArchDefineFuture | ArchDefinePwr11 | ArchDefinePwr10 |
+ ArchDefinePwr9 | ArchDefinePwr8 | ArchDefinePwr7 |
+ ArchDefinePwr6 | ArchDefinePwr5x | ArchDefinePwr5 |
+ ArchDefinePwr4 | ArchDefinePpcgr | ArchDefinePpcsq)
.Cases("8548", "e500", ArchDefineE500)
.Default(ArchDefineNone);
}
@@ -192,6 +198,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo {
const std::vector<std::string> &FeaturesVec) const override;
void addP10SpecificFeatures(llvm::StringMap<bool> &Features) const;
+ void addP11SpecificFeatures(llvm::StringMap<bool> &Features) const;
void addFutureSpecificFeatures(llvm::StringMap<bool> &Features) const;
bool handleTargetFeatures(std::vector<std::string> &Features,
diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
index 634c096523319..acd5757d6ea97 100644
--- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp
@@ -70,6 +70,7 @@ static std::string normalizeCPUName(StringRef CPUName, const llvm::Triple &T) {
.Case("power8", "pwr8")
.Case("power9", "pwr9")
.Case("power10", "pwr10")
+ .Case("power11", "pwr11")
.Case("future", "future")
.Case("powerpc", "ppc")
.Case("powerpc64", "ppc64")
@@ -103,6 +104,8 @@ const char *ppc::getPPCAsmModeForCPU(StringRef Name) {
.Case("power9", "-mpower9")
.Case("pwr10", "-mpower10")
.Case("power10", "-mpower10")
+ .Case("pwr11", "-mpower11")
+ .Case("power11", "-mpower11")
.Default("-many");
}
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index a5f9ffa21220a..4d6759dd81537 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -57,7 +57,7 @@
// RUN: not %clang_cc1 -triple powerpc--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix PPC
// PPC: error: unknown target CPU 'not-a-cpu'
-// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}}
+// PPC-NEXT: note: valid target CPU values are: generic, 440, 450, 601, 602, 603, 603e, 603ev, 604, 604e, 620, 630, g3, 7400, g4, 7450, g4+, 750, 8548, 970, g5, a2, e500, e500mc, e5500, power3, pwr3, power4, pwr4, power5, pwr5, power5x, pwr5x, power6, pwr6, power6x, pwr6x, power7, pwr7, power8, pwr8, power9, pwr9, power10, pwr10, power11, pwr11, powerpc, ppc, ppc32, powerpc64, ppc64, powerpc64le, ppc64le, future{{$}}
// RUN: not %clang_cc1 -triple mips--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix MIPS
// MIPS: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/Preprocessor/init-ppc64.c b/clang/test/Preprocessor/init-ppc64.c
index 42e5232824de7..a53397226b8d5 100644
--- a/clang/test/Preprocessor/init-ppc64.c
+++ b/clang/test/Preprocessor/init-ppc64.c
@@ -632,6 +632,25 @@
// PPCPOWER10:#define __PCREL__ 1
// PPCPOWER10-NOT:#define __ROP_PROTECT__ 1
//
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu pwr11 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER11 %s
+// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu power11 -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCPOWER11 %s
+//
+// PPCPOWER11:#define _ARCH_PPC 1
+// PPCPOWER11:#define _ARCH_PPC64 1
+// PPCPOWER11:#define _ARCH_PPCGR 1
+// PPCPOWER11:#define _ARCH_PPCSQ 1
+// PPCPOWER11:#define _ARCH_PWR10 1
+// PPCPOWER11:#define _ARCH_PWR11 1
+// PPCPOWER11:#define _ARCH_PWR4 1
+// PPCPOWER11:#define _ARCH_PWR5 1
+// PPCPOWER11:#define _ARCH_PWR5X 1
+// PPCPOWER11:#define _ARCH_PWR6 1
+// PPCPOWER11-NOT:#define _ARCH_PWR6X 1
+// PPCPOWER11:#define _ARCH_PWR7 1
+// PPCPOWER11:#define _ARCH_PWR8 1
+// PPCPOWER11:#define _ARCH_PWR9 1
+// PPCPOWER11-NOT:#define __ROP_PROTECT__ 1
+//
// RUN: %clang_cc1 -E -dM -ffreestanding -triple=powerpc64-none-none -target-cpu future -fno-signed-char < /dev/null | FileCheck -match-full-lines -check-prefix PPCFUTURE %s
//
// PPCFUTURE:#define _ARCH_PPC 1
@@ -639,6 +658,7 @@
// PPCFUTURE:#define _ARCH_PPCGR 1
// PPCFUTURE:#define _ARCH_PPCSQ 1
// PPCFUTURE:#define _ARCH_PWR10 1
+// PPCFUTURE:#define _ARCH_PWR11 1
// PPCFUTURE:#define _ARCH_PWR4 1
// PPCFUTURE:#define _ARCH_PWR5 1
// PPCFUTURE:#define _ARCH_PWR5X 1
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 84ef582c029d3..41d2ee328b8b8 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -52,6 +52,7 @@ def DirectivePwr7: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR7", "">;
def DirectivePwr8: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR8", "">;
def DirectivePwr9: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR9", "">;
def DirectivePwr10: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR10", "">;
+def DirectivePwr11: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR11", "">;
def DirectivePwrFuture
: SubtargetFeature<"", "CPUDirective", "PPC::DIR_PWR_FUTURE", "">;
@@ -467,13 +468,25 @@ def ProcessorFeatures {
list<SubtargetFeature> P10Features =
!listconcat(P10InheritableFeatures, P10SpecificFeatures);
- // Future
- // For future CPU we assume that all of the existing features from Power10
+ // Power11
+ // For P11 CPU we assume that all the existing features from Power10
// still exist with the exception of those we know are Power10 specific.
+ list<SubtargetFeature> P11AdditionalFeatures =
+ [DirectivePwr11];
+ list<SubtargetFeature> P11SpecificFeatures =
+ [];
+ list<SubtargetFeature> P11InheritableFeatures =
+ !listconcat(P10InheritableFeatures, P11AdditionalFeatures);
+ list<SubtargetFeature> P11Features =
+ !listconcat(P11InheritableFeatures, P11SpecificFeatures);
+
+ // Future
+ // For future CPU we assume that all of the existing features from Power11
+ // still exist with the exception of those we know are Power11 specific.
list<SubtargetFeature> FutureAdditionalFeatures = [FeatureISAFuture];
list<SubtargetFeature> FutureSpecificFeatures = [];
list<SubtargetFeature> FutureInheritableFeatures =
- !listconcat(P10InheritableFeatures, FutureAdditionalFeatures);
+ !listconcat(P11InheritableFeatures, FutureAdditionalFeatures);
list<SubtargetFeature> FutureFeatures =
!listconcat(FutureInheritableFeatures, FutureSpecificFeatures);
}
@@ -672,6 +685,8 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>;
+// No scheduler model yet.
+def : ProcessorModel<"pwr11", NoSchedModel, ProcessorFeatures.P11Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index a11ab93b8db3c..0520d75fda31e 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1469,6 +1469,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
case PPC::DIR_PWR10:
+ case PPC::DIR_PWR11:
case PPC::DIR_PWR_FUTURE:
setPrefLoopAlignment(Align(16));
setPrefFunctionAlignment(Align(16));
diff --git a/llvm/lib/Target/PowerPC/PPCSubtarget.h b/llvm/lib/Target/PowerPC/PPCSubtarget.h
index bf35f8ec151b1..2079dc0acc3cf 100644
--- a/llvm/lib/Target/PowerPC/PPCSubtarget.h
+++ b/llvm/lib/Target/PowerPC/PPCSubtarget.h
@@ -61,6 +61,7 @@ enum {
DIR_PWR8,
DIR_PWR9,
DIR_PWR10,
+ DIR_PWR11,
DIR_PWR_FUTURE,
DIR_64
};
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 3fa35efc2d159..b7bdbeb535d52 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -504,7 +504,7 @@ unsigned PPCTTIImpl::getCacheLineSize() const {
// Assume that Future CPU has the same cache line size as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
- Directive == PPC::DIR_PWR_FUTURE)
+ Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
return 128;
// On other processors return a default of 64 bytes.
@@ -538,7 +538,7 @@ unsigned PPCTTIImpl::getMaxInterleaveFactor(ElementCount VF) {
// Assume that future is the same as the others.
if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8 ||
Directive == PPC::DIR_PWR9 || Directive == PPC::DIR_PWR10 ||
- Directive == PPC::DIR_PWR_FUTURE)
+ Directive == PPC::DIR_PWR11 || Directive == PPC::DIR_PWR_FUTURE)
return 12;
// For most things, modern systems have two execution units (and
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 82c1731f58f0a..5d85386e5b359 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -150,6 +150,7 @@ StringRef sys::detail::getHostCPUNameForPowerPC(StringRef ProcCpuinfoContent) {
.Case("POWER8NVL", "pwr8")
.Case("POWER9", "pwr9")
.Case("POWER10", "pwr10")
+ .Case("POWER11", "pwr11")
// FIXME: If we get a simulator or machine with the capabilities of
// mcpu=future, we should revisit this and add the name reported by the
// simulator/machine.
>From e55375b12aa1da16d27d8d888d707669041303f6 Mon Sep 17 00:00:00 2001
From: Alison Zhang <alisonzhang at ibm.com>
Date: Fri, 19 Jul 2024 10:19:09 -0500
Subject: [PATCH 2/7] Suggestions from reviewer
---
llvm/lib/Target/PowerPC/PPC.td | 3 +-
llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 +
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 3 +-
llvm/lib/Target/PowerPC/PPCInstrInfo.h | 24 +-
llvm/lib/TargetParser/Host.cpp | 6 +
llvm/test/CodeGen/PowerPC/check-cpu.ll | 6 +-
...{mma-acc-spill.ll => mma-acc-spill-p10.ll} | 0
.../test/CodeGen/PowerPC/mma-acc-spill-p11.ll | 122 ++++++
llvm/test/CodeGen/PowerPC/p11-constants.ll | 392 ++++++++++++++++++
llvm/unittests/TargetParser/Host.cpp | 1 +
10 files changed, 551 insertions(+), 8 deletions(-)
rename llvm/test/CodeGen/PowerPC/{mma-acc-spill.ll => mma-acc-spill-p10.ll} (100%)
create mode 100644 llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll
create mode 100644 llvm/test/CodeGen/PowerPC/p11-constants.ll
diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td
index 41d2ee328b8b8..da31a993b9c69 100644
--- a/llvm/lib/Target/PowerPC/PPC.td
+++ b/llvm/lib/Target/PowerPC/PPC.td
@@ -685,8 +685,7 @@ def : ProcessorModel<"pwr7", P7Model, ProcessorFeatures.P7Features>;
def : ProcessorModel<"pwr8", P8Model, ProcessorFeatures.P8Features>;
def : ProcessorModel<"pwr9", P9Model, ProcessorFeatures.P9Features>;
def : ProcessorModel<"pwr10", P10Model, ProcessorFeatures.P10Features>;
-// No scheduler model yet.
-def : ProcessorModel<"pwr11", NoSchedModel, ProcessorFeatures.P11Features>;
+def : ProcessorModel<"pwr11", P10Model, ProcessorFeatures.P11Features>;
// No scheduler model for future CPU.
def : ProcessorModel<"future", NoSchedModel,
ProcessorFeatures.FutureFeatures>;
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0520d75fda31e..258dc907fb2b0 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -16665,6 +16665,7 @@ Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
case PPC::DIR_PWR8:
case PPC::DIR_PWR9:
case PPC::DIR_PWR10:
+ case PPC::DIR_PWR11:
case PPC::DIR_PWR_FUTURE: {
if (!ML)
break;
@@ -18047,6 +18048,7 @@ SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
return true;
case PPC::DIR_PWR9:
case PPC::DIR_PWR10:
+ case PPC::DIR_PWR11:
case PPC::DIR_PWR_FUTURE:
// type mul add shl
// scalar 5 2 2
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 2d3c520429f2a..c8d8f97231514 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3485,7 +3485,8 @@ unsigned PPCInstrInfo::getSpillTarget() const {
// With P10, we may need to spill paired vector registers or accumulator
// registers. MMA implies paired vectors, so we can just check that.
bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
- return Subtarget.isISAFuture() ? 3 : IsP10Variant ?
+ // P11 uses the P10 target.
+ return Subtarget.isISAFuture() ? 4 : IsP10Variant ?
2 : Subtarget.hasP9Vector() ?
1 : 0;
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 1e2687f92c61e..ea34b459251f4 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -124,6 +124,14 @@ enum PPCMachineCombinerPattern : unsigned {
PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \
}
+#define Pwr11LoadOpcodes \
+ { \
+ PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
+ PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \
+ PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \
+ }
+
#define FutureLoadOpcodes \
{ \
PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
@@ -156,6 +164,14 @@ enum PPCMachineCombinerPattern : unsigned {
NoInstr, NoInstr, PPC::SPILL_QUADWORD \
}
+#define Pwr11StoreOpcodes \
+ { \
+ PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
+ PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \
+ NoInstr, NoInstr, PPC::SPILL_QUADWORD \
+ }
+
#define FutureStoreOpcodes \
{ \
PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
@@ -166,17 +182,17 @@ enum PPCMachineCombinerPattern : unsigned {
// Initialize arrays for load and store spill opcodes on supported subtargets.
#define StoreOpcodesForSpill \
- { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, FutureStoreOpcodes }
+ { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, Pwr11StoreOpcodes, FutureStoreOpcodes }
#define LoadOpcodesForSpill \
- { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, FutureLoadOpcodes }
+ { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, Pwr11LoadOpcodes, FutureLoadOpcodes }
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
PPCSubtarget &Subtarget;
const PPCRegisterInfo RI;
- const unsigned StoreSpillOpcodesArray[4][SOK_LastOpcodeSpill] =
+ const unsigned StoreSpillOpcodesArray[5][SOK_LastOpcodeSpill] =
StoreOpcodesForSpill;
- const unsigned LoadSpillOpcodesArray[4][SOK_LastOpcodeSpill] =
+ const unsigned LoadSpillOpcodesArray[5][SOK_LastOpcodeSpill] =
LoadOpcodesForSpill;
void StoreRegToStackSlot(MachineFunction &MF, unsigned SrcReg, bool isKill,
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 5d85386e5b359..db9183dafee22 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1550,6 +1550,12 @@ StringRef sys::getHostCPUName() {
case 0x40000:
#endif
return "pwr10";
+#ifdef POWER_11
+ case POWER_11:
+#else
+ case 0x80000:
+#endif
+ return "pwr11";
default:
return "generic";
}
diff --git a/llvm/test/CodeGen/PowerPC/check-cpu.ll b/llvm/test/CodeGen/PowerPC/check-cpu.ll
index e1a201427a410..1dc532cb428f4 100644
--- a/llvm/test/CodeGen/PowerPC/check-cpu.ll
+++ b/llvm/test/CodeGen/PowerPC/check-cpu.ll
@@ -3,6 +3,10 @@
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=future < %s 2>&1 | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 < %s 2>&1 | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 < %s 2>&1 | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -mcpu=pwr10 < %s 2>&1 | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN: -mcpu=pwr10 < %s 2>&1 | FileCheck %s
@@ -13,7 +17,7 @@
-; Test -mcpu=[pwr9|pwr10|future] is recognized on PowerPC.
+; Test -mcpu=[pwr9|pwr10|pwr11|future] is recognized on PowerPC.
; CHECK-NOT: is not a recognized processor for this target
; CHECK: .text
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill-p10.ll
similarity index 100%
rename from llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
rename to llvm/test/CodeGen/PowerPC/mma-acc-spill-p10.ll
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll
new file mode 100644
index 0000000000000..8bf609a84aa02
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; This test is a copy of mma-acc-spill-p10.ll except that it uses mcpu=pwr11.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
+declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
+declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
+declare void @foo()
+define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) {
+; CHECK-LABEL: intrinsics1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr r0
+; CHECK-NEXT: std r0, 16(r1)
+; CHECK-NEXT: stdu r1, -176(r1)
+; CHECK-NEXT: .cfi_def_cfa_offset 176
+; CHECK-NEXT: .cfi_offset lr, 16
+; CHECK-NEXT: .cfi_offset r30, -16
+; CHECK-NEXT: .cfi_offset v28, -80
+; CHECK-NEXT: .cfi_offset v29, -64
+; CHECK-NEXT: .cfi_offset v30, -48
+; CHECK-NEXT: .cfi_offset v31, -32
+; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill
+; CHECK-NEXT: vmr v29, v3
+; CHECK-NEXT: vmr v28, v2
+; CHECK-NEXT: xxlor vs0, v28, v28
+; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill
+; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill
+; CHECK-NEXT: vmr v31, v5
+; CHECK-NEXT: vmr v30, v4
+; CHECK-NEXT: xxlor vs1, v29, v29
+; CHECK-NEXT: xxlor vs2, v30, v30
+; CHECK-NEXT: xxlor vs3, v31, v31
+; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill
+; CHECK-NEXT: ld r30, 272(r1)
+; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: stxvp vsp0, 64(r1)
+; CHECK-NEXT: stxvp vsp2, 32(r1)
+; CHECK-NEXT: bl foo@notoc
+; CHECK-NEXT: lxvp vsp0, 64(r1)
+; CHECK-NEXT: lxvp vsp2, 32(r1)
+; CHECK-NEXT: xxmtacc acc0
+; CHECK-NEXT: xvf16ger2pp acc0, v28, v30
+; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
+; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload
+; CHECK-NEXT: xxmfacc acc0
+; CHECK-NEXT: stxv vs0, 48(r30)
+; CHECK-NEXT: stxv vs1, 32(r30)
+; CHECK-NEXT: stxv vs2, 16(r30)
+; CHECK-NEXT: stxv vs3, 0(r30)
+; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
+; CHECK-NEXT: addi r1, r1, 176
+; CHECK-NEXT: ld r0, 16(r1)
+; CHECK-NEXT: mtlr r0
+; CHECK-NEXT: blr
+;
+; CHECK-BE-LABEL: intrinsics1:
+; CHECK-BE: # %bb.0:
+; CHECK-BE-NEXT: mflr r0
+; CHECK-BE-NEXT: std r0, 16(r1)
+; CHECK-BE-NEXT: stdu r1, -256(r1)
+; CHECK-BE-NEXT: .cfi_def_cfa_offset 256
+; CHECK-BE-NEXT: .cfi_offset lr, 16
+; CHECK-BE-NEXT: .cfi_offset r30, -16
+; CHECK-BE-NEXT: .cfi_offset v28, -80
+; CHECK-BE-NEXT: .cfi_offset v29, -64
+; CHECK-BE-NEXT: .cfi_offset v30, -48
+; CHECK-BE-NEXT: .cfi_offset v31, -32
+; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: vmr v29, v3
+; CHECK-BE-NEXT: vmr v28, v2
+; CHECK-BE-NEXT: xxlor vs0, v28, v28
+; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill
+; CHECK-BE-NEXT: vmr v31, v5
+; CHECK-BE-NEXT: vmr v30, v4
+; CHECK-BE-NEXT: xxlor vs1, v29, v29
+; CHECK-BE-NEXT: xxlor vs2, v30, v30
+; CHECK-BE-NEXT: xxlor vs3, v31, v31
+; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
+; CHECK-BE-NEXT: ld r30, 368(r1)
+; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: stxvp vsp0, 112(r1)
+; CHECK-BE-NEXT: stxvp vsp2, 144(r1)
+; CHECK-BE-NEXT: bl foo
+; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
+; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
+; CHECK-BE-NEXT: xxmtacc acc0
+; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30
+; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
+; CHECK-BE-NEXT: xxmfacc acc0
+; CHECK-BE-NEXT: stxv vs1, 16(r30)
+; CHECK-BE-NEXT: stxv vs0, 0(r30)
+; CHECK-BE-NEXT: stxv vs3, 48(r30)
+; CHECK-BE-NEXT: stxv vs2, 32(r30)
+; CHECK-BE-NEXT: ld r30, 240(r1) # 8-byte Folded Reload
+; CHECK-BE-NEXT: addi r1, r1, 256
+; CHECK-BE-NEXT: ld r0, 16(r1)
+; CHECK-BE-NEXT: mtlr r0
+; CHECK-BE-NEXT: blr
+ %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4)
+ %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3)
+ tail call void @foo()
+ %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
+ store <512 x i1> %3, ptr %ptr, align 64
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/p11-constants.ll b/llvm/test/CodeGen/PowerPC/p11-constants.ll
new file mode 100644
index 0000000000000..f1ca6401c8791
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/p11-constants.ll
@@ -0,0 +1,392 @@
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK32
+
+; These test cases aim to test constant materialization using the pli instruction on Power11.
+
+define signext i32 @t_16BitsMinRequiring34Bits() {
+; CHECK-LABEL: t_16BitsMinRequiring34Bits:
+; CHECK: pli r3, 32768
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_16BitsMinRequiring34Bits:
+; CHECK32: pli r3, 32768
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 32768
+}
+
+define signext i32 @t_16Bits() {
+; CHECK-LABEL: t_16Bits:
+; CHECK: pli r3, 62004
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_16Bits:
+; CHECK32: pli r3, 62004
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 62004
+}
+
+define signext i32 @t_lt32gt16BitsNonShiftable() {
+; CHECK-LABEL: t_lt32gt16BitsNonShiftable:
+; CHECK: pli r3, 1193046
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_lt32gt16BitsNonShiftable:
+; CHECK32: pli r3, 1193046
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 1193046
+}
+
+define signext i32 @t_32Bits() {
+; CHECK-LABEL: t_32Bits:
+; CHECK: pli r3, -231451016
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_32Bits:
+; CHECK32: pli r3, -231451016
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 -231451016
+}
+
+define i64 @t_34BitsLargestPositive() {
+; CHECK-LABEL: t_34BitsLargestPositive:
+; CHECK: pli r3, 8589934591
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_34BitsLargestPositive:
+; CHECK32: li r3, 1
+; CHECK32-NEXT: li r4, -1
+; CHECK32-NEXT: blr
+
+entry:
+ ret i64 8589934591
+}
+
+define i64 @t_neg34Bits() {
+; CHECK-LABEL: t_neg34Bits:
+; CHECK: pli r3, -8284514696
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_neg34Bits:
+; CHECK32: li r3, -2
+; CHECK32-NEXT: pli r4, 305419896
+; CHECK32-NEXT: blr
+
+entry:
+ ret i64 -8284514696
+}
+
+define signext i32 @t_16BitsMinRequiring34BitsMinusOne() {
+; CHECK-LABEL: t_16BitsMinRequiring34BitsMinusOne:
+; CHECK: li r3, 32767
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_16BitsMinRequiring34BitsMinusOne:
+; CHECK32: li r3, 32767
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 32767
+}
+
+define signext i32 @t_lt16Bits() {
+; CHECK-LABEL: t_lt16Bits:
+; CHECK: li r3, 291
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_lt16Bits:
+; CHECK32: li r3, 291
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 291
+}
+
+define signext i32 @t_neglt16Bits() {
+; CHECK-LABEL: t_neglt16Bits:
+; CHECK: li r3, -3805
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_neglt16Bits:
+; CHECK32: li r3, -3805
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 -3805
+}
+
+define signext i32 @t_neg16Bits() {
+; CHECK-LABEL: t_neg16Bits:
+; CHECK: li r3, -32204
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_neg16Bits:
+; CHECK32: li r3, -32204
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 -32204
+}
+
+define signext i32 @t_lt32gt16BitsShiftable() {
+; CHECK-LABEL: t_lt32gt16BitsShiftable:
+; CHECK: lis r3, 18
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_lt32gt16BitsShiftable:
+; CHECK32: lis r3, 18
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 1179648
+}
+
+define signext i32 @t_32gt16BitsShiftable() {
+; CHECK-LABEL: t_32gt16BitsShiftable:
+; CHECK: lis r3, -3532
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_32gt16BitsShiftable:
+; CHECK32: lis r3, -3532
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 -231473152
+}
+
+define signext i32 @t_32BitsZero() {
+; CHECK-LABEL: t_32BitsZero:
+; CHECK: li r3, 0
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_32BitsZero:
+; CHECK32: li r3, 0
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 0
+}
+
+define signext i32 @t_32BitsAllOnes() {
+; CHECK-LABEL: t_32BitsAllOnes:
+; CHECK: li r3, -1
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_32BitsAllOnes:
+; CHECK32: li r3, -1
+; CHECK32-NEXT: blr
+
+entry:
+ ret i32 -1
+}
+
+define i64 @t_34BitsLargestPositivePlus() {
+; CHECK-LABEL: t_34BitsLargestPositivePlus:
+; CHECK: li r3, 1
+; CHECK-NEXT: rldic r3, r3, 33, 30
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_34BitsLargestPositivePlus:
+; CHECK32: li r3, 2
+; CHECK32-NEXT: li r4, 0
+; CHECK32-NEXT: blr
+
+entry:
+ ret i64 8589934592
+}
+
+define i64 @t_34Bits() {
+; CHECK-LABEL: t_34Bits:
+; CHECK: pli r3, 1648790223
+; CHECK-NEXT: rldic r3, r3, 3, 30
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_34Bits:
+; CHECK32: li r3, 3
+; CHECK32-NEXT: pli r4, 305419896
+; CHECK32-NEXT: blr
+
+entry:
+ ret i64 13190321784
+}
+
+define i64 @t_35Bits() {
+; CHECK-LABEL: t_35Bits:
+; CHECK: pli r3, 4266035919
+; CHECK-NEXT: rldic r3, r3, 3, 29
+; CHECK-NEXT: blr
+; CHECK32-LABEL: t_35Bits:
+; CHECK32: li r3, 7
+; CHECK32-NEXT: pli r4, -231451016
+; CHECK32-NEXT: blr
+
+entry:
+ ret i64 34128287352
+}
+
+; (Value >> Shift) can be expressed in 34 bits
+define i64 @t_Shift() {
+; CHECK-LABEL: t_Shift:
+; CHECK: pli r3, 8522759166
+; CHECK-NEXT: rotldi r3, r3, 48
+; CHECK-NEXT: blr
+
+entry:
+ ; 0xFBFE00000001FBFE
+ ret i64 18157950747604548606
+}
+
+; Leading Zeros + Following Ones + Trailing Zeros > 30
+define i64 @t_LZFOTZ() {
+; CHECK-LABEL: t_LZFOTZ:
+; CHECK: pli r3, -349233
+; CHECK-NEXT: rldic r3, r3, 4, 12
+; CHECK-NEXT: blr
+
+entry:
+ ; 0x000FFFFFFFAABCF0
+ ret i64 4503599621782768
+}
+
+; Leading Zeros + Trailing Ones > 30
+define i64 @t_LZTO() {
+; CHECK-LABEL: t_LZTO:
+; CHECK: pli r3, -2684406441
+; CHECK-NEXT: rldicl r3, r3, 11, 19
+; CHECK-NEXT: blr
+entry:
+ ; 0x00001AFFF9AABFFF
+ ret i64 29686707699711
+}
+
+; Leading Zeros + Trailing Ones + Following Zeros > 30
+define i64 @t_LZTOFO() {
+; CHECK-LABEL: t_LZTOFO:
+; CHECK: pli r3, -5720033968
+; CHECK-NEXT: rldicl r3, r3, 11, 12
+; CHECK-NEXT: blr
+entry:
+ ; 0x000FF55879AA87FF
+ ret i64 4491884997806079
+}
+
+; Requires full expansion
+define i64 @t_Full64Bits1() {
+; CHECK-LABEL: t_Full64Bits1:
+; CHECK: pli r4, 2146500607
+; CHECK-NEXT: pli r3, 4043305214
+; CHECK-NEXT: rldimi r3, r4, 32, 0
+; CHECK-NEXT: blr
+entry:
+ ; 0x7FF0FFFFF0FFF0FE
+ ret i64 9219149911952453886
+}
+
+; Requires full expansion
+define i64 @t_Ful64Bits2() {
+; CHECK-LABEL: t_Ful64Bits2:
+; CHECK: pli r4, 4042326015
+; CHECK-NEXT: pli r3, 4043305214
+; CHECK-NEXT: rldimi r3, r4, 32, 0
+; CHECK-NEXT: blr
+entry:
+ ; 0xF0F0FFFFF0FFF0FE
+ ret i64 17361658038238310654
+}
+
+; A splat of 32 bits: 32 Bits Low == 32 Bits High
+define i64 @t_Splat32Bits() {
+; CHECK-LABEL: t_Splat32Bits:
+; CHECK: pli r3, 262916796
+; CHECK-NEXT: rldimi r3, r3, 32, 0
+; CHECK-NEXT: blr
+entry:
+ ; 0x0FABCABC0FABCABC
+ ret i64 1129219040652020412
+}
+
+; Producing `pli` when the constant fits within 34-bits and the constant
+; is being produced in other transformations (such as complex bit permutations).
+define i64 @t_34Bits_Complex(i64 %a, i64 %b) {
+; CHECK-LABEL: t_34Bits_Complex:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: rotldi r4, r4, 30
+; CHECK-NEXT: rldimi r3, r4, 34, 31
+; CHECK-NEXT: pli r4, -268435457
+; CHECK-NEXT: and r3, r3, r4
+; CHECK-NEXT: blr
+;
+; CHECK32-LABEL: t_34Bits_Complex:
+; CHECK32: # %bb.0: # %entry
+; CHECK32-NEXT: rlwinm r4, r6, 0, 4, 2
+; CHECK32-NEXT: rlwimi r3, r5, 0, 31, 29
+; CHECK32-NEXT: blr
+entry:
+ %and = and i64 %a, 8589934592
+ %and1 = and i64 %b, -8858370049
+ %or = or i64 %and1, %and
+ ret i64 %or
+}
+
+; The load immediates resulting from phi-nodes are needed to test whether
+; li/lis is preferred to pli by the instruction selector.
+define dso_local void @t_phiNode() {
+; CHECK-LABEL: t_phiNode:
+; CHECK: lis r6, 18
+; CHECK-NEXT: li r5, 291
+; CHECK-NEXT: li r4, 0
+; CHECK-NEXT: cmpwi r3, 1
+; CHECK-NEXT: li r3, -1
+; CHECK: pli r6, 2147483647
+; CHECK-NEXT: pli r5, 1193046
+; CHECK-NEXT: pli r4, 32768
+; CHECK-NEXT: pli r3, -231451016
+; CHECK32-LABEL: t_phiNode:
+; CHECK32: lis r6, 18
+; CHECK32-NEXT: li r5, 291
+; CHECK32-NEXT: li r4, 0
+; CHECK32-NEXT: cmpwi r3, 1
+; CHECK32-NEXT: li r3, -1
+; CHECK32: pli r6, 2147483647
+; CHECK32-NEXT: pli r5, 1193046
+; CHECK32-NEXT: pli r4, 32768
+; CHECK32-NEXT: pli r3, -231451016
+
+entry:
+ br label %while.body
+
+while.body: ; preds = %if.else.i, %entry
+ br label %while.body.i
+
+while.body.i: ; preds = %sw.epilog.i, %while.body
+ %a.1.i = phi i32 [ %a.2.i, %sw.epilog.i ], [ -1, %while.body ]
+ %b.1.i = phi i32 [ %b.2.i, %sw.epilog.i ], [ 0, %while.body ]
+ %c.1.i = phi i32 [ %c.2.i, %sw.epilog.i ], [ 291, %while.body ]
+ %d.1.i = phi i32 [ %d.2.i, %sw.epilog.i ], [ 1179648, %while.body ]
+ %0 = load i8, ptr null, align 1
+ %cmp1.i = icmp eq i8 %0, 1
+ br i1 %cmp1.i, label %if.then.i, label %if.else.i
+
+if.then.i: ; preds = %while.body.i
+ switch i8 undef, label %sw.default.i [
+ i8 3, label %sw.epilog.i
+ i8 2, label %sw.bb1.i
+ ]
+
+sw.bb1.i: ; preds = %if.then.i
+ br label %sw.epilog.i
+
+sw.default.i: ; preds = %if.then.i
+ unreachable
+
+sw.epilog.i: ; preds = %sw.bb2.i, %sw.bb1.i, %if.then.i
+ %a.2.i = phi i32 [ -231451016, %sw.bb1.i ], [ %a.1.i, %if.then.i ]
+ %b.2.i = phi i32 [ 32768, %sw.bb1.i ], [ %b.1.i, %if.then.i ]
+ %c.2.i = phi i32 [ 1193046, %sw.bb1.i ], [ %c.1.i, %if.then.i ]
+ %d.2.i = phi i32 [ 2147483647, %sw.bb1.i ], [ %d.1.i, %if.then.i ]
+ br label %while.body.i
+
+if.else.i: ; preds = %while.body.i
+ call void @func2(i32 signext %a.1.i, i32 signext %b.1.i, i32 signext %c.1.i, i32 signext %d.1.i)
+ br label %while.body
+}
+
+declare void @func2(i32, i32, i32, i32)
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index 61921a99e1711..c85be8476c5b6 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -536,6 +536,7 @@ TEST(HostTest, AIXHostCPUDetect) {
.Case("POWER 8\n", "pwr8")
.Case("POWER 9\n", "pwr9")
.Case("POWER 10\n", "pwr10")
+ .Case("POWER 11\n", "pwr11")
.Default("unknown");
StringRef HostCPU = sys::getHostCPUName();
>From d1bc0e7a4680fc30638889c0a328ed44b833ac84 Mon Sep 17 00:00:00 2001
From: Alison Zhang <alisonzhang at ibm.com>
Date: Fri, 19 Jul 2024 10:26:18 -0500
Subject: [PATCH 3/7] Formatting
---
llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 7 ++++---
llvm/unittests/TargetParser/Host.cpp | 2 +-
2 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index c8d8f97231514..9f7755a30c365 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -3486,9 +3486,10 @@ unsigned PPCInstrInfo::getSpillTarget() const {
// registers. MMA implies paired vectors, so we can just check that.
bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();
// P11 uses the P10 target.
- return Subtarget.isISAFuture() ? 4 : IsP10Variant ?
- 2 : Subtarget.hasP9Vector() ?
- 1 : 0;
+ return Subtarget.isISAFuture() ? 4
+ : IsP10Variant ? 2
+ : Subtarget.hasP9Vector() ? 1
+ : 0;
}
ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index c85be8476c5b6..f8dd1d3a60a00 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -536,7 +536,7 @@ TEST(HostTest, AIXHostCPUDetect) {
.Case("POWER 8\n", "pwr8")
.Case("POWER 9\n", "pwr9")
.Case("POWER 10\n", "pwr10")
- .Case("POWER 11\n", "pwr11")
+ .Case("POWER 11\n", "pwr11")
.Default("unknown");
StringRef HostCPU = sys::getHostCPUName();
>From bb69361f0a2448de492c2469036b973901356899 Mon Sep 17 00:00:00 2001
From: Alison Zhang <alisonzhang at ibm.com>
Date: Fri, 19 Jul 2024 10:30:56 -0500
Subject: [PATCH 4/7] Formatting again
---
llvm/lib/Target/PowerPC/PPCInstrInfo.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index ea34b459251f4..469c2e4721455 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -182,9 +182,11 @@ enum PPCMachineCombinerPattern : unsigned {
// Initialize arrays for load and store spill opcodes on supported subtargets.
#define StoreOpcodesForSpill \
- { Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, Pwr11StoreOpcodes, FutureStoreOpcodes }
+ {Pwr8StoreOpcodes, Pwr9StoreOpcodes, Pwr10StoreOpcodes, Pwr11StoreOpcodes, \
+ FutureStoreOpcodes}
#define LoadOpcodesForSpill \
- { Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, Pwr11LoadOpcodes, FutureLoadOpcodes }
+ {Pwr8LoadOpcodes, Pwr9LoadOpcodes, Pwr10LoadOpcodes, Pwr11LoadOpcodes, \
+ FutureLoadOpcodes}
class PPCSubtarget;
class PPCInstrInfo : public PPCGenInstrInfo {
>From 99fc290e8f7d6f20a26f427b0276f09e6b6942eb Mon Sep 17 00:00:00 2001
From: Alison Zhang <alisonzhang at ibm.com>
Date: Fri, 19 Jul 2024 15:02:21 -0500
Subject: [PATCH 5/7] Add run lines to existing test
---
.../test/CodeGen/PowerPC/mma-acc-spill-p11.ll | 122 ------------------
...{mma-acc-spill-p10.ll => mma-acc-spill.ll} | 7 +
2 files changed, 7 insertions(+), 122 deletions(-)
delete mode 100644 llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll
rename llvm/test/CodeGen/PowerPC/{mma-acc-spill-p10.ll => mma-acc-spill.ll} (92%)
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll
deleted file mode 100644
index 8bf609a84aa02..0000000000000
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill-p11.ll
+++ /dev/null
@@ -1,122 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; ; This test is a copy of mma-acc-spill.ll except that it uses mcpu=pwr11.
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
-; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
-; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
-; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
-
-declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
-declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
-declare void @foo()
-define void @intrinsics1(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4, ptr %ptr) {
-; CHECK-LABEL: intrinsics1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: mflr r0
-; CHECK-NEXT: std r0, 16(r1)
-; CHECK-NEXT: stdu r1, -176(r1)
-; CHECK-NEXT: .cfi_def_cfa_offset 176
-; CHECK-NEXT: .cfi_offset lr, 16
-; CHECK-NEXT: .cfi_offset r30, -16
-; CHECK-NEXT: .cfi_offset v28, -80
-; CHECK-NEXT: .cfi_offset v29, -64
-; CHECK-NEXT: .cfi_offset v30, -48
-; CHECK-NEXT: .cfi_offset v31, -32
-; CHECK-NEXT: stxv v28, 96(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v29, 112(r1) # 16-byte Folded Spill
-; CHECK-NEXT: vmr v29, v3
-; CHECK-NEXT: vmr v28, v2
-; CHECK-NEXT: xxlor vs0, v28, v28
-; CHECK-NEXT: stxv v30, 128(r1) # 16-byte Folded Spill
-; CHECK-NEXT: stxv v31, 144(r1) # 16-byte Folded Spill
-; CHECK-NEXT: vmr v31, v5
-; CHECK-NEXT: vmr v30, v4
-; CHECK-NEXT: xxlor vs1, v29, v29
-; CHECK-NEXT: xxlor vs2, v30, v30
-; CHECK-NEXT: xxlor vs3, v31, v31
-; CHECK-NEXT: std r30, 160(r1) # 8-byte Folded Spill
-; CHECK-NEXT: ld r30, 272(r1)
-; CHECK-NEXT: xxmtacc acc0
-; CHECK-NEXT: xvf16ger2pp acc0, v2, v4
-; CHECK-NEXT: xxmfacc acc0
-; CHECK-NEXT: stxvp vsp0, 64(r1)
-; CHECK-NEXT: stxvp vsp2, 32(r1)
-; CHECK-NEXT: bl foo@notoc
-; CHECK-NEXT: lxvp vsp0, 64(r1)
-; CHECK-NEXT: lxvp vsp2, 32(r1)
-; CHECK-NEXT: xxmtacc acc0
-; CHECK-NEXT: xvf16ger2pp acc0, v28, v30
-; CHECK-NEXT: lxv v31, 144(r1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv v30, 128(r1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv v29, 112(r1) # 16-byte Folded Reload
-; CHECK-NEXT: lxv v28, 96(r1) # 16-byte Folded Reload
-; CHECK-NEXT: xxmfacc acc0
-; CHECK-NEXT: stxv vs0, 48(r30)
-; CHECK-NEXT: stxv vs1, 32(r30)
-; CHECK-NEXT: stxv vs2, 16(r30)
-; CHECK-NEXT: stxv vs3, 0(r30)
-; CHECK-NEXT: ld r30, 160(r1) # 8-byte Folded Reload
-; CHECK-NEXT: addi r1, r1, 176
-; CHECK-NEXT: ld r0, 16(r1)
-; CHECK-NEXT: mtlr r0
-; CHECK-NEXT: blr
-;
-; CHECK-BE-LABEL: intrinsics1:
-; CHECK-BE: # %bb.0:
-; CHECK-BE-NEXT: mflr r0
-; CHECK-BE-NEXT: std r0, 16(r1)
-; CHECK-BE-NEXT: stdu r1, -256(r1)
-; CHECK-BE-NEXT: .cfi_def_cfa_offset 256
-; CHECK-BE-NEXT: .cfi_offset lr, 16
-; CHECK-BE-NEXT: .cfi_offset r30, -16
-; CHECK-BE-NEXT: .cfi_offset v28, -80
-; CHECK-BE-NEXT: .cfi_offset v29, -64
-; CHECK-BE-NEXT: .cfi_offset v30, -48
-; CHECK-BE-NEXT: .cfi_offset v31, -32
-; CHECK-BE-NEXT: stxv v28, 176(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: stxv v29, 192(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: vmr v29, v3
-; CHECK-BE-NEXT: vmr v28, v2
-; CHECK-BE-NEXT: xxlor vs0, v28, v28
-; CHECK-BE-NEXT: stxv v30, 208(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: stxv v31, 224(r1) # 16-byte Folded Spill
-; CHECK-BE-NEXT: vmr v31, v5
-; CHECK-BE-NEXT: vmr v30, v4
-; CHECK-BE-NEXT: xxlor vs1, v29, v29
-; CHECK-BE-NEXT: xxlor vs2, v30, v30
-; CHECK-BE-NEXT: xxlor vs3, v31, v31
-; CHECK-BE-NEXT: std r30, 240(r1) # 8-byte Folded Spill
-; CHECK-BE-NEXT: ld r30, 368(r1)
-; CHECK-BE-NEXT: xxmtacc acc0
-; CHECK-BE-NEXT: xvf16ger2pp acc0, v2, v4
-; CHECK-BE-NEXT: xxmfacc acc0
-; CHECK-BE-NEXT: stxvp vsp0, 112(r1)
-; CHECK-BE-NEXT: stxvp vsp2, 144(r1)
-; CHECK-BE-NEXT: bl foo
-; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: lxvp vsp0, 112(r1)
-; CHECK-BE-NEXT: lxvp vsp2, 144(r1)
-; CHECK-BE-NEXT: xxmtacc acc0
-; CHECK-BE-NEXT: xvf16ger2pp acc0, v28, v30
-; CHECK-BE-NEXT: lxv v31, 224(r1) # 16-byte Folded Reload
-; CHECK-BE-NEXT: lxv v30, 208(r1) # 16-byte Folded Reload
-; CHECK-BE-NEXT: lxv v29, 192(r1) # 16-byte Folded Reload
-; CHECK-BE-NEXT: lxv v28, 176(r1) # 16-byte Folded Reload
-; CHECK-BE-NEXT: xxmfacc acc0
-; CHECK-BE-NEXT: stxv vs1, 16(r30)
-; CHECK-BE-NEXT: stxv vs0, 0(r30)
-; CHECK-BE-NEXT: stxv vs3, 48(r30)
-; CHECK-BE-NEXT: stxv vs2, 32(r30)
-; CHECK-BE-NEXT: ld r30, 240(r1) # 8-byte Folded Reload
-; CHECK-BE-NEXT: addi r1, r1, 256
-; CHECK-BE-NEXT: ld r0, 16(r1)
-; CHECK-BE-NEXT: mtlr r0
-; CHECK-BE-NEXT: blr
- %1 = tail call <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8> %vc1, <16 x i8> %vc2, <16 x i8> %vc3, <16 x i8> %vc4)
- %2 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %1, <16 x i8> %vc1, <16 x i8> %vc3)
- tail call void @foo()
- %3 = tail call <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1> %2, <16 x i8> %vc1, <16 x i8> %vc3)
- store <512 x i1> %3, ptr %ptr, align 64
- ret void
-}
diff --git a/llvm/test/CodeGen/PowerPC/mma-acc-spill-p10.ll b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
similarity index 92%
rename from llvm/test/CodeGen/PowerPC/mma-acc-spill-p10.ll
rename to llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
index 8d03594fe1bfd..681f81d74794d 100644
--- a/llvm/test/CodeGen/PowerPC/mma-acc-spill-p10.ll
+++ b/llvm/test/CodeGen/PowerPC/mma-acc-spill.ll
@@ -6,6 +6,13 @@
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -disable-auto-paired-vec-st=false \
+; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
+
declare <512 x i1> @llvm.ppc.mma.xvf16ger2pp(<512 x i1>, <16 x i8>, <16 x i8>)
declare <512 x i1> @llvm.ppc.mma.assemble.acc(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>)
declare void @foo()
>From ec02d7135e8aa97db82c757187f39023b88a6de0 Mon Sep 17 00:00:00 2001
From: Alison Zhang <alisonzhang at ibm.com>
Date: Fri, 19 Jul 2024 15:05:37 -0500
Subject: [PATCH 6/7] Move run lines to existing test again
---
...{p10-constants.ll => p10-p11-constants.ll} | 12 +-
llvm/test/CodeGen/PowerPC/p11-constants.ll | 392 ------------------
2 files changed, 11 insertions(+), 393 deletions(-)
rename llvm/test/CodeGen/PowerPC/{p10-constants.ll => p10-p11-constants.ll} (94%)
delete mode 100644 llvm/test/CodeGen/PowerPC/p11-constants.ll
diff --git a/llvm/test/CodeGen/PowerPC/p10-constants.ll b/llvm/test/CodeGen/PowerPC/p10-p11-constants.ll
similarity index 94%
rename from llvm/test/CodeGen/PowerPC/p10-constants.ll
rename to llvm/test/CodeGen/PowerPC/p10-p11-constants.ll
index 77472afd9c3d4..5f6a345bdd938 100644
--- a/llvm/test/CodeGen/PowerPC/p10-constants.ll
+++ b/llvm/test/CodeGen/PowerPC/p10-p11-constants.ll
@@ -8,7 +8,17 @@
; RUN: -mcpu=pwr10 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK32
-; These test cases aim to test constant materialization using the pli instruction on Power10.
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
+; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK32
+
+; These test cases aim to test constant materialization using the pli instruction on Power10 and Power11.
define signext i32 @t_16BitsMinRequiring34Bits() {
; CHECK-LABEL: t_16BitsMinRequiring34Bits:
diff --git a/llvm/test/CodeGen/PowerPC/p11-constants.ll b/llvm/test/CodeGen/PowerPC/p11-constants.ll
deleted file mode 100644
index f1ca6401c8791..0000000000000
--- a/llvm/test/CodeGen/PowerPC/p11-constants.ll
+++ /dev/null
@@ -1,392 +0,0 @@
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
-; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN: FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
-; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN: FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc-unknown-linux-gnu \
-; RUN: -mcpu=pwr11 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK32
-
-; These test cases aim to test constant materialization using the pli instruction on Power11.
-
-define signext i32 @t_16BitsMinRequiring34Bits() {
-; CHECK-LABEL: t_16BitsMinRequiring34Bits:
-; CHECK: pli r3, 32768
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_16BitsMinRequiring34Bits:
-; CHECK32: pli r3, 32768
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 32768
-}
-
-define signext i32 @t_16Bits() {
-; CHECK-LABEL: t_16Bits:
-; CHECK: pli r3, 62004
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_16Bits:
-; CHECK32: pli r3, 62004
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 62004
-}
-
-define signext i32 @t_lt32gt16BitsNonShiftable() {
-; CHECK-LABEL: t_lt32gt16BitsNonShiftable:
-; CHECK: pli r3, 1193046
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_lt32gt16BitsNonShiftable:
-; CHECK32: pli r3, 1193046
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 1193046
-}
-
-define signext i32 @t_32Bits() {
-; CHECK-LABEL: t_32Bits:
-; CHECK: pli r3, -231451016
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_32Bits:
-; CHECK32: pli r3, -231451016
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 -231451016
-}
-
-define i64 @t_34BitsLargestPositive() {
-; CHECK-LABEL: t_34BitsLargestPositive:
-; CHECK: pli r3, 8589934591
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_34BitsLargestPositive:
-; CHECK32: li r3, 1
-; CHECK32-NEXT: li r4, -1
-; CHECK32-NEXT: blr
-
-entry:
- ret i64 8589934591
-}
-
-define i64 @t_neg34Bits() {
-; CHECK-LABEL: t_neg34Bits:
-; CHECK: pli r3, -8284514696
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_neg34Bits:
-; CHECK32: li r3, -2
-; CHECK32-NEXT: pli r4, 305419896
-; CHECK32-NEXT: blr
-
-entry:
- ret i64 -8284514696
-}
-
-define signext i32 @t_16BitsMinRequiring34BitsMinusOne() {
-; CHECK-LABEL: t_16BitsMinRequiring34BitsMinusOne:
-; CHECK: li r3, 32767
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_16BitsMinRequiring34BitsMinusOne:
-; CHECK32: li r3, 32767
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 32767
-}
-
-define signext i32 @t_lt16Bits() {
-; CHECK-LABEL: t_lt16Bits:
-; CHECK: li r3, 291
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_lt16Bits:
-; CHECK32: li r3, 291
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 291
-}
-
-define signext i32 @t_neglt16Bits() {
-; CHECK-LABEL: t_neglt16Bits:
-; CHECK: li r3, -3805
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_neglt16Bits:
-; CHECK32: li r3, -3805
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 -3805
-}
-
-define signext i32 @t_neg16Bits() {
-; CHECK-LABEL: t_neg16Bits:
-; CHECK: li r3, -32204
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_neg16Bits:
-; CHECK32: li r3, -32204
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 -32204
-}
-
-define signext i32 @t_lt32gt16BitsShiftable() {
-; CHECK-LABEL: t_lt32gt16BitsShiftable:
-; CHECK: lis r3, 18
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_lt32gt16BitsShiftable:
-; CHECK32: lis r3, 18
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 1179648
-}
-
-define signext i32 @t_32gt16BitsShiftable() {
-; CHECK-LABEL: t_32gt16BitsShiftable:
-; CHECK: lis r3, -3532
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_32gt16BitsShiftable:
-; CHECK32: lis r3, -3532
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 -231473152
-}
-
-define signext i32 @t_32BitsZero() {
-; CHECK-LABEL: t_32BitsZero:
-; CHECK: li r3, 0
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_32BitsZero:
-; CHECK32: li r3, 0
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 0
-}
-
-define signext i32 @t_32BitsAllOnes() {
-; CHECK-LABEL: t_32BitsAllOnes:
-; CHECK: li r3, -1
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_32BitsAllOnes:
-; CHECK32: li r3, -1
-; CHECK32-NEXT: blr
-
-entry:
- ret i32 -1
-}
-
-define i64 @t_34BitsLargestPositivePlus() {
-; CHECK-LABEL: t_34BitsLargestPositivePlus:
-; CHECK: li r3, 1
-; CHECK-NEXT: rldic r3, r3, 33, 30
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_34BitsLargestPositivePlus:
-; CHECK32: li r3, 2
-; CHECK32-NEXT: li r4, 0
-; CHECK32-NEXT: blr
-
-entry:
- ret i64 8589934592
-}
-
-define i64 @t_34Bits() {
-; CHECK-LABEL: t_34Bits:
-; CHECK: pli r3, 1648790223
-; CHECK-NEXT: rldic r3, r3, 3, 30
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_34Bits:
-; CHECK32: li r3, 3
-; CHECK32-NEXT: pli r4, 305419896
-; CHECK32-NEXT: blr
-
-entry:
- ret i64 13190321784
-}
-
-define i64 @t_35Bits() {
-; CHECK-LABEL: t_35Bits:
-; CHECK: pli r3, 4266035919
-; CHECK-NEXT: rldic r3, r3, 3, 29
-; CHECK-NEXT: blr
-; CHECK32-LABEL: t_35Bits:
-; CHECK32: li r3, 7
-; CHECK32-NEXT: pli r4, -231451016
-; CHECK32-NEXT: blr
-
-entry:
- ret i64 34128287352
-}
-
-; (Value >> Shift) can be expressed in 34 bits
-define i64 @t_Shift() {
-; CHECK-LABEL: t_Shift:
-; CHECK: pli r3, 8522759166
-; CHECK-NEXT: rotldi r3, r3, 48
-; CHECK-NEXT: blr
-
-entry:
- ; 0xFBFE00000001FBFE
- ret i64 18157950747604548606
-}
-
-; Leading Zeros + Following Ones + Trailing Zeros > 30
-define i64 @t_LZFOTZ() {
-; CHECK-LABEL: t_LZFOTZ:
-; CHECK: pli r3, -349233
-; CHECK-NEXT: rldic r3, r3, 4, 12
-; CHECK-NEXT: blr
-
-entry:
- ; 0x000FFFFFFFAABCF0
- ret i64 4503599621782768
-}
-
-; Leading Zeros + Trailing Ones > 30
-define i64 @t_LZTO() {
-; CHECK-LABEL: t_LZTO:
-; CHECK: pli r3, -2684406441
-; CHECK-NEXT: rldicl r3, r3, 11, 19
-; CHECK-NEXT: blr
-entry:
- ; 0x00001AFFF9AABFFF
- ret i64 29686707699711
-}
-
-; Leading Zeros + Trailing Ones + Following Zeros > 30
-define i64 @t_LZTOFO() {
-; CHECK-LABEL: t_LZTOFO:
-; CHECK: pli r3, -5720033968
-; CHECK-NEXT: rldicl r3, r3, 11, 12
-; CHECK-NEXT: blr
-entry:
- ; 0x000FF55879AA87FF
- ret i64 4491884997806079
-}
-
-; Requires full expansion
-define i64 @t_Full64Bits1() {
-; CHECK-LABEL: t_Full64Bits1:
-; CHECK: pli r4, 2146500607
-; CHECK-NEXT: pli r3, 4043305214
-; CHECK-NEXT: rldimi r3, r4, 32, 0
-; CHECK-NEXT: blr
-entry:
- ; 0x7FF0FFFFF0FFF0FE
- ret i64 9219149911952453886
-}
-
-; Requires full expansion
-define i64 @t_Ful64Bits2() {
-; CHECK-LABEL: t_Ful64Bits2:
-; CHECK: pli r4, 4042326015
-; CHECK-NEXT: pli r3, 4043305214
-; CHECK-NEXT: rldimi r3, r4, 32, 0
-; CHECK-NEXT: blr
-entry:
- ; 0xF0F0FFFFF0FFF0FE
- ret i64 17361658038238310654
-}
-
-; A splat of 32 bits: 32 Bits Low == 32 Bits High
-define i64 @t_Splat32Bits() {
-; CHECK-LABEL: t_Splat32Bits:
-; CHECK: pli r3, 262916796
-; CHECK-NEXT: rldimi r3, r3, 32, 0
-; CHECK-NEXT: blr
-entry:
- ; 0x0FABCABC0FABCABC
- ret i64 1129219040652020412
-}
-
-; Producing `pli` when the constant fits within 34-bits and the constant
-; is being produced in other transformations (such as complex bit permutations).
-define i64 @t_34Bits_Complex(i64 %a, i64 %b) {
-; CHECK-LABEL: t_34Bits_Complex:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: rotldi r4, r4, 30
-; CHECK-NEXT: rldimi r3, r4, 34, 31
-; CHECK-NEXT: pli r4, -268435457
-; CHECK-NEXT: and r3, r3, r4
-; CHECK-NEXT: blr
-;
-; CHECK32-LABEL: t_34Bits_Complex:
-; CHECK32: # %bb.0: # %entry
-; CHECK32-NEXT: rlwinm r4, r6, 0, 4, 2
-; CHECK32-NEXT: rlwimi r3, r5, 0, 31, 29
-; CHECK32-NEXT: blr
-entry:
- %and = and i64 %a, 8589934592
- %and1 = and i64 %b, -8858370049
- %or = or i64 %and1, %and
- ret i64 %or
-}
-
-; The load immediates resulting from phi-nodes are needed to test whether
-; li/lis is preferred to pli by the instruction selector.
-define dso_local void @t_phiNode() {
-; CHECK-LABEL: t_phiNode:
-; CHECK: lis r6, 18
-; CHECK-NEXT: li r5, 291
-; CHECK-NEXT: li r4, 0
-; CHECK-NEXT: cmpwi r3, 1
-; CHECK-NEXT: li r3, -1
-; CHECK: pli r6, 2147483647
-; CHECK-NEXT: pli r5, 1193046
-; CHECK-NEXT: pli r4, 32768
-; CHECK-NEXT: pli r3, -231451016
-; CHECK32-LABEL: t_phiNode:
-; CHECK32: lis r6, 18
-; CHECK32-NEXT: li r5, 291
-; CHECK32-NEXT: li r4, 0
-; CHECK32-NEXT: cmpwi r3, 1
-; CHECK32-NEXT: li r3, -1
-; CHECK32: pli r6, 2147483647
-; CHECK32-NEXT: pli r5, 1193046
-; CHECK32-NEXT: pli r4, 32768
-; CHECK32-NEXT: pli r3, -231451016
-
-entry:
- br label %while.body
-
-while.body: ; preds = %if.else.i, %entry
- br label %while.body.i
-
-while.body.i: ; preds = %sw.epilog.i, %while.body
- %a.1.i = phi i32 [ %a.2.i, %sw.epilog.i ], [ -1, %while.body ]
- %b.1.i = phi i32 [ %b.2.i, %sw.epilog.i ], [ 0, %while.body ]
- %c.1.i = phi i32 [ %c.2.i, %sw.epilog.i ], [ 291, %while.body ]
- %d.1.i = phi i32 [ %d.2.i, %sw.epilog.i ], [ 1179648, %while.body ]
- %0 = load i8, ptr null, align 1
- %cmp1.i = icmp eq i8 %0, 1
- br i1 %cmp1.i, label %if.then.i, label %if.else.i
-
-if.then.i: ; preds = %while.body.i
- switch i8 undef, label %sw.default.i [
- i8 3, label %sw.epilog.i
- i8 2, label %sw.bb1.i
- ]
-
-sw.bb1.i: ; preds = %if.then.i
- br label %sw.epilog.i
-
-sw.default.i: ; preds = %if.then.i
- unreachable
-
-sw.epilog.i: ; preds = %sw.bb2.i, %sw.bb1.i, %if.then.i
- %a.2.i = phi i32 [ -231451016, %sw.bb1.i ], [ %a.1.i, %if.then.i ]
- %b.2.i = phi i32 [ 32768, %sw.bb1.i ], [ %b.1.i, %if.then.i ]
- %c.2.i = phi i32 [ 1193046, %sw.bb1.i ], [ %c.1.i, %if.then.i ]
- %d.2.i = phi i32 [ 2147483647, %sw.bb1.i ], [ %d.1.i, %if.then.i ]
- br label %while.body.i
-
-if.else.i: ; preds = %while.body.i
- call void @func2(i32 signext %a.1.i, i32 signext %b.1.i, i32 signext %c.1.i, i32 signext %d.1.i)
- br label %while.body
-}
-
-declare void @func2(i32, i32, i32, i32)
>From d216efa9fe38f5560d04c9f737252fa3cbb5f886 Mon Sep 17 00:00:00 2001
From: Alison Zhang <alisonzhang at ibm.com>
Date: Fri, 19 Jul 2024 15:10:17 -0500
Subject: [PATCH 7/7] Format
---
llvm/lib/Target/PowerPC/PPCInstrInfo.h | 35 +++++++++++++++++---------
1 file changed, 23 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 469c2e4721455..b7105b1cdb78d 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -125,12 +125,23 @@ enum PPCMachineCombinerPattern : unsigned {
}
#define Pwr11LoadOpcodes \
- { \
- PPC::LWZ, PPC::LD, PPC::LFD, PPC::LFS, PPC::RESTORE_CR, \
- PPC::RESTORE_CRBIT, PPC::LVX, PPC::LXV, PPC::DFLOADf64, \
- PPC::DFLOADf32, PPC::SPILLTOVSR_LD, PPC::LXVP, PPC::RESTORE_ACC, \
- PPC::RESTORE_UACC, NoInstr, NoInstr, PPC::RESTORE_QUADWORD \
- }
+ {PPC::LWZ, \
+ PPC::LD, \
+ PPC::LFD, \
+ PPC::LFS, \
+ PPC::RESTORE_CR, \
+ PPC::RESTORE_CRBIT, \
+ PPC::LVX, \
+ PPC::LXV, \
+ PPC::DFLOADf64, \
+ PPC::DFLOADf32, \
+ PPC::SPILLTOVSR_LD, \
+ PPC::LXVP, \
+ PPC::RESTORE_ACC, \
+ PPC::RESTORE_UACC, \
+ NoInstr, \
+ NoInstr, \
+ PPC::RESTORE_QUADWORD}
#define FutureLoadOpcodes \
{ \
@@ -165,12 +176,12 @@ enum PPCMachineCombinerPattern : unsigned {
}
#define Pwr11StoreOpcodes \
- { \
- PPC::STW, PPC::STD, PPC::STFD, PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
- PPC::STVX, PPC::STXV, PPC::DFSTOREf64, PPC::DFSTOREf32, \
- PPC::SPILLTOVSR_ST, PPC::STXVP, PPC::SPILL_ACC, PPC::SPILL_UACC, \
- NoInstr, NoInstr, PPC::SPILL_QUADWORD \
- }
+ {PPC::STW, PPC::STD, PPC::STFD, \
+ PPC::STFS, PPC::SPILL_CR, PPC::SPILL_CRBIT, \
+ PPC::STVX, PPC::STXV, PPC::DFSTOREf64, \
+ PPC::DFSTOREf32, PPC::SPILLTOVSR_ST, PPC::STXVP, \
+ PPC::SPILL_ACC, PPC::SPILL_UACC, NoInstr, \
+ NoInstr, PPC::SPILL_QUADWORD}
#define FutureStoreOpcodes \
{ \
More information about the cfe-commits
mailing list