[llvm] r271555 - AArch64: Do not test for CPUs, use SubtargetFeatures
Matthias Braun via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 2 11:03:53 PDT 2016
Author: matze
Date: Thu Jun 2 13:03:53 2016
New Revision: 271555
URL: http://llvm.org/viewvc/llvm-project?rev=271555&view=rev
Log:
AArch64: Do not test for CPUs, use SubtargetFeatures
Testing for specific CPUs has a number of problems; it is better to use
subtarget features:
- When a tweak is added for a specific CPU, it is often desirable for
the next version of that CPU as well, yet we often forget to add it.
- It is hard to keep track of checks scattered around the target code;
declaring all target specifics together with the CPU in the tablegen
file is a clearer representation.
- Subtarget features can be tweaked from the command line, e.g. via
-mattr (see the sketch after this list).
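To make the intent concrete, here is a small self-contained C++ sketch
of the pattern (toy types only: ToySubtarget, shouldRunFPLoadBalancing
and the property names are invented for illustration and are not LLVM's
API; the real changes are in the diff below). A pass asks for the
property it needs instead of listing CPUs, and each CPU declares its
properties in one place, so a future CPU picks up the tuning simply by
declaring the feature.

  #include <iostream>
  #include <string>

  // Toy stand-in for a subtarget: every tuning knob is a named
  // property, and each CPU definition just switches the relevant
  // properties on.
  struct ToySubtarget {
    bool BalanceFPOps = false;    // analogous to FeatureBalanceFPOps
    bool PostRAScheduler = false; // analogous to FeaturePostRAScheduler

    explicit ToySubtarget(const std::string &CPU) {
      if (CPU == "cortex-a53" || CPU == "cortex-a57") {
        BalanceFPOps = true;      // declared once, next to the CPU
        PostRAScheduler = true;
      }
    }
  };

  // The pass queries the property rather than the CPU name, so it
  // needs no change when another CPU starts setting BalanceFPOps.
  bool shouldRunFPLoadBalancing(const ToySubtarget &ST) {
    return ST.BalanceFPOps;
  }

  int main() {
    std::cout << shouldRunFPLoadBalancing(ToySubtarget("cortex-a57"))
              << '\n'; // prints 1
    std::cout << shouldRunFPLoadBalancing(ToySubtarget("generic"))
              << '\n'; // prints 0
    return 0;
  }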
To discourage people from using CPU checks in the future, I removed the
isCortexXX(), isCyclone(), ... functions. I added a getProcFamily()
function for exceptional circumstances, but made it clear in the comment
that its use is discouraged (a toy sketch of this escape hatch follows).
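A minimal sketch of that escape hatch, again with invented toy types;
only the Exynos-M1 immediate check loosely mirrors the real ADDXri case
in the AArch64InstrInfo.cpp hunk below:

  #include <cassert>

  // Toy subtarget exposing only the processor-family escape hatch.
  struct ToySubtarget {
    enum ProcFamilyEnum { Others, CortexA57, ExynosM1 };
    ProcFamilyEnum Family = Others;

    // Discouraged: prefer a SubtargetFeature. The family is still
    // queryable for one-off quirks that have no feature yet.
    ProcFamilyEnum getProcFamily() const { return Family; }
  };

  // The kind of check that still needs the family: treat an add/sub
  // immediate as "as cheap as a move" on Exynos M1, or anywhere when
  // the immediate's shift amount is zero.
  bool addSubImmIsCheap(const ToySubtarget &ST, int ShiftImm) {
    return ST.getProcFamily() == ToySubtarget::ExynosM1 || ShiftImm == 0;
  }

  int main() {
    ToySubtarget M1;
    M1.Family = ToySubtarget::ExynosM1;
    assert(addSubImmIsCheap(M1, 12));
    assert(!addSubImmIsCheap(ToySubtarget{}, 12));
    assert(addSubImmIsCheap(ToySubtarget{}, 0));
    return 0;
  }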
Reformat the feature list in AArch64.td to one feature per line, in
alphabetical order, to simplify merging and sorting for out-of-tree
tweaks.
No functional change intended.
Differential Revision: http://reviews.llvm.org/D20762
Modified:
llvm/trunk/lib/Target/AArch64/AArch64.td
llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Thu Jun 2 13:03:53 2016
@@ -58,6 +58,50 @@ def FeatureReserveX18 : SubtargetFeature
"Reserve X18, making it unavailable "
"as a GPR">;
+def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",
+ "MergeNarrowLoads", "true",
+ "Merge narrow load instructions">;
+
+def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
+ "Use alias analysis during codegen">;
+
+def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
+ "true",
+ "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
+
+def FeaturePredictableSelectIsExpensive : SubtargetFeature<
+ "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
+ "Prefer likely predicted branches over selects">;
+
+def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
+ "CustomAsCheapAsMove", "true",
+ "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
+
+def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
+ "UsePostRAScheduler", "true", "Schedule again after register allocation">;
+
+def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
+ "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
+
+def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
+ "AvoidQuadLdStPairs", "true",
+ "Do not form quad load/store pair operations">;
+
+def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
+ "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
+ "true", "Use alternative pattern for sextload convert to f32">;
+
+def FeatureMacroOpFusion : SubtargetFeature<
+ "macroop-fusion", "HasMacroOpFusion", "true",
+ "CPU supports macro op fusion">;
+
+def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
+ "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
+ "Disable latency scheduling heuristic">;
+
+def FeatureUseRSqrt : SubtargetFeature<
+ "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -94,57 +138,87 @@ include "AArch64SchedM1.td"
include "AArch64SchedKryo.td"
def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
- "Cortex-A35 ARM processors",
- [FeatureFPARMv8,
- FeatureNEON,
- FeatureCrypto,
+ "Cortex-A35 ARM processors", [
FeatureCRC,
- FeaturePerfMon]>;
+ FeatureCrypto,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon
+ ]>;
def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
- "Cortex-A53 ARM processors",
- [FeatureFPARMv8,
- FeatureNEON,
- FeatureCrypto,
+ "Cortex-A53 ARM processors", [
+ FeatureBalanceFPOps,
FeatureCRC,
- FeaturePerfMon]>;
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeatureUseAA
+ ]>;
def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
- "Cortex-A57 ARM processors",
- [FeatureFPARMv8,
- FeatureNEON,
- FeatureCrypto,
+ "Cortex-A57 ARM processors", [
+ FeatureBalanceFPOps,
FeatureCRC,
- FeaturePerfMon]>;
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureMergeNarrowLd,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive
+ ]>;
def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
- "Cyclone",
- [FeatureFPARMv8,
- FeatureNEON,
+ "Cyclone", [
+ FeatureAlternateSExtLoadCVTF32Pattern,
FeatureCrypto,
+ FeatureDisableLatencySchedHeuristic,
+ FeatureFPARMv8,
+ FeatureMacroOpFusion,
+ FeatureNEON,
FeaturePerfMon,
- FeatureZCRegMove, FeatureZCZeroing]>;
+ FeatureSlowMisaligned128Store,
+ FeatureZCRegMove,
+ FeatureZCZeroing
+ ]>;
def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
- "Samsung Exynos-M1 processors",
- [FeatureFPARMv8,
- FeatureNEON,
- FeatureCrypto,
+ "Samsung Exynos-M1 processors", [
+ FeatureAvoidQuadLdStPairs,
FeatureCRC,
- FeaturePerfMon]>;
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeatureUseRSqrt
+ ]>;
def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
- "Qualcomm Kryo processors",
- [FeatureFPARMv8,
- FeatureNEON,
- FeatureCrypto,
+ "Qualcomm Kryo processors", [
FeatureCRC,
- FeaturePerfMon]>;
-
-def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
- FeatureNEON,
- FeatureCRC,
- FeaturePerfMon]>;
+ FeatureCrypto,
+ FeatureCustomCheapAsMoveHandling,
+ FeatureFPARMv8,
+ FeatureMergeNarrowLd,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler,
+ FeaturePredictableSelectIsExpensive
+ ]>;
+
+def : ProcessorModel<"generic", NoSchedModel, [
+ FeatureCRC,
+ FeatureFPARMv8,
+ FeatureNEON,
+ FeaturePerfMon,
+ FeaturePostRAScheduler
+ ]>;
// FIXME: Cortex-A35 is currently modelled as a Cortex-A53
def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
Modified: llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp Thu Jun 2 13:03:53 2016
@@ -314,9 +314,7 @@ bool AArch64A57FPLoadBalancing::runOnMac
if (skipFunction(*F.getFunction()))
return false;
- // Don't do anything if this isn't an A53 or A57.
- if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
- F.getSubtarget<AArch64Subtarget>().isCortexA57()))
+ if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
return false;
bool Changed = false;
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Jun 2 13:03:53 2016
@@ -634,9 +634,7 @@ AArch64TargetLowering::AArch64TargetLowe
}
}
- // Prefer likely predicted branches to selects on out-of-order cores.
- if (Subtarget->isCortexA57() || Subtarget->isKryo())
- PredictableSelectIsExpensive = true;
+ PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
}
void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
@@ -814,12 +812,9 @@ bool AArch64TargetLowering::allowsMisali
if (Subtarget->requiresStrictAlign())
return false;
- // FIXME: This is mostly true for Cyclone, but not necessarily others.
if (Fast) {
- // FIXME: Define an attribute for slow unaligned accesses instead of
- // relying on the CPU type as a proxy.
- // On Cyclone, unaligned 128-bit stores are slow.
- *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
+ // Some CPUs are fine with unaligned stores except for 128-bit ones.
+ *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
// See comments in performSTORECombine() for more details about
// these conditions.
@@ -8792,9 +8787,7 @@ static SDValue split16BStores(SDNode *N,
// be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
// a call to that function here.
- // Cyclone has bad performance on unaligned 16B stores when crossing line and
- // page boundaries. We want to split such stores.
- if (!Subtarget->isCyclone())
+ if (!Subtarget->isMisaligned128StoreSlow())
return SDValue();
// Don't split at -Oz.
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Jun 2 13:03:53 2016
@@ -544,8 +544,7 @@ static bool canBeExpandedToORR(const Mac
// FIXME: this implementation should be micro-architecture dependent, so a
// micro-architecture target hook should be introduced here in future.
bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
- if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&
- !Subtarget.isExynosM1() && !Subtarget.isKryo())
+ if (!Subtarget.hasCustomCheapAsMoveHandling())
return MI->isAsCheapAsAMove();
unsigned Imm;
@@ -559,7 +558,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
case AArch64::ADDXri:
case AArch64::SUBWri:
case AArch64::SUBXri:
- return (Subtarget.isExynosM1() ||
+ return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
MI->getOperand(3).getImm() == 0);
// add/sub on register with shift
@@ -568,7 +567,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
case AArch64::SUBWrs:
case AArch64::SUBXrs:
Imm = MI->getOperand(3).getImm();
- return (Subtarget.isExynosM1() &&
+ return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
AArch64_AM::getArithShiftValue(Imm) < 4);
// logical ops on immediate
@@ -609,7 +608,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
case AArch64::ORRWrs:
case AArch64::ORRXrs:
Imm = MI->getOperand(3).getImm();
- return (Subtarget.isExynosM1() &&
+ return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
AArch64_AM::getShiftValue(Imm) < 4 &&
AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
@@ -1522,8 +1521,8 @@ bool AArch64InstrInfo::isCandidateToMerg
if (isLdStPairSuppressed(MI))
return false;
- // Do not pair quad ld/st for Exynos.
- if (Subtarget.isExynosM1()) {
+ // On some CPUs quad load/store pairs are slower than two single load/stores.
+ if (Subtarget.avoidQuadLdStPairs()) {
switch (MI->getOpcode()) {
default:
break;
@@ -1801,8 +1800,8 @@ bool AArch64InstrInfo::shouldClusterMemO
bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
MachineInstr *Second) const {
- if (Subtarget.isCyclone()) {
- // Cyclone can fuse CMN, CMP, TST followed by Bcc.
+ if (Subtarget.hasMacroOpFusion()) {
+ // Fuse CMN, CMP, TST followed by Bcc.
unsigned SecondOpcode = Second->getOpcode();
if (SecondOpcode == AArch64::Bcc) {
switch (First->getOpcode()) {
@@ -1817,7 +1816,7 @@ bool AArch64InstrInfo::shouldScheduleAdj
return true;
}
}
- // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
+ // Fuse ALU operations followed by CBZ/CBNZ.
if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
switch (First->getOpcode()) {
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Thu Jun 2 13:03:53 2016
@@ -34,7 +34,8 @@ def HasSPE : Predicate<"Subtar
def IsLE : Predicate<"Subtarget->isLittleEndian()">;
def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
-def IsCyclone : Predicate<"Subtarget->isCyclone()">;
+def UseAlternateSExtLoadCVTF32
+ : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
@@ -4957,7 +4958,8 @@ class SExtLoadi8CVTf32Pat<dag addrmode,
0),
dsub)),
0),
- ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
+ ssub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
(LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
@@ -5010,7 +5012,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode,
0),
dsub)),
0),
- dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
+ dsub)))>,
+ Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
(LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Thu Jun 2 13:03:53 2016
@@ -160,10 +160,6 @@ struct AArch64LoadStoreOpt : public Mach
// Find and promote load instructions which read directly from store.
bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
- // Check if converting two narrow loads into a single wider load with
- // bitfield extracts could be enabled.
- bool enableNarrowLdMerge(MachineFunction &Fn);
-
bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
bool runOnMachineFunction(MachineFunction &Fn) override;
@@ -1912,15 +1908,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(
return Modified;
}
-bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
- bool ProfitableArch = Subtarget->isCortexA57() || Subtarget->isKryo();
- // FIXME: The benefit from converting narrow loads into a wider load could be
- // microarchitectural as it assumes that a single load with two bitfield
- // extracts is cheaper than two narrow loads. Currently, this conversion is
- // enabled only in cortex-a57 on which performance benefits were verified.
- return ProfitableArch && !Subtarget->requiresStrictAlign();
-}
-
bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
if (skipFunction(*Fn.getFunction()))
return false;
@@ -1936,7 +1923,8 @@ bool AArch64LoadStoreOpt::runOnMachineFu
UsedRegs.resize(TRI->getNumRegs());
bool Modified = false;
- bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
+ bool enableNarrowLdOpt =
+ Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
for (auto &MBB : Fn)
Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp Thu Jun 2 13:03:53 2016
@@ -44,9 +44,36 @@ AArch64Subtarget::initializeSubtargetDep
CPUString = "generic";
ParseSubtargetFeatures(CPUString, FS);
+ initializeProperties();
+
return *this;
}
+void AArch64Subtarget::initializeProperties() {
+ // Initialize CPU specific properties. We should add a tablegen feature for
+ // this in the future so we can specify it together with the subtarget
+ // features.
+ switch (ARMProcFamily) {
+ case Cyclone:
+ CacheLineSize = 64;
+ PrefetchDistance = 280;
+ MinPrefetchStride = 2048;
+ MaxPrefetchIterationsAhead = 3;
+ break;
+ case CortexA57:
+ MaxInterleaveFactor = 4;
+ break;
+ case Kryo:
+ MaxInterleaveFactor = 4;
+ VectorInsertExtractBaseCost = 2;
+ break;
+ case Others: break;
+ case CortexA35: break;
+ case CortexA53: break;
+ case ExynosM1: break;
+ }
+}
+
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS,
const TargetMachine &TM, bool LittleEndian)
@@ -110,8 +137,7 @@ void AArch64Subtarget::overrideSchedPoli
// Enabling or Disabling the latency heuristic is a close call: It seems to
// help nearly no benchmark on out-of-order architectures, on the other hand
// it regresses register pressure on a few benchmarking.
- if (isCyclone())
- Policy.DisableLatencyHeuristic = true;
+ Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}
bool AArch64Subtarget::enableEarlyIfConversion() const {
@@ -133,8 +159,5 @@ bool AArch64Subtarget::supportsAddressTo
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
- if (!isCortexA57())
- return nullptr;
-
- return llvm::make_unique<A57ChainingConstraint>();
+ return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
}
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Thu Jun 2 13:03:53 2016
@@ -33,8 +33,8 @@ class StringRef;
class Triple;
class AArch64Subtarget : public AArch64GenSubtargetInfo {
-protected:
- enum ARMProcFamilyEnum {
+public:
+ enum ARMProcFamilyEnum : uint8_t {
Others,
CortexA35,
CortexA53,
@@ -44,6 +44,7 @@ protected:
Kryo
};
+protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
@@ -66,6 +67,24 @@ protected:
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
+ bool MergeNarrowLoads = false;
+ bool UseAA = false;
+ bool PredictableSelectIsExpensive = false;
+ bool BalanceFPOps = false;
+ bool CustomAsCheapAsMove = false;
+ bool UsePostRAScheduler = false;
+ bool Misaligned128StoreIsSlow = false;
+ bool AvoidQuadLdStPairs = false;
+ bool UseAlternateSExtLoadCVTF32Pattern = false;
+ bool HasMacroOpFusion = false;
+ bool DisableLatencySchedHeuristic = false;
+ bool UseRSqrt = false;
+ uint8_t MaxInterleaveFactor = 2;
+ uint8_t VectorInsertExtractBaseCost = 3;
+ uint16_t CacheLineSize = 0;
+ uint16_t PrefetchDistance = 0;
+ uint16_t MinPrefetchStride = 1;
+ unsigned MaxPrefetchIterationsAhead = UINT_MAX;
// ReserveX18 - X18 is not available as a general purpose register.
bool ReserveX18;
@@ -93,6 +112,9 @@ private:
/// subtarget initialization.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
+ /// Initialize properties based on the selected processor family.
+ void initializeProperties();
+
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
@@ -123,7 +145,15 @@ public:
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override {
- return isGeneric() || isCortexA53() || isCortexA57() || isKryo();
+ return UsePostRAScheduler;
+ }
+
+ /// Returns ARM processor family.
+ /// Avoid this function! CPU specifics should be kept local to this class
+ /// and preferably modeled with SubtargetFeatures or properties in
+ /// initializeProperties().
+ ARMProcFamilyEnum getProcFamily() const {
+ return ARMProcFamily;
}
bool hasV8_1aOps() const { return HasV8_1aOps; }
@@ -140,6 +170,30 @@ public:
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasCRC() const { return HasCRC; }
+ bool mergeNarrowLoads() const { return MergeNarrowLoads; }
+ bool balanceFPOps() const { return BalanceFPOps; }
+ bool predictableSelectIsExpensive() const {
+ return PredictableSelectIsExpensive;
+ }
+ bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
+ bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
+ bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
+ bool useAlternateSExtLoadCVTF32Pattern() const {
+ return UseAlternateSExtLoadCVTF32Pattern;
+ }
+ bool hasMacroOpFusion() const { return HasMacroOpFusion; }
+ bool useRSqrt() const { return UseRSqrt; }
+ unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
+ unsigned getVectorInsertExtractBaseCost() const {
+ return VectorInsertExtractBaseCost;
+ }
+ unsigned getCacheLineSize() const { return CacheLineSize; }
+ unsigned getPrefetchDistance() const { return PrefetchDistance; }
+ unsigned getMinPrefetchStride() const { return MinPrefetchStride; }
+ unsigned getMaxPrefetchIterationsAhead() const {
+ return MaxPrefetchIterationsAhead;
+ }
+
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
@@ -160,14 +214,7 @@ public:
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
- bool isGeneric() const { return CPUString == "generic"; }
- bool isCyclone() const { return CPUString == "cyclone"; }
- bool isCortexA57() const { return CPUString == "cortex-a57"; }
- bool isCortexA53() const { return CPUString == "cortex-a53"; }
- bool isExynosM1() const { return CPUString == "exynos-m1"; }
- bool isKryo() const { return CPUString == "kryo"; }
-
- bool useAA() const override { return isCortexA53(); }
+ bool useAA() const override { return UseAA; }
/// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
/// that still makes it profitable to inline the call.
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Thu Jun 2 13:03:53 2016
@@ -147,8 +147,7 @@ static void initReciprocals(AArch64Targe
// (52 mantissa bits) are 2 and 3, respectively.
unsigned ExtraStepsF = 2,
ExtraStepsD = ExtraStepsF + 1;
- // FIXME: Enable x^-1/2 only for Exynos M1 at the moment.
- bool UseRsqrt = ST.isExynosM1();
+ bool UseRsqrt = ST.useRSqrt();
TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Thu Jun 2 13:03:53 2016
@@ -368,9 +368,7 @@ int AArch64TTIImpl::getVectorInstrCost(u
}
// All other insert/extracts cost this much.
- if (ST->isKryo())
- return 2;
- return 3;
+ return ST->getVectorInsertExtractBaseCost();
}
int AArch64TTIImpl::getArithmeticInstrCost(
@@ -529,9 +527,7 @@ int AArch64TTIImpl::getCostOfKeepingLive
}
unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
- if (ST->isCortexA57() || ST->isKryo())
- return 4;
- return 2;
+ return ST->getMaxInterleaveFactor();
}
void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
@@ -630,28 +626,17 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(
}
unsigned AArch64TTIImpl::getCacheLineSize() {
- if (ST->isCyclone())
- return 64;
- return BaseT::getCacheLineSize();
+ return ST->getCacheLineSize();
}
unsigned AArch64TTIImpl::getPrefetchDistance() {
- if (ST->isCyclone())
- return 280;
- return BaseT::getPrefetchDistance();
+ return ST->getPrefetchDistance();
}
unsigned AArch64TTIImpl::getMinPrefetchStride() {
- if (ST->isCyclone())
- // The HW prefetcher handles accesses with strides up to 2KB.
- return 2048;
- return BaseT::getMinPrefetchStride();
+ return ST->getMinPrefetchStride();
}
unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
- if (ST->isCyclone())
- // Be conservative for now and don't prefetch ahead too much since the loop
- // may terminate early.
- return 3;
- return BaseT::getMaxPrefetchIterationsAhead();
+ return ST->getMaxPrefetchIterationsAhead();
}