[llvm] r271555 - AArch64: Do not test for CPUs, use SubtargetFeatures
Eric Christopher via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 2 13:08:16 PDT 2016
Looks much nicer now, thanks!
-eric
On Thu, Jun 2, 2016 at 11:10 AM Matthias Braun via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: matze
> Date: Thu Jun 2 13:03:53 2016
> New Revision: 271555
>
> URL: http://llvm.org/viewvc/llvm-project?rev=271555&view=rev
> Log:
> AArch64: Do not test for CPUs, use SubtargetFeatures
>
> Testing for specific CPUs has a number of problems; it is better to use
> subtarget features:
> - When a tweak is added for a specific CPU, it is usually desirable for
>   the next version of that CPU as well, yet we often forget to add it.
> - It is hard to keep track of checks scattered around the target code;
>   declaring all target specifics together with the CPU in the tablegen
>   file is a clearer representation.
> - Subtarget features can be tweaked from the command line.
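
For readers skimming the thread, this is the pattern the patch enables: each
SubtargetFeature in AArch64.td becomes a generated bool member plus a thin
accessor, and passes query the property rather than the CPU string. A rough
C++ sketch (class body abbreviated, scaffolding hypothetical; the member and
accessor names are the ones the patch actually adds):

    // ParseSubtargetFeatures() flips the bool when "+balance-fp-ops" is set.
    class AArch64Subtarget /* : public AArch64GenSubtargetInfo */ {
      bool BalanceFPOps = false;
    public:
      bool balanceFPOps() const { return BalanceFPOps; }
    };

    // A pass then asks about the property, not the CPU:
    //   if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
    //     return false;
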
>
> To discourage people from using CPU checks in the future, I removed the
> isCortexXX(), isCyclone(), ... functions. I added a getProcFamily()
> function for exceptional circumstances, but made it clear in the comment
> that its usage is discouraged.
>
> Reformat the feature list in AArch64.td to one feature per line, in
> alphabetical order, to simplify merging and sorting for out-of-tree
> tweaks.
>
> No functional change intended.
>
> Differential Revision: http://reviews.llvm.org/D20762
>
> Modified:
> llvm/trunk/lib/Target/AArch64/AArch64.td
> llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
> llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
> llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
> llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
> llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
> llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64.td Thu Jun 2 13:03:53 2016
> @@ -58,6 +58,50 @@ def FeatureReserveX18 : SubtargetFeature
>                                           "Reserve X18, making it unavailable "
>                                           "as a GPR">;
>
> +def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",
> +                                            "MergeNarrowLoads", "true",
> +                                            "Merge narrow load instructions">;
> +
> +def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
> +                                    "Use alias analysis during codegen">;
> +
> +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
> +    "true",
> +    "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
> +
> +def FeaturePredictableSelectIsExpensive : SubtargetFeature<
> +    "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
> +    "Prefer likely predicted branches over selects">;
> +
> +def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
> +    "CustomAsCheapAsMove", "true",
> +    "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
> +
> +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
> +    "UsePostRAScheduler", "true", "Schedule again after register allocation">;
> +
> +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
> +    "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
> +
> +def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
> +    "AvoidQuadLdStPairs", "true",
> +    "Do not form quad load/store pair operations">;
> +
> +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
> +    "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
> +    "true", "Use alternative pattern for sextload convert to f32">;
> +
> +def FeatureMacroOpFusion : SubtargetFeature<
> +    "macroop-fusion", "HasMacroOpFusion", "true",
> +    "CPU supports macro op fusion">;
> +
> +def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
> +    "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
> +    "Disable latency scheduling heuristic">;
> +
> +def FeatureUseRSqrt : SubtargetFeature<
> +    "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
> +
>
> //===----------------------------------------------------------------------===//
> // Architectures.
> //
> @@ -94,57 +138,87 @@ include "AArch64SchedM1.td"
> include "AArch64SchedKryo.td"
>
> def ProcA35 : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
> - "Cortex-A35 ARM processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto,
> + "Cortex-A35 ARM processors", [
> FeatureCRC,
> - FeaturePerfMon]>;
> + FeatureCrypto,
> + FeatureFPARMv8,
> + FeatureNEON,
> + FeaturePerfMon
> + ]>;
>
> def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
> - "Cortex-A53 ARM processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto,
> + "Cortex-A53 ARM processors", [
> + FeatureBalanceFPOps,
> FeatureCRC,
> - FeaturePerfMon]>;
> + FeatureCrypto,
> + FeatureCustomCheapAsMoveHandling,
> + FeatureFPARMv8,
> + FeatureNEON,
> + FeaturePerfMon,
> + FeaturePostRAScheduler,
> + FeatureUseAA
> + ]>;
>
> def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
> - "Cortex-A57 ARM processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto,
> + "Cortex-A57 ARM processors", [
> + FeatureBalanceFPOps,
> FeatureCRC,
> - FeaturePerfMon]>;
> + FeatureCrypto,
> + FeatureCustomCheapAsMoveHandling,
> + FeatureFPARMv8,
> + FeatureMergeNarrowLd,
> + FeatureNEON,
> + FeaturePerfMon,
> + FeaturePostRAScheduler,
> + FeaturePredictableSelectIsExpensive
> + ]>;
>
> def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
> - "Cyclone",
> - [FeatureFPARMv8,
> - FeatureNEON,
> + "Cyclone", [
> + FeatureAlternateSExtLoadCVTF32Pattern,
> FeatureCrypto,
> + FeatureDisableLatencySchedHeuristic,
> + FeatureFPARMv8,
> + FeatureMacroOpFusion,
> + FeatureNEON,
> FeaturePerfMon,
> - FeatureZCRegMove, FeatureZCZeroing]>;
> + FeatureSlowMisaligned128Store,
> + FeatureZCRegMove,
> + FeatureZCZeroing
> + ]>;
>
>  def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
> - "Samsung Exynos-M1 processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto,
> + "Samsung Exynos-M1 processors", [
> + FeatureAvoidQuadLdStPairs,
> FeatureCRC,
> - FeaturePerfMon]>;
> + FeatureCrypto,
> + FeatureCustomCheapAsMoveHandling,
> + FeatureFPARMv8,
> + FeatureNEON,
> + FeaturePerfMon,
> + FeatureUseRSqrt
> + ]>;
>
> def ProcKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
> - "Qualcomm Kryo processors",
> - [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCrypto,
> + "Qualcomm Kryo processors", [
> FeatureCRC,
> - FeaturePerfMon]>;
> -
> -def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
> - FeatureNEON,
> - FeatureCRC,
> - FeaturePerfMon]>;
> + FeatureCrypto,
> + FeatureCustomCheapAsMoveHandling,
> + FeatureFPARMv8,
> + FeatureMergeNarrowLd,
> + FeatureNEON,
> + FeaturePerfMon,
> + FeaturePostRAScheduler,
> + FeaturePredictableSelectIsExpensive
> + ]>;
> +
> +def : ProcessorModel<"generic", NoSchedModel, [
> + FeatureCRC,
> + FeatureFPARMv8,
> + FeatureNEON,
> + FeaturePerfMon,
> + FeaturePostRAScheduler
> + ]>;
>
> // FIXME: Cortex-A35 is currently modelled as a Cortex-A53
> def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
>
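
A nice side effect of making these ordinary subtarget features is that each
tweak can now be toggled from the command line without recompiling. Untested,
but an invocation along these lines (feature names as defined above) should
isolate a single tweak for benchmarking:

    llc -mcpu=cortex-a57 -mattr=-merge-narrow-ld input.ll -o out.s
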
> Modified: llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp Thu Jun 2 13:03:53 2016
> @@ -314,9 +314,7 @@ bool AArch64A57FPLoadBalancing::runOnMac
> if (skipFunction(*F.getFunction()))
> return false;
>
> - // Don't do anything if this isn't an A53 or A57.
> - if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
> - F.getSubtarget<AArch64Subtarget>().isCortexA57()))
> + if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
> return false;
>
> bool Changed = false;
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Jun 2 13:03:53 2016
> @@ -634,9 +634,7 @@ AArch64TargetLowering::AArch64TargetLowe
> }
> }
>
> - // Prefer likely predicted branches to selects on out-of-order cores.
> - if (Subtarget->isCortexA57() || Subtarget->isKryo())
> - PredictableSelectIsExpensive = true;
> +  PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
> }
>
>  void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
> @@ -814,12 +812,9 @@ bool AArch64TargetLowering::allowsMisali
> if (Subtarget->requiresStrictAlign())
> return false;
>
> - // FIXME: This is mostly true for Cyclone, but not necessarily others.
> if (Fast) {
> - // FIXME: Define an attribute for slow unaligned accesses instead of
> - // relying on the CPU type as a proxy.
> - // On Cyclone, unaligned 128-bit stores are slow.
> - *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
> + // Some CPUs are fine with unaligned stores except for 128-bit ones.
> +    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
>             // See comments in performSTORECombine() for more details about
>             // these conditions.
>
> @@ -8792,9 +8787,7 @@ static SDValue split16BStores(SDNode *N,
>    // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
>    // a call to that function here.
>
> -  // Cyclone has bad performance on unaligned 16B stores when crossing line and
> -  // page boundaries. We want to split such stores.
> - if (!Subtarget->isCyclone())
> + if (!Subtarget->isMisaligned128StoreSlow())
> return SDValue();
>
> // Don't split at -Oz.
>
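
To restate the new allowsMisalignedMemoryAccesses() logic in isolation, here
is a simplified sketch (helper name invented; the requiresStrictAlign() check
and the extra conditions referenced from performSTORECombine() are elided):

    // Misaligned accesses are reported fast unless the subtarget flagged
    // 128-bit (16-byte) stores as slow, as Cyclone now does via
    // FeatureSlowMisaligned128Store.
    static bool isFastMisalignedAccess(const AArch64Subtarget &ST,
                                       unsigned StoreSizeInBytes) {
      return !ST.isMisaligned128StoreSlow() || StoreSizeInBytes != 16;
    }
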
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Jun 2 13:03:53 2016
> @@ -544,8 +544,7 @@ static bool canBeExpandedToORR(const Mac
> // FIXME: this implementation should be micro-architecture dependent, so a
> // micro-architecture target hook should be introduced here in future.
> bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
> - if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&
> - !Subtarget.isExynosM1() && !Subtarget.isKryo())
> + if (!Subtarget.hasCustomCheapAsMoveHandling())
> return MI->isAsCheapAsAMove();
>
> unsigned Imm;
> @@ -559,7 +558,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
> case AArch64::ADDXri:
> case AArch64::SUBWri:
> case AArch64::SUBXri:
> - return (Subtarget.isExynosM1() ||
> + return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
> MI->getOperand(3).getImm() == 0);
>
> // add/sub on register with shift
> @@ -568,7 +567,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
> case AArch64::SUBWrs:
> case AArch64::SUBXrs:
> Imm = MI->getOperand(3).getImm();
> - return (Subtarget.isExynosM1() &&
> + return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
> AArch64_AM::getArithShiftValue(Imm) < 4);
>
> // logical ops on immediate
> @@ -609,7 +608,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
> case AArch64::ORRWrs:
> case AArch64::ORRXrs:
> Imm = MI->getOperand(3).getImm();
> - return (Subtarget.isExynosM1() &&
> + return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
> AArch64_AM::getShiftValue(Imm) < 4 &&
> AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
>
> @@ -1522,8 +1521,8 @@ bool AArch64InstrInfo::isCandidateToMerg
> if (isLdStPairSuppressed(MI))
> return false;
>
> - // Do not pair quad ld/st for Exynos.
> - if (Subtarget.isExynosM1()) {
> +  // On some CPUs quad load/store pairs are slower than two single load/stores.
> + if (Subtarget.avoidQuadLdStPairs()) {
> switch (MI->getOpcode()) {
> default:
> break;
> @@ -1801,8 +1800,8 @@ bool AArch64InstrInfo::shouldClusterMemO
>
> bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
>                                                MachineInstr *Second) const {
> - if (Subtarget.isCyclone()) {
> - // Cyclone can fuse CMN, CMP, TST followed by Bcc.
> + if (Subtarget.hasMacroOpFusion()) {
> + // Fuse CMN, CMP, TST followed by Bcc.
> unsigned SecondOpcode = Second->getOpcode();
> if (SecondOpcode == AArch64::Bcc) {
> switch (First->getOpcode()) {
> @@ -1817,7 +1816,7 @@ bool AArch64InstrInfo::shouldScheduleAdj
> return true;
> }
> }
> - // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
> + // Fuse ALU operations followed by CBZ/CBNZ.
>      if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
>          SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
> switch (First->getOpcode()) {
>
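
Worth noting on the AArch64InstrInfo.cpp changes: the fusion check is now
capability-driven, so a future core gets the CMN/CMP/TST + Bcc pairing simply
by listing FeatureMacroOpFusion in its ProcXX def, while the remaining
getProcFamily() == ExynosM1 tests are exactly the "exceptional circumstances"
the commit message calls out.
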
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Thu Jun 2 13:03:53 2016
> @@ -34,7 +34,8 @@ def HasSPE : Predicate<"Subtar
>
> def IsLE : Predicate<"Subtarget->isLittleEndian()">;
> def IsBE : Predicate<"!Subtarget->isLittleEndian()">;
> -def IsCyclone : Predicate<"Subtarget->isCyclone()">;
> +def UseAlternateSExtLoadCVTF32
> + : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
>
>
> //===----------------------------------------------------------------------===//
> // AArch64-specific DAG Nodes.
> @@ -4957,7 +4958,8 @@ class SExtLoadi8CVTf32Pat<dag addrmode,
> 0),
> dsub)),
> 0),
> -                            ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
> +                            ssub)))>,
> +          Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
>
>  def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
>                            (LDRBroW GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
> @@ -5010,7 +5012,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode,
> 0),
> dsub)),
> 0),
> -                             dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
> +                             dsub)))>,
> +          Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
>
>  def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
>                             (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Thu Jun 2 13:03:53 2016
> @@ -160,10 +160,6 @@ struct AArch64LoadStoreOpt : public Mach
> // Find and promote load instructions which read directly from store.
> bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
>
> - // Check if converting two narrow loads into a single wider load with
> - // bitfield extracts could be enabled.
> - bool enableNarrowLdMerge(MachineFunction &Fn);
> -
> bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
>
> bool runOnMachineFunction(MachineFunction &Fn) override;
> @@ -1912,15 +1908,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(
> return Modified;
> }
>
> -bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
> - bool ProfitableArch = Subtarget->isCortexA57() || Subtarget->isKryo();
> -  // FIXME: The benefit from converting narrow loads into a wider load could be
> -  // microarchitectural as it assumes that a single load with two bitfield
> -  // extracts is cheaper than two narrow loads. Currently, this conversion is
> -  // enabled only in cortex-a57 on which performance benefits were verified.
> - return ProfitableArch && !Subtarget->requiresStrictAlign();
> -}
> -
> bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
> if (skipFunction(*Fn.getFunction()))
> return false;
> @@ -1936,7 +1923,8 @@ bool AArch64LoadStoreOpt::runOnMachineFu
> UsedRegs.resize(TRI->getNumRegs());
>
> bool Modified = false;
> - bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
> + bool enableNarrowLdOpt =
> + Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
> for (auto &MBB : Fn)
> Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
>
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp Thu Jun 2 13:03:53 2016
> @@ -44,9 +44,36 @@ AArch64Subtarget::initializeSubtargetDep
> CPUString = "generic";
>
> ParseSubtargetFeatures(CPUString, FS);
> + initializeProperties();
> +
> return *this;
> }
>
> +void AArch64Subtarget::initializeProperties() {
> +  // Initialize CPU specific properties. We should add a tablegen feature for
> +  // this in the future so we can specify it together with the subtarget
> +  // features.
> + switch (ARMProcFamily) {
> + case Cyclone:
> + CacheLineSize = 64;
> + PrefetchDistance = 280;
> + MinPrefetchStride = 2048;
> + MaxPrefetchIterationsAhead = 3;
> + break;
> + case CortexA57:
> + MaxInterleaveFactor = 4;
> + break;
> + case Kryo:
> + MaxInterleaveFactor = 4;
> + VectorInsertExtractBaseCost = 2;
> + break;
> + case Others: break;
> + case CortexA35: break;
> + case CortexA53: break;
> + case ExynosM1: break;
> + }
> +}
> +
>  AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
>                                     const std::string &FS,
>                                     const TargetMachine &TM, bool LittleEndian)
> @@ -110,8 +137,7 @@ void AArch64Subtarget::overrideSchedPoli
>    // Enabling or Disabling the latency heuristic is a close call: It seems to
>    // help nearly no benchmark on out-of-order architectures, on the other hand
>    // it regresses register pressure on a few benchmarking.
> - if (isCyclone())
> - Policy.DisableLatencyHeuristic = true;
> + Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
> }
>
> bool AArch64Subtarget::enableEarlyIfConversion() const {
> @@ -133,8 +159,5 @@ bool AArch64Subtarget::supportsAddressTo
>
> std::unique_ptr<PBQPRAConstraint>
> AArch64Subtarget::getCustomPBQPConstraints() const {
> - if (!isCortexA57())
> - return nullptr;
> -
> - return llvm::make_unique<A57ChainingConstraint>();
> +  return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
> }
>
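
The fully covered switch in initializeProperties() is a nice touch: a new
ARMProcFamilyEnum value will trigger a -Wswitch warning until a case is added
here. A hypothetical follow-up for a new core might look like this (core name
and value invented purely for illustration):

    case CortexA72:              // hypothetical, not part of this patch
      MaxInterleaveFactor = 4;   // same treatment as Cortex-A57
      break;
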
> Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Thu Jun 2 13:03:53 2016
> @@ -33,8 +33,8 @@ class StringRef;
> class Triple;
>
> class AArch64Subtarget : public AArch64GenSubtargetInfo {
> -protected:
> - enum ARMProcFamilyEnum {
> +public:
> + enum ARMProcFamilyEnum : uint8_t {
> Others,
> CortexA35,
> CortexA53,
> @@ -44,6 +44,7 @@ protected:
> Kryo
> };
>
> +protected:
>    /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
> ARMProcFamilyEnum ARMProcFamily = Others;
>
> @@ -66,6 +67,24 @@ protected:
>
> // StrictAlign - Disallow unaligned memory accesses.
> bool StrictAlign = false;
> + bool MergeNarrowLoads = false;
> + bool UseAA = false;
> + bool PredictableSelectIsExpensive = false;
> + bool BalanceFPOps = false;
> + bool CustomAsCheapAsMove = false;
> + bool UsePostRAScheduler = false;
> + bool Misaligned128StoreIsSlow = false;
> + bool AvoidQuadLdStPairs = false;
> + bool UseAlternateSExtLoadCVTF32Pattern = false;
> + bool HasMacroOpFusion = false;
> + bool DisableLatencySchedHeuristic = false;
> + bool UseRSqrt = false;
> + uint8_t MaxInterleaveFactor = 2;
> + uint8_t VectorInsertExtractBaseCost = 3;
> + uint16_t CacheLineSize = 0;
> + uint16_t PrefetchDistance = 0;
> + uint16_t MinPrefetchStride = 1;
> + unsigned MaxPrefetchIterationsAhead = UINT_MAX;
>
> // ReserveX18 - X18 is not available as a general purpose register.
> bool ReserveX18;
> @@ -93,6 +112,9 @@ private:
> /// subtarget initialization.
> AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
>
> + /// Initialize properties based on the selected processor family.
> + void initializeProperties();
> +
> public:
> /// This constructor initializes the data members to match that
> /// of the specified triple.
> @@ -123,7 +145,15 @@ public:
> const Triple &getTargetTriple() const { return TargetTriple; }
> bool enableMachineScheduler() const override { return true; }
> bool enablePostRAScheduler() const override {
> - return isGeneric() || isCortexA53() || isCortexA57() || isKryo();
> + return UsePostRAScheduler;
> + }
> +
> + /// Returns ARM processor family.
> +  /// Avoid this function! CPU specifics should be kept local to this class
> + /// and preferably modeled with SubtargetFeatures or properties in
> + /// initializeProperties().
> + ARMProcFamilyEnum getProcFamily() const {
> + return ARMProcFamily;
> }
>
> bool hasV8_1aOps() const { return HasV8_1aOps; }
> @@ -140,6 +170,30 @@ public:
> bool hasNEON() const { return HasNEON; }
> bool hasCrypto() const { return HasCrypto; }
> bool hasCRC() const { return HasCRC; }
> + bool mergeNarrowLoads() const { return MergeNarrowLoads; }
> + bool balanceFPOps() const { return BalanceFPOps; }
> + bool predictableSelectIsExpensive() const {
> + return PredictableSelectIsExpensive;
> + }
> +  bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
> +  bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
> + bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
> + bool useAlternateSExtLoadCVTF32Pattern() const {
> + return UseAlternateSExtLoadCVTF32Pattern;
> + }
> + bool hasMacroOpFusion() const { return HasMacroOpFusion; }
> + bool useRSqrt() const { return UseRSqrt; }
> + unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
> + unsigned getVectorInsertExtractBaseCost() const {
> + return VectorInsertExtractBaseCost;
> + }
> + unsigned getCacheLineSize() const { return CacheLineSize; }
> + unsigned getPrefetchDistance() const { return PrefetchDistance; }
> + unsigned getMinPrefetchStride() const { return MinPrefetchStride; }
> + unsigned getMaxPrefetchIterationsAhead() const {
> + return MaxPrefetchIterationsAhead;
> + }
> +
> /// CPU has TBI (top byte of addresses is ignored during HW address
> /// translation) and OS enables it.
> bool supportsAddressTopByteIgnored() const;
> @@ -160,14 +214,7 @@ public:
> bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
> bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
>
> - bool isGeneric() const { return CPUString == "generic"; }
> - bool isCyclone() const { return CPUString == "cyclone"; }
> - bool isCortexA57() const { return CPUString == "cortex-a57"; }
> - bool isCortexA53() const { return CPUString == "cortex-a53"; }
> - bool isExynosM1() const { return CPUString == "exynos-m1"; }
> - bool isKryo() const { return CPUString == "kryo"; }
> -
> - bool useAA() const override { return isCortexA53(); }
> + bool useAA() const override { return UseAA; }
>
> /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
> /// that still makes it profitable to inline the call.
>
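
One subtlety in the new AArch64Subtarget.h defaults: CacheLineSize = 0,
MinPrefetchStride = 1 and MaxPrefetchIterationsAhead = UINT_MAX appear to be
chosen to match the BaseT fallbacks that the old AArch64TTIImpl code returned
for non-Cyclone CPUs, so cores without an initializeProperties() entry should
behave exactly as before. Worth double-checking, but that is how I read it.
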
> Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Thu Jun 2 13:03:53 2016
> @@ -147,8 +147,7 @@ static void initReciprocals(AArch64Targe
> // (52 mantissa bits) are 2 and 3, respectively.
> unsigned ExtraStepsF = 2,
> ExtraStepsD = ExtraStepsF + 1;
> - // FIXME: Enable x^-1/2 only for Exynos M1 at the moment.
> - bool UseRsqrt = ST.isExynosM1();
> + bool UseRsqrt = ST.useRSqrt();
>
> TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
> TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Thu Jun 2 13:03:53 2016
> @@ -368,9 +368,7 @@ int AArch64TTIImpl::getVectorInstrCost(u
> }
>
> // All other insert/extracts cost this much.
> - if (ST->isKryo())
> - return 2;
> - return 3;
> + return ST->getVectorInsertExtractBaseCost();
> }
>
> int AArch64TTIImpl::getArithmeticInstrCost(
> @@ -529,9 +527,7 @@ int AArch64TTIImpl::getCostOfKeepingLive
> }
>
> unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
> - if (ST->isCortexA57() || ST->isKryo())
> - return 4;
> - return 2;
> + return ST->getMaxInterleaveFactor();
> }
>
> void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
> @@ -630,28 +626,17 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(
> }
>
> unsigned AArch64TTIImpl::getCacheLineSize() {
> - if (ST->isCyclone())
> - return 64;
> - return BaseT::getCacheLineSize();
> + return ST->getCacheLineSize();
> }
>
> unsigned AArch64TTIImpl::getPrefetchDistance() {
> - if (ST->isCyclone())
> - return 280;
> - return BaseT::getPrefetchDistance();
> + return ST->getPrefetchDistance();
> }
>
> unsigned AArch64TTIImpl::getMinPrefetchStride() {
> - if (ST->isCyclone())
> - // The HW prefetcher handles accesses with strides up to 2KB.
> - return 2048;
> - return BaseT::getMinPrefetchStride();
> + return ST->getMinPrefetchStride();
> }
>
> unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
> - if (ST->isCyclone())
> -  // Be conservative for now and don't prefetch ahead too much since the loop
> -  // may terminate early.
> - return 3;
> - return BaseT::getMaxPrefetchIterationsAhead();
> + return ST->getMaxPrefetchIterationsAhead();
> }
>
>
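
With this, the TTI hooks collapse to one-line forwards into the subtarget,
e.g. (straight from the patch):

    unsigned AArch64TTIImpl::getPrefetchDistance() {
      // Per-CPU value, set up in AArch64Subtarget::initializeProperties().
      return ST->getPrefetchDistance();
    }

so all of the per-CPU tuning data now lives in one place.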