[llvm] r271555 - AArch64: Do not test for CPUs, use SubtargetFeatures

Eric Christopher via llvm-commits llvm-commits at lists.llvm.org
Thu Jun 2 13:08:16 PDT 2016


Looks much nicer now, thanks!

-eric

On Thu, Jun 2, 2016 at 11:10 AM Matthias Braun via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Author: matze
> Date: Thu Jun  2 13:03:53 2016
> New Revision: 271555
>
> URL: http://llvm.org/viewvc/llvm-project?rev=271555&view=rev
> Log:
> AArch64: Do not test for CPUs, use SubtargetFeatures
>
> Testing for specific CPUs has a number of problems; it is better to use
> subtarget features:
> - When some tweak is added for a specific CPU, it is often desirable for
>   the next version of that CPU as well, yet we often forget to add it.
> - It is hard to keep track of checks scattered around the target code;
>   declaring all target specifics together with the CPU in the tablegen
>   file is a clearer representation.
> - Subtarget features can be tweaked from the command line (see the
>   example below).
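As an aside for anyone following along: with the feature names this patch
introduces, a tweak can now be toggled per-invocation without touching any
CPU definition, for example

    llc -mtriple=aarch64 -mcpu=cortex-a53 -mattr=-use-aa,+merge-narrow-ld foo.ll

whereas the old isCortexA53()-style checks could only be changed by editing
and rebuilding the compiler.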
>
> To discourage people from using CPU checks in the future, I removed the
> isCortexXX(), isCyclone(), ... functions. I added a getProcFamily()
> function for exceptional circumstances, but made it clear in the comment
> that its use is discouraged.
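For out-of-tree code doing the same migration, the pattern looks roughly like
this (the accessor names are the ones added in this patch; the helper
functions are made up for illustration):

    // Before: CPU identity checks silently miss future cores.
    if (Subtarget.isCortexA57() || Subtarget.isKryo())
      enableSelectToBranchLowering(); // made-up helper

    // After: query the capability itself. Any CPU that lists the feature
    // in AArch64.td gets it, and -mattr can override it for testing.
    if (Subtarget.predictableSelectIsExpensive())
      enableSelectToBranchLowering();

    // Escape hatch for one-off quirks, explicitly discouraged:
    if (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1)
      applyOneOffQuirk(); // made-up helper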
>
> Reformat the feature list in AArch64.td to have one feature per line in
> alphabetical order, to simplify merging and sorting for out-of-tree
> tweaks.
>
> No functional change intended.
>
> Differential Revision: http://reviews.llvm.org/D20762
>
> Modified:
>     llvm/trunk/lib/Target/AArch64/AArch64.td
>     llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
>     llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
>     llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
>     llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
>     llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
>     llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
>     llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
>     llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
>     llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64.td Thu Jun  2 13:03:53 2016
> @@ -58,6 +58,50 @@ def FeatureReserveX18 : SubtargetFeature
>                                           "Reserve X18, making it unavailable "
>                                           "as a GPR">;
>
> +def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",
> +                                            "MergeNarrowLoads", "true",
> +                                            "Merge narrow load instructions">;
> +
> +def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
> +                                    "Use alias analysis during codegen">;
> +
> +def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
> +    "true",
> +    "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
> +
> +def FeaturePredictableSelectIsExpensive : SubtargetFeature<
> +    "predictable-select-expensive", "PredictableSelectIsExpensive", "true",
> +    "Prefer likely predicted branches over selects">;
> +
> +def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",
> +    "CustomAsCheapAsMove", "true",
> +    "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;
> +
> +def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",
> +    "UsePostRAScheduler", "true", "Schedule again after register allocation">;
> +
> +def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",
> +    "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;
> +
> +def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",
> +    "AvoidQuadLdStPairs", "true",
> +    "Do not form quad load/store pair operations">;
> +
> +def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<
> +    "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",
> +    "true", "Use alternative pattern for sextload convert to f32">;
> +
> +def FeatureMacroOpFusion : SubtargetFeature<
> +    "macroop-fusion", "HasMacroOpFusion", "true",
> +    "CPU supports macro op fusion">;
> +
> +def FeatureDisableLatencySchedHeuristic : SubtargetFeature<
> +    "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",
> +    "Disable latency scheduling heuristic">;
> +
> +def FeatureUseRSqrt : SubtargetFeature<
> +    "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
> +
>
>  //===----------------------------------------------------------------------===//
>  // Architectures.
>  //
> @@ -94,57 +138,87 @@ include "AArch64SchedM1.td"
>  include "AArch64SchedKryo.td"
>
>  def ProcA35     : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",
> -                                   "Cortex-A35 ARM processors",
> -                                   [FeatureFPARMv8,
> -                                   FeatureNEON,
> -                                   FeatureCrypto,
> +                                   "Cortex-A35 ARM processors", [
>                                     FeatureCRC,
> -                                   FeaturePerfMon]>;
> +                                   FeatureCrypto,
> +                                   FeatureFPARMv8,
> +                                   FeatureNEON,
> +                                   FeaturePerfMon
> +                                   ]>;
>
>  def ProcA53     : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",
> -                                   "Cortex-A53 ARM processors",
> -                                   [FeatureFPARMv8,
> -                                   FeatureNEON,
> -                                   FeatureCrypto,
> +                                   "Cortex-A53 ARM processors", [
> +                                   FeatureBalanceFPOps,
>                                     FeatureCRC,
> -                                   FeaturePerfMon]>;
> +                                   FeatureCrypto,
> +                                   FeatureCustomCheapAsMoveHandling,
> +                                   FeatureFPARMv8,
> +                                   FeatureNEON,
> +                                   FeaturePerfMon,
> +                                   FeaturePostRAScheduler,
> +                                   FeatureUseAA
> +                                   ]>;
>
>  def ProcA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
> -                                   "Cortex-A57 ARM processors",
> -                                   [FeatureFPARMv8,
> -                                   FeatureNEON,
> -                                   FeatureCrypto,
> +                                   "Cortex-A57 ARM processors", [
> +                                   FeatureBalanceFPOps,
>                                     FeatureCRC,
> -                                   FeaturePerfMon]>;
> +                                   FeatureCrypto,
> +                                   FeatureCustomCheapAsMoveHandling,
> +                                   FeatureFPARMv8,
> +                                   FeatureMergeNarrowLd,
> +                                   FeatureNEON,
> +                                   FeaturePerfMon,
> +                                   FeaturePostRAScheduler,
> +                                   FeaturePredictableSelectIsExpensive
> +                                   ]>;
>
>  def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",
> -                                   "Cyclone",
> -                                   [FeatureFPARMv8,
> -                                   FeatureNEON,
> +                                   "Cyclone", [
> +                                   FeatureAlternateSExtLoadCVTF32Pattern,
>                                     FeatureCrypto,
> +                                   FeatureDisableLatencySchedHeuristic,
> +                                   FeatureFPARMv8,
> +                                   FeatureMacroOpFusion,
> +                                   FeatureNEON,
>                                     FeaturePerfMon,
> -                                   FeatureZCRegMove, FeatureZCZeroing]>;
> +                                   FeatureSlowMisaligned128Store,
> +                                   FeatureZCRegMove,
> +                                   FeatureZCZeroing
> +                                   ]>;
>
>  def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",
> -                                    "Samsung Exynos-M1 processors",
> -                                    [FeatureFPARMv8,
> -                                    FeatureNEON,
> -                                    FeatureCrypto,
> +                                    "Samsung Exynos-M1 processors", [
> +                                    FeatureAvoidQuadLdStPairs,
>                                      FeatureCRC,
> -                                    FeaturePerfMon]>;
> +                                    FeatureCrypto,
> +                                    FeatureCustomCheapAsMoveHandling,
> +                                    FeatureFPARMv8,
> +                                    FeatureNEON,
> +                                    FeaturePerfMon,
> +                                    FeatureUseRSqrt
> +                                    ]>;
>
>  def ProcKryo    : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",
> -                                   "Qualcomm Kryo processors",
> -                                   [FeatureFPARMv8,
> -                                   FeatureNEON,
> -                                   FeatureCrypto,
> +                                   "Qualcomm Kryo processors", [
>                                     FeatureCRC,
> -                                   FeaturePerfMon]>;
> -
> -def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,
> -                                              FeatureNEON,
> -                                              FeatureCRC,
> -                                              FeaturePerfMon]>;
> +                                   FeatureCrypto,
> +                                   FeatureCustomCheapAsMoveHandling,
> +                                   FeatureFPARMv8,
> +                                   FeatureMergeNarrowLd,
> +                                   FeatureNEON,
> +                                   FeaturePerfMon,
> +                                   FeaturePostRAScheduler,
> +                                   FeaturePredictableSelectIsExpensive
> +                                   ]>;
> +
> +def : ProcessorModel<"generic", NoSchedModel, [
> +                     FeatureCRC,
> +                     FeatureFPARMv8,
> +                     FeatureNEON,
> +                     FeaturePerfMon,
> +                     FeaturePostRAScheduler
> +                     ]>;
>
>  // FIXME: Cortex-A35 is currently modelled as a Cortex-A53
>  def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp Thu Jun  2 13:03:53 2016
> @@ -314,9 +314,7 @@ bool AArch64A57FPLoadBalancing::runOnMac
>    if (skipFunction(*F.getFunction()))
>      return false;
>
> -  // Don't do anything if this isn't an A53 or A57.
> -  if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||
> -        F.getSubtarget<AArch64Subtarget>().isCortexA57()))
> +  if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())
>      return false;
>
>    bool Changed = false;
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Jun  2 13:03:53 2016
> @@ -634,9 +634,7 @@ AArch64TargetLowering::AArch64TargetLowe
>      }
>    }
>
> -  // Prefer likely predicted branches to selects on out-of-order cores.
> -  if (Subtarget->isCortexA57() || Subtarget->isKryo())
> -    PredictableSelectIsExpensive = true;
> +  PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
>  }
>
>  void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {
> @@ -814,12 +812,9 @@ bool AArch64TargetLowering::allowsMisali
>    if (Subtarget->requiresStrictAlign())
>      return false;
>
> -  // FIXME: This is mostly true for Cyclone, but not necessarily others.
>    if (Fast) {
> -    // FIXME: Define an attribute for slow unaligned accesses instead of
> -    // relying on the CPU type as a proxy.
> -    // On Cyclone, unaligned 128-bit stores are slow.
> -    *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||
> +    // Some CPUs are fine with unaligned stores except for 128-bit ones.
> +    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||
>              // See comments in performSTORECombine() for more details about
>              // these conditions.
>
> @@ -8792,9 +8787,7 @@ static SDValue split16BStores(SDNode *N,
>    // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be
>    // a call to that function here.
>
> -  // Cyclone has bad performance on unaligned 16B stores when crossing line and
> -  // page boundaries. We want to split such stores.
> -  if (!Subtarget->isCyclone())
> +  if (!Subtarget->isMisaligned128StoreSlow())
>      return SDValue();
>
>    // Don't split at -Oz.
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Jun  2 13:03:53 2016
> @@ -544,8 +544,7 @@ static bool canBeExpandedToORR(const Mac
>  // FIXME: this implementation should be micro-architecture dependent, so a
>  // micro-architecture target hook should be introduced here in future.
>  bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {
> -  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&
> -      !Subtarget.isExynosM1() && !Subtarget.isKryo())
> +  if (!Subtarget.hasCustomCheapAsMoveHandling())
>      return MI->isAsCheapAsAMove();
>
>    unsigned Imm;
> @@ -559,7 +558,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
>    case AArch64::ADDXri:
>    case AArch64::SUBWri:
>    case AArch64::SUBXri:
> -    return (Subtarget.isExynosM1() ||
> +    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||
>              MI->getOperand(3).getImm() == 0);
>
>    // add/sub on register with shift
> @@ -568,7 +567,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
>    case AArch64::SUBWrs:
>    case AArch64::SUBXrs:
>      Imm = MI->getOperand(3).getImm();
> -    return (Subtarget.isExynosM1() &&
> +    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
>              AArch64_AM::getArithShiftValue(Imm) < 4);
>
>    // logical ops on immediate
> @@ -609,7 +608,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(
>    case AArch64::ORRWrs:
>    case AArch64::ORRXrs:
>      Imm = MI->getOperand(3).getImm();
> -    return (Subtarget.isExynosM1() &&
> +    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&
>              AArch64_AM::getShiftValue(Imm) < 4 &&
>              AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);
>
> @@ -1522,8 +1521,8 @@ bool AArch64InstrInfo::isCandidateToMerg
>    if (isLdStPairSuppressed(MI))
>      return false;
>
> -  // Do not pair quad ld/st for Exynos.
> -  if (Subtarget.isExynosM1()) {
> +  // On some CPUs quad load/store pairs are slower than two single
> load/stores.
> +  if (Subtarget.avoidQuadLdStPairs()) {
>      switch (MI->getOpcode()) {
>      default:
>        break;
> @@ -1801,8 +1800,8 @@ bool AArch64InstrInfo::shouldClusterMemO
>
>  bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,
>                                                MachineInstr *Second) const {
> -  if (Subtarget.isCyclone()) {
> -    // Cyclone can fuse CMN, CMP, TST followed by Bcc.
> +  if (Subtarget.hasMacroOpFusion()) {
> +    // Fuse CMN, CMP, TST followed by Bcc.
>      unsigned SecondOpcode = Second->getOpcode();
>      if (SecondOpcode == AArch64::Bcc) {
>        switch (First->getOpcode()) {
> @@ -1817,7 +1816,7 @@ bool AArch64InstrInfo::shouldScheduleAdj
>          return true;
>        }
>      }
> -    // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.
> +    // Fuse ALU operations followed by CBZ/CBNZ.
>      if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||
>          SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {
>        switch (First->getOpcode()) {
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Thu Jun  2 13:03:53 2016
> @@ -34,7 +34,8 @@ def HasSPE           : Predicate<"Subtar
>
>  def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
>  def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
> -def IsCyclone        : Predicate<"Subtarget->isCyclone()">;
> +def UseAlternateSExtLoadCVTF32
> +    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;
>
>
>  //===----------------------------------------------------------------------===//
>  // AArch64-specific DAG Nodes.
> @@ -4957,7 +4958,8 @@ class SExtLoadi8CVTf32Pat<dag addrmode,
>                                      0),
>                                    dsub)),
>                                 0),
> -                             ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;
> +                             ssub)))>,
> +    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
>
>  def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
>                            (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
> @@ -5010,7 +5012,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode,
>                                       0),
>                                     dsub)),
>                                 0),
> -                             dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;
> +                             dsub)))>,
> +    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;
>
>  def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
>                             (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Thu Jun  2 13:03:53 2016
> @@ -160,10 +160,6 @@ struct AArch64LoadStoreOpt : public Mach
>    // Find and promote load instructions which read directly from store.
>    bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);
>
> -  // Check if converting two narrow loads into a single wider load with
> -  // bitfield extracts could be enabled.
> -  bool enableNarrowLdMerge(MachineFunction &Fn);
> -
>    bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);
>
>    bool runOnMachineFunction(MachineFunction &Fn) override;
> @@ -1912,15 +1908,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(
>    return Modified;
>  }
>
> -bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {
> -  bool ProfitableArch = Subtarget->isCortexA57() || Subtarget->isKryo();
> -  // FIXME: The benefit from converting narrow loads into a wider load could be
> -  // microarchitectural as it assumes that a single load with two bitfield
> -  // extracts is cheaper than two narrow loads. Currently, this conversion is
> -  // enabled only in cortex-a57 on which performance benefits were verified.
> -  return ProfitableArch && !Subtarget->requiresStrictAlign();
> -}
> -
>  bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
>    if (skipFunction(*Fn.getFunction()))
>      return false;
> @@ -1936,7 +1923,8 @@ bool AArch64LoadStoreOpt::runOnMachineFu
>    UsedRegs.resize(TRI->getNumRegs());
>
>    bool Modified = false;
> -  bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);
> +  bool enableNarrowLdOpt =
> +    Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();
>    for (auto &MBB : Fn)
>      Modified |= optimizeBlock(MBB, enableNarrowLdOpt);
>
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp Thu Jun  2 13:03:53 2016
> @@ -44,9 +44,36 @@ AArch64Subtarget::initializeSubtargetDep
>      CPUString = "generic";
>
>    ParseSubtargetFeatures(CPUString, FS);
> +  initializeProperties();
> +
>    return *this;
>  }
>
> +void AArch64Subtarget::initializeProperties() {
> +  // Initialize CPU specific properties. We should add a tablegen feature for
> +  // this in the future so we can specify it together with the subtarget
> +  // features.
> +  switch (ARMProcFamily) {
> +  case Cyclone:
> +    CacheLineSize = 64;
> +    PrefetchDistance = 280;
> +    MinPrefetchStride = 2048;
> +    MaxPrefetchIterationsAhead = 3;
> +    break;
> +  case CortexA57:
> +    MaxInterleaveFactor = 4;
> +    break;
> +  case Kryo:
> +    MaxInterleaveFactor = 4;
> +    VectorInsertExtractBaseCost = 2;
> +    break;
> +  case Others: break;
> +  case CortexA35: break;
> +  case CortexA53: break;
> +  case ExynosM1: break;
> +  }
> +}
> +
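Note that families not handled in the switch above simply keep the defaults
declared in AArch64Subtarget.h (CacheLineSize = 0, MaxInterleaveFactor = 2,
and so on), so a new core would only need a case for the values it overrides,
roughly:

    case FutureCore:             // hypothetical family, for illustration only
      MaxInterleaveFactor = 4;   // everything else stays at the defaults
      break;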
>  AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
>                                     const std::string &FS,
>                                     const TargetMachine &TM, bool LittleEndian)
> @@ -110,8 +137,7 @@ void AArch64Subtarget::overrideSchedPoli
>    // Enabling or Disabling the latency heuristic is a close call: It seems to
>    // help nearly no benchmark on out-of-order architectures, on the other hand
>    // it regresses register pressure on a few benchmarking.
> -  if (isCyclone())
> -    Policy.DisableLatencyHeuristic = true;
> +  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
>  }
>
>  bool AArch64Subtarget::enableEarlyIfConversion() const {
> @@ -133,8 +159,5 @@ bool AArch64Subtarget::supportsAddressTo
>
>  std::unique_ptr<PBQPRAConstraint>
>  AArch64Subtarget::getCustomPBQPConstraints() const {
> -  if (!isCortexA57())
> -    return nullptr;
> -
> -  return llvm::make_unique<A57ChainingConstraint>();
> +  return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
>  }
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Thu Jun  2 13:03:53 2016
> @@ -33,8 +33,8 @@ class StringRef;
>  class Triple;
>
>  class AArch64Subtarget : public AArch64GenSubtargetInfo {
> -protected:
> -  enum ARMProcFamilyEnum {
> +public:
> +  enum ARMProcFamilyEnum : uint8_t {
>      Others,
>      CortexA35,
>      CortexA53,
> @@ -44,6 +44,7 @@ protected:
>      Kryo
>    };
>
> +protected:
>    /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
>    ARMProcFamilyEnum ARMProcFamily = Others;
>
> @@ -66,6 +67,24 @@ protected:
>
>    // StrictAlign - Disallow unaligned memory accesses.
>    bool StrictAlign = false;
> +  bool MergeNarrowLoads = false;
> +  bool UseAA = false;
> +  bool PredictableSelectIsExpensive = false;
> +  bool BalanceFPOps = false;
> +  bool CustomAsCheapAsMove = false;
> +  bool UsePostRAScheduler = false;
> +  bool Misaligned128StoreIsSlow = false;
> +  bool AvoidQuadLdStPairs = false;
> +  bool UseAlternateSExtLoadCVTF32Pattern = false;
> +  bool HasMacroOpFusion = false;
> +  bool DisableLatencySchedHeuristic = false;
> +  bool UseRSqrt = false;
> +  uint8_t MaxInterleaveFactor = 2;
> +  uint8_t VectorInsertExtractBaseCost = 3;
> +  uint16_t CacheLineSize = 0;
> +  uint16_t PrefetchDistance = 0;
> +  uint16_t MinPrefetchStride = 1;
> +  unsigned MaxPrefetchIterationsAhead = UINT_MAX;
>
>    // ReserveX18 - X18 is not available as a general purpose register.
>    bool ReserveX18;
> @@ -93,6 +112,9 @@ private:
>    /// subtarget initialization.
>    AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);
>
> +  /// Initialize properties based on the selected processor family.
> +  void initializeProperties();
> +
>  public:
>    /// This constructor initializes the data members to match that
>    /// of the specified triple.
> @@ -123,7 +145,15 @@ public:
>    const Triple &getTargetTriple() const { return TargetTriple; }
>    bool enableMachineScheduler() const override { return true; }
>    bool enablePostRAScheduler() const override {
> -    return isGeneric() || isCortexA53() || isCortexA57() || isKryo();
> +    return UsePostRAScheduler;
> +  }
> +
> +  /// Returns ARM processor family.
> +  /// Avoid this function! CPU specifics should be kept local to this class
> +  /// and preferably modeled with SubtargetFeatures or properties in
> +  /// initializeProperties().
> +  ARMProcFamilyEnum getProcFamily() const {
> +    return ARMProcFamily;
>    }
>
>    bool hasV8_1aOps() const { return HasV8_1aOps; }
> @@ -140,6 +170,30 @@ public:
>    bool hasNEON() const { return HasNEON; }
>    bool hasCrypto() const { return HasCrypto; }
>    bool hasCRC() const { return HasCRC; }
> +  bool mergeNarrowLoads() const { return MergeNarrowLoads; }
> +  bool balanceFPOps() const { return BalanceFPOps; }
> +  bool predictableSelectIsExpensive() const {
> +    return PredictableSelectIsExpensive;
> +  }
> +  bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
> +  bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
> +  bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }
> +  bool useAlternateSExtLoadCVTF32Pattern() const {
> +    return UseAlternateSExtLoadCVTF32Pattern;
> +  }
> +  bool hasMacroOpFusion() const { return HasMacroOpFusion; }
> +  bool useRSqrt() const { return UseRSqrt; }
> +  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
> +  unsigned getVectorInsertExtractBaseCost() const {
> +    return VectorInsertExtractBaseCost;
> +  }
> +  unsigned getCacheLineSize() const { return CacheLineSize; }
> +  unsigned getPrefetchDistance() const { return PrefetchDistance; }
> +  unsigned getMinPrefetchStride() const { return MinPrefetchStride; }
> +  unsigned getMaxPrefetchIterationsAhead() const {
> +    return MaxPrefetchIterationsAhead;
> +  }
> +
>    /// CPU has TBI (top byte of addresses is ignored during HW address
>    /// translation) and OS enables it.
>    bool supportsAddressTopByteIgnored() const;
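On the consumer side, passes now query these accessors instead of CPU names;
sketching it (the surrounding pass and helper are hypothetical):

    const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
    if (ST.hasMacroOpFusion())       // set for Cyclone via FeatureMacroOpFusion
      tryToFuseCompareAndBranch();   // hypothetical helper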
> @@ -160,14 +214,7 @@ public:
>    bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
>    bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
>
> -  bool isGeneric() const { return CPUString == "generic"; }
> -  bool isCyclone() const { return CPUString == "cyclone"; }
> -  bool isCortexA57() const { return CPUString == "cortex-a57"; }
> -  bool isCortexA53() const { return CPUString == "cortex-a53"; }
> -  bool isExynosM1() const { return CPUString == "exynos-m1"; }
> -  bool isKryo() const { return CPUString == "kryo"; }
> -
> -  bool useAA() const override { return isCortexA53(); }
> +  bool useAA() const override { return UseAA; }
>
>    /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size
>    /// that still makes it profitable to inline the call.
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Thu Jun  2 13:03:53 2016
> @@ -147,8 +147,7 @@ static void initReciprocals(AArch64Targe
>    // (52 mantissa bits) are 2 and 3, respectively.
>    unsigned ExtraStepsF = 2,
>             ExtraStepsD = ExtraStepsF + 1;
> -  // FIXME: Enable x^-1/2 only for Exynos M1 at the moment.
> -  bool UseRsqrt = ST.isExynosM1();
> +  bool UseRsqrt = ST.useRSqrt();
>
>    TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);
>    TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);
>
> Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff
>
> ==============================================================================
> --- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Thu Jun  2 13:03:53 2016
> @@ -368,9 +368,7 @@ int AArch64TTIImpl::getVectorInstrCost(u
>    }
>
>    // All other insert/extracts cost this much.
> -  if (ST->isKryo())
> -    return 2;
> -  return 3;
> +  return ST->getVectorInsertExtractBaseCost();
>  }
>
>  int AArch64TTIImpl::getArithmeticInstrCost(
> @@ -529,9 +527,7 @@ int AArch64TTIImpl::getCostOfKeepingLive
>  }
>
>  unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
> -  if (ST->isCortexA57() || ST->isKryo())
> -    return 4;
> -  return 2;
> +  return ST->getMaxInterleaveFactor();
>  }
>
>  void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
> @@ -630,28 +626,17 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(
>  }
>
>  unsigned AArch64TTIImpl::getCacheLineSize() {
> -  if (ST->isCyclone())
> -    return 64;
> -  return BaseT::getCacheLineSize();
> +  return ST->getCacheLineSize();
>  }
>
>  unsigned AArch64TTIImpl::getPrefetchDistance() {
> -  if (ST->isCyclone())
> -    return 280;
> -  return BaseT::getPrefetchDistance();
> +  return ST->getPrefetchDistance();
>  }
>
>  unsigned AArch64TTIImpl::getMinPrefetchStride() {
> -  if (ST->isCyclone())
> -    // The HW prefetcher handles accesses with strides up to 2KB.
> -    return 2048;
> -  return BaseT::getMinPrefetchStride();
> +  return ST->getMinPrefetchStride();
>  }
>
>  unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {
> -  if (ST->isCyclone())
> -    // Be conservative for now and don't prefetch ahead too much since the loop
> -    // may terminate early.
> -    return 3;
> -  return BaseT::getMaxPrefetchIterationsAhead();
> +  return ST->getMaxPrefetchIterationsAhead();
>  }
>
>