<div dir="ltr">Looks much nicer now, thanks!<div><br></div><div>-eric</div></div><br><div class="gmail_quote"><div dir="ltr">On Thu, Jun 2, 2016 at 11:10 AM Matthias Braun via llvm-commits <<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: matze<br>
Date: Thu Jun  2 13:03:53 2016<br>
New Revision: 271555<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=271555&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project?rev=271555&view=rev</a><br>
Log:<br>
AArch64: Do not test for CPUs, use SubtargetFeatures<br>
<br>
Testing for specific CPUs has a number of problems, better use subtarget<br>
features:<br>
- When some tweak is added for a specific CPU it is often desirable for<br>
  the next version of that CPU as well, yet we often forget to add it.<br>
- It is hard to keep track of checks scattered around the target code;<br>
  Declaring all target specifics together with the CPU in the tablegen<br>
  file is a clear representation.<br>
- Subtarget features can be tweaked from the command line.<br>
<br>
To discourage people from using CPU checks in the future I removed the<br>
isCortexXX(), isCyclone(), ... functions. I added a getProcFamily()<br>
function for exceptional circumstances but made it clear in the comment<br>
that usage is discouraged.<br>
<br>
Reformat feature list in AArch64.td to have 1 feature per line in<br>
alphabetical order to simplify merging and sorting for out of tree<br>
tweaks.<br>
<br>
No functional change intended.<br>
<br>
Differential Revision: <a href="http://reviews.llvm.org/D20762" rel="noreferrer" target="_blank">http://reviews.llvm.org/D20762</a><br>
<br>
Modified:<br>
    llvm/trunk/lib/Target/AArch64/AArch64.td<br>
    llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp<br>
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp<br>
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp<br>
    llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td<br>
    llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp<br>
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp<br>
    llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h<br>
    llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp<br>
    llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.td?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64.td (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64.td Thu Jun  2 13:03:53 2016<br>
@@ -58,6 +58,50 @@ def FeatureReserveX18 : SubtargetFeature<br>
                                          "Reserve X18, making it unavailable "<br>
                                          "as a GPR">;<br>
<br>
+def FeatureMergeNarrowLd : SubtargetFeature<"merge-narrow-ld",<br>
+                                            "MergeNarrowLoads", "true",<br>
+                                            "Merge narrow load instructions">;<br>
+<br>
+def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",<br>
+                                    "Use alias analysis during codegen">;<br>
+<br>
+def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",<br>
+    "true",<br>
+    "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;<br>
+<br>
+def FeaturePredictableSelectIsExpensive : SubtargetFeature<<br>
+    "predictable-select-expensive", "PredictableSelectIsExpensive", "true",<br>
+    "Prefer likely predicted branches over selects">;<br>
+<br>
+def FeatureCustomCheapAsMoveHandling : SubtargetFeature<"custom-cheap-as-move",<br>
+    "CustomAsCheapAsMove", "true",<br>
+    "Use custom code for TargetInstrInfo::isAsCheapAsAMove()">;<br>
+<br>
+def FeaturePostRAScheduler : SubtargetFeature<"use-postra-scheduler",<br>
+    "UsePostRAScheduler", "true", "Schedule again after register allocation">;<br>
+<br>
+def FeatureSlowMisaligned128Store : SubtargetFeature<"slow-misaligned-128store",<br>
+    "Misaligned128StoreIsSlow", "true", "Misaligned 128 bit stores are slow">;<br>
+<br>
+def FeatureAvoidQuadLdStPairs : SubtargetFeature<"no-quad-ldst-pairs",<br>
+    "AvoidQuadLdStPairs", "true",<br>
+    "Do not form quad load/store pair operations">;<br>
+<br>
+def FeatureAlternateSExtLoadCVTF32Pattern : SubtargetFeature<<br>
+    "alternate-sextload-cvt-f32-pattern", "UseAlternateSExtLoadCVTF32Pattern",<br>
+    "true", "Use alternative pattern for sextload convert to f32">;<br>
+<br>
+def FeatureMacroOpFusion : SubtargetFeature<<br>
+    "macroop-fusion", "HasMacroOpFusion", "true",<br>
+    "CPU supports macro op fusion">;<br>
+<br>
+def FeatureDisableLatencySchedHeuristic : SubtargetFeature<<br>
+    "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true",<br>
+    "Disable latency scheduling heuristic">;<br>
+<br>
+def FeatureUseRSqrt : SubtargetFeature<<br>
+    "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;<br>
+<br>
 //===----------------------------------------------------------------------===//<br>
 // Architectures.<br>
 //<br>
@@ -94,57 +138,87 @@ include "AArch64SchedM1.td"<br>
 include "AArch64SchedKryo.td"<br>
<br>
 def ProcA35     : SubtargetFeature<"a35", "ARMProcFamily", "CortexA35",<br>
-                                   "Cortex-A35 ARM processors",<br>
-                                   [FeatureFPARMv8,<br>
-                                   FeatureNEON,<br>
-                                   FeatureCrypto,<br>
+                                   "Cortex-A35 ARM processors", [<br>
                                    FeatureCRC,<br>
-                                   FeaturePerfMon]>;<br>
+                                   FeatureCrypto,<br>
+                                   FeatureFPARMv8,<br>
+                                   FeatureNEON,<br>
+                                   FeaturePerfMon<br>
+                                   ]>;<br>
<br>
 def ProcA53     : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53",<br>
-                                   "Cortex-A53 ARM processors",<br>
-                                   [FeatureFPARMv8,<br>
-                                   FeatureNEON,<br>
-                                   FeatureCrypto,<br>
+                                   "Cortex-A53 ARM processors", [<br>
+                                   FeatureBalanceFPOps,<br>
                                    FeatureCRC,<br>
-                                   FeaturePerfMon]>;<br>
+                                   FeatureCrypto,<br>
+                                   FeatureCustomCheapAsMoveHandling,<br>
+                                   FeatureFPARMv8,<br>
+                                   FeatureNEON,<br>
+                                   FeaturePerfMon,<br>
+                                   FeaturePostRAScheduler,<br>
+                                   FeatureUseAA<br>
+                                   ]>;<br>
<br>
 def ProcA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",<br>
-                                   "Cortex-A57 ARM processors",<br>
-                                   [FeatureFPARMv8,<br>
-                                   FeatureNEON,<br>
-                                   FeatureCrypto,<br>
+                                   "Cortex-A57 ARM processors", [<br>
+                                   FeatureBalanceFPOps,<br>
                                    FeatureCRC,<br>
-                                   FeaturePerfMon]>;<br>
+                                   FeatureCrypto,<br>
+                                   FeatureCustomCheapAsMoveHandling,<br>
+                                   FeatureFPARMv8,<br>
+                                   FeatureMergeNarrowLd,<br>
+                                   FeatureNEON,<br>
+                                   FeaturePerfMon,<br>
+                                   FeaturePostRAScheduler,<br>
+                                   FeaturePredictableSelectIsExpensive<br>
+                                   ]>;<br>
<br>
 def ProcCyclone : SubtargetFeature<"cyclone", "ARMProcFamily", "Cyclone",<br>
-                                   "Cyclone",<br>
-                                   [FeatureFPARMv8,<br>
-                                   FeatureNEON,<br>
+                                   "Cyclone", [<br>
+                                   FeatureAlternateSExtLoadCVTF32Pattern,<br>
                                    FeatureCrypto,<br>
+                                   FeatureDisableLatencySchedHeuristic,<br>
+                                   FeatureFPARMv8,<br>
+                                   FeatureMacroOpFusion,<br>
+                                   FeatureNEON,<br>
                                    FeaturePerfMon,<br>
-                                   FeatureZCRegMove, FeatureZCZeroing]>;<br>
+                                   FeatureSlowMisaligned128Store,<br>
+                                   FeatureZCRegMove,<br>
+                                   FeatureZCZeroing<br>
+                                   ]>;<br>
<br>
 def ProcExynosM1 : SubtargetFeature<"exynosm1", "ARMProcFamily", "ExynosM1",<br>
-                                    "Samsung Exynos-M1 processors",<br>
-                                    [FeatureFPARMv8,<br>
-                                    FeatureNEON,<br>
-                                    FeatureCrypto,<br>
+                                    "Samsung Exynos-M1 processors", [<br>
+                                    FeatureAvoidQuadLdStPairs,<br>
                                     FeatureCRC,<br>
-                                    FeaturePerfMon]>;<br>
+                                    FeatureCrypto,<br>
+                                    FeatureCustomCheapAsMoveHandling,<br>
+                                    FeatureFPARMv8,<br>
+                                    FeatureNEON,<br>
+                                    FeaturePerfMon,<br>
+                                    FeatureUseRSqrt<br>
+                                    ]>;<br>
<br>
 def ProcKryo    : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo",<br>
-                                   "Qualcomm Kryo processors",<br>
-                                   [FeatureFPARMv8,<br>
-                                   FeatureNEON,<br>
-                                   FeatureCrypto,<br>
+                                   "Qualcomm Kryo processors", [<br>
                                    FeatureCRC,<br>
-                                   FeaturePerfMon]>;<br>
-<br>
-def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8,<br>
-                                              FeatureNEON,<br>
-                                              FeatureCRC,<br>
-                                              FeaturePerfMon]>;<br>
+                                   FeatureCrypto,<br>
+                                   FeatureCustomCheapAsMoveHandling,<br>
+                                   FeatureFPARMv8,<br>
+                                   FeatureMergeNarrowLd,<br>
+                                   FeatureNEON,<br>
+                                   FeaturePerfMon,<br>
+                                   FeaturePostRAScheduler,<br>
+                                   FeaturePredictableSelectIsExpensive<br>
+                                   ]>;<br>
+<br>
+def : ProcessorModel<"generic", NoSchedModel, [<br>
+                     FeatureCRC,<br>
+                     FeatureFPARMv8,<br>
+                     FeatureNEON,<br>
+                     FeaturePerfMon,<br>
+                     FeaturePostRAScheduler<br>
+                     ]>;<br>
<br>
 // FIXME: Cortex-A35 is currently modelled as a Cortex-A53<br>
 def : ProcessorModel<"cortex-a35", CortexA53Model, [ProcA35]>;<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64A57FPLoadBalancing.cpp Thu Jun  2 13:03:53 2016<br>
@@ -314,9 +314,7 @@ bool AArch64A57FPLoadBalancing::runOnMac<br>
   if (skipFunction(*F.getFunction()))<br>
     return false;<br>
<br>
-  // Don't do anything if this isn't an A53 or A57.<br>
-  if (!(F.getSubtarget<AArch64Subtarget>().isCortexA53() ||<br>
-        F.getSubtarget<AArch64Subtarget>().isCortexA57()))<br>
+  if (!F.getSubtarget<AArch64Subtarget>().balanceFPOps())<br>
     return false;<br>
<br>
   bool Changed = false;<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Thu Jun  2 13:03:53 2016<br>
@@ -634,9 +634,7 @@ AArch64TargetLowering::AArch64TargetLowe<br>
     }<br>
   }<br>
<br>
-  // Prefer likely predicted branches to selects on out-of-order cores.<br>
-  if (Subtarget->isCortexA57() || Subtarget->isKryo())<br>
-    PredictableSelectIsExpensive = true;<br>
+  PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();<br>
 }<br>
<br>
 void AArch64TargetLowering::addTypeForNEON(MVT VT, MVT PromotedBitwiseVT) {<br>
@@ -814,12 +812,9 @@ bool AArch64TargetLowering::allowsMisali<br>
   if (Subtarget->requiresStrictAlign())<br>
     return false;<br>
<br>
-  // FIXME: This is mostly true for Cyclone, but not necessarily others.<br>
   if (Fast) {<br>
-    // FIXME: Define an attribute for slow unaligned accesses instead of<br>
-    // relying on the CPU type as a proxy.<br>
-    // On Cyclone, unaligned 128-bit stores are slow.<br>
-    *Fast = !Subtarget->isCyclone() || VT.getStoreSize() != 16 ||<br>
+    // Some CPUs are fine with unaligned stores except for 128-bit ones.<br>
+    *Fast = !Subtarget->isMisaligned128StoreSlow() || VT.getStoreSize() != 16 ||<br>
             // See comments in performSTORECombine() for more details about<br>
             // these conditions.<br>
<br>
@@ -8792,9 +8787,7 @@ static SDValue split16BStores(SDNode *N,<br>
   // be included in TLI.allowsMisalignedMemoryAccesses(), and there should be<br>
   // a call to that function here.<br>
<br>
-  // Cyclone has bad performance on unaligned 16B stores when crossing line and<br>
-  // page boundaries. We want to split such stores.<br>
-  if (!Subtarget->isCyclone())<br>
+  if (!Subtarget->isMisaligned128StoreSlow())<br>
     return SDValue();<br>
<br>
   // Don't split at -Oz.<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.cpp Thu Jun  2 13:03:53 2016<br>
@@ -544,8 +544,7 @@ static bool canBeExpandedToORR(const Mac<br>
 // FIXME: this implementation should be micro-architecture dependent, so a<br>
 // micro-architecture target hook should be introduced here in future.<br>
 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr *MI) const {<br>
-  if (!Subtarget.isCortexA57() && !Subtarget.isCortexA53() &&<br>
-      !Subtarget.isExynosM1() && !Subtarget.isKryo())<br>
+  if (!Subtarget.hasCustomCheapAsMoveHandling())<br>
     return MI->isAsCheapAsAMove();<br>
<br>
   unsigned Imm;<br>
@@ -559,7 +558,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(<br>
   case AArch64::ADDXri:<br>
   case AArch64::SUBWri:<br>
   case AArch64::SUBXri:<br>
-    return (Subtarget.isExynosM1() ||<br>
+    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 ||<br>
             MI->getOperand(3).getImm() == 0);<br>
<br>
   // add/sub on register with shift<br>
@@ -568,7 +567,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(<br>
   case AArch64::SUBWrs:<br>
   case AArch64::SUBXrs:<br>
     Imm = MI->getOperand(3).getImm();<br>
-    return (Subtarget.isExynosM1() &&<br>
+    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&<br>
             AArch64_AM::getArithShiftValue(Imm) < 4);<br>
<br>
   // logical ops on immediate<br>
@@ -609,7 +608,7 @@ bool AArch64InstrInfo::isAsCheapAsAMove(<br>
   case AArch64::ORRWrs:<br>
   case AArch64::ORRXrs:<br>
     Imm = MI->getOperand(3).getImm();<br>
-    return (Subtarget.isExynosM1() &&<br>
+    return (Subtarget.getProcFamily() == AArch64Subtarget::ExynosM1 &&<br>
             AArch64_AM::getShiftValue(Imm) < 4 &&<br>
             AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL);<br>
<br>
@@ -1522,8 +1521,8 @@ bool AArch64InstrInfo::isCandidateToMerg<br>
   if (isLdStPairSuppressed(MI))<br>
     return false;<br>
<br>
-  // Do not pair quad ld/st for Exynos.<br>
-  if (Subtarget.isExynosM1()) {<br>
+  // On some CPUs quad load/store pairs are slower than two single load/stores.<br>
+  if (Subtarget.avoidQuadLdStPairs()) {<br>
     switch (MI->getOpcode()) {<br>
     default:<br>
       break;<br>
@@ -1801,8 +1800,8 @@ bool AArch64InstrInfo::shouldClusterMemO<br>
<br>
 bool AArch64InstrInfo::shouldScheduleAdjacent(MachineInstr *First,<br>
                                               MachineInstr *Second) const {<br>
-  if (Subtarget.isCyclone()) {<br>
-    // Cyclone can fuse CMN, CMP, TST followed by Bcc.<br>
+  if (Subtarget.hasMacroOpFusion()) {<br>
+    // Fuse CMN, CMP, TST followed by Bcc.<br>
     unsigned SecondOpcode = Second->getOpcode();<br>
     if (SecondOpcode == AArch64::Bcc) {<br>
       switch (First->getOpcode()) {<br>
@@ -1817,7 +1816,7 @@ bool AArch64InstrInfo::shouldScheduleAdj<br>
         return true;<br>
       }<br>
     }<br>
-    // Cyclone B0 also supports ALU operations followed by CBZ/CBNZ.<br>
+    // Fuse ALU operations followed by CBZ/CBNZ.<br>
     if (SecondOpcode == AArch64::CBNZW || SecondOpcode == AArch64::CBNZX ||<br>
         SecondOpcode == AArch64::CBZW || SecondOpcode == AArch64::CBZX) {<br>
       switch (First->getOpcode()) {<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64InstrInfo.td Thu Jun  2 13:03:53 2016<br>
@@ -34,7 +34,8 @@ def HasSPE           : Predicate<"Subtar<br>
<br>
 def IsLE             : Predicate<"Subtarget->isLittleEndian()">;<br>
 def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;<br>
-def IsCyclone        : Predicate<"Subtarget->isCyclone()">;<br>
+def UseAlternateSExtLoadCVTF32<br>
+    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;<br>
<br>
 //===----------------------------------------------------------------------===//<br>
 // AArch64-specific DAG Nodes.<br>
@@ -4957,7 +4958,8 @@ class SExtLoadi8CVTf32Pat<dag addrmode,<br>
                                     0),<br>
                                   dsub)),<br>
                                0),<br>
-                             ssub)))>, Requires<[NotForCodeSize, IsCyclone]>;<br>
+                             ssub)))>,<br>
+    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;<br>
<br>
 def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),<br>
                           (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;<br>
@@ -5010,7 +5012,8 @@ class SExtLoadi16CVTf64Pat<dag addrmode,<br>
                                      0),<br>
                                    dsub)),<br>
                                0),<br>
-                             dsub)))>, Requires<[NotForCodeSize, IsCyclone]>;<br>
+                             dsub)))>,<br>
+    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32]>;<br>
<br>
 def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),<br>
                            (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp Thu Jun  2 13:03:53 2016<br>
@@ -160,10 +160,6 @@ struct AArch64LoadStoreOpt : public Mach<br>
   // Find and promote load instructions which read directly from store.<br>
   bool tryToPromoteLoadFromStore(MachineBasicBlock::iterator &MBBI);<br>
<br>
-  // Check if converting two narrow loads into a single wider load with<br>
-  // bitfield extracts could be enabled.<br>
-  bool enableNarrowLdMerge(MachineFunction &Fn);<br>
-<br>
   bool optimizeBlock(MachineBasicBlock &MBB, bool enableNarrowLdOpt);<br>
<br>
   bool runOnMachineFunction(MachineFunction &Fn) override;<br>
@@ -1912,15 +1908,6 @@ bool AArch64LoadStoreOpt::optimizeBlock(<br>
   return Modified;<br>
 }<br>
<br>
-bool AArch64LoadStoreOpt::enableNarrowLdMerge(MachineFunction &Fn) {<br>
-  bool ProfitableArch = Subtarget->isCortexA57() || Subtarget->isKryo();<br>
-  // FIXME: The benefit from converting narrow loads into a wider load could be<br>
-  // microarchitectural as it assumes that a single load with two bitfield<br>
-  // extracts is cheaper than two narrow loads. Currently, this conversion is<br>
-  // enabled only in cortex-a57 on which performance benefits were verified.<br>
-  return ProfitableArch && !Subtarget->requiresStrictAlign();<br>
-}<br>
-<br>
 bool AArch64LoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {<br>
   if (skipFunction(*Fn.getFunction()))<br>
     return false;<br>
@@ -1936,7 +1923,8 @@ bool AArch64LoadStoreOpt::runOnMachineFu<br>
   UsedRegs.resize(TRI->getNumRegs());<br>
<br>
   bool Modified = false;<br>
-  bool enableNarrowLdOpt = enableNarrowLdMerge(Fn);<br>
+  bool enableNarrowLdOpt =<br>
+    Subtarget->mergeNarrowLoads() && !Subtarget->requiresStrictAlign();<br>
   for (auto &MBB : Fn)<br>
     Modified |= optimizeBlock(MBB, enableNarrowLdOpt);<br>
<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.cpp Thu Jun  2 13:03:53 2016<br>
@@ -44,9 +44,36 @@ AArch64Subtarget::initializeSubtargetDep<br>
     CPUString = "generic";<br>
<br>
   ParseSubtargetFeatures(CPUString, FS);<br>
+  initializeProperties();<br>
+<br>
   return *this;<br>
 }<br>
<br>
+void AArch64Subtarget::initializeProperties() {<br>
+  // Initialize CPU specific properties. We should add a tablegen feature for<br>
+  // this in the future so we can specify it together with the subtarget<br>
+  // features.<br>
+  switch (ARMProcFamily) {<br>
+  case Cyclone:<br>
+    CacheLineSize = 64;<br>
+    PrefetchDistance = 280;<br>
+    MinPrefetchStride = 2048;<br>
+    MaxPrefetchIterationsAhead = 3;<br>
+    break;<br>
+  case CortexA57:<br>
+    MaxInterleaveFactor = 4;<br>
+    break;<br>
+  case Kryo:<br>
+    MaxInterleaveFactor = 4;<br>
+    VectorInsertExtractBaseCost = 2;<br>
+    break;<br>
+  case Others: break;<br>
+  case CortexA35: break;<br>
+  case CortexA53: break;<br>
+  case ExynosM1: break;<br>
+  }<br>
+}<br>
+<br>
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,<br>
                                    const std::string &FS,<br>
                                    const TargetMachine &TM, bool LittleEndian)<br>
@@ -110,8 +137,7 @@ void AArch64Subtarget::overrideSchedPoli<br>
   // Enabling or Disabling the latency heuristic is a close call: It seems to<br>
   // help nearly no benchmark on out-of-order architectures, on the other hand<br>
   // it regresses register pressure on a few benchmarking.<br>
-  if (isCyclone())<br>
-    Policy.DisableLatencyHeuristic = true;<br>
+  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;<br>
 }<br>
<br>
 bool AArch64Subtarget::enableEarlyIfConversion() const {<br>
@@ -133,8 +159,5 @@ bool AArch64Subtarget::supportsAddressTo<br>
<br>
 std::unique_ptr<PBQPRAConstraint><br>
 AArch64Subtarget::getCustomPBQPConstraints() const {<br>
-  if (!isCortexA57())<br>
-    return nullptr;<br>
-<br>
-  return llvm::make_unique<A57ChainingConstraint>();<br>
+  return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;<br>
 }<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64Subtarget.h Thu Jun  2 13:03:53 2016<br>
@@ -33,8 +33,8 @@ class StringRef;<br>
 class Triple;<br>
<br>
 class AArch64Subtarget : public AArch64GenSubtargetInfo {<br>
-protected:<br>
-  enum ARMProcFamilyEnum {<br>
+public:<br>
+  enum ARMProcFamilyEnum : uint8_t {<br>
     Others,<br>
     CortexA35,<br>
     CortexA53,<br>
@@ -44,6 +44,7 @@ protected:<br>
     Kryo<br>
   };<br>
<br>
+protected:<br>
   /// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.<br>
   ARMProcFamilyEnum ARMProcFamily = Others;<br>
<br>
@@ -66,6 +67,24 @@ protected:<br>
<br>
   // StrictAlign - Disallow unaligned memory accesses.<br>
   bool StrictAlign = false;<br>
+  bool MergeNarrowLoads = false;<br>
+  bool UseAA = false;<br>
+  bool PredictableSelectIsExpensive = false;<br>
+  bool BalanceFPOps = false;<br>
+  bool CustomAsCheapAsMove = false;<br>
+  bool UsePostRAScheduler = false;<br>
+  bool Misaligned128StoreIsSlow = false;<br>
+  bool AvoidQuadLdStPairs = false;<br>
+  bool UseAlternateSExtLoadCVTF32Pattern = false;<br>
+  bool HasMacroOpFusion = false;<br>
+  bool DisableLatencySchedHeuristic = false;<br>
+  bool UseRSqrt = false;<br>
+  uint8_t MaxInterleaveFactor = 2;<br>
+  uint8_t VectorInsertExtractBaseCost = 3;<br>
+  uint16_t CacheLineSize = 0;<br>
+  uint16_t PrefetchDistance = 0;<br>
+  uint16_t MinPrefetchStride = 1;<br>
+  unsigned MaxPrefetchIterationsAhead = UINT_MAX;<br>
<br>
   // ReserveX18 - X18 is not available as a general purpose register.<br>
   bool ReserveX18;<br>
@@ -93,6 +112,9 @@ private:<br>
   /// subtarget initialization.<br>
   AArch64Subtarget &initializeSubtargetDependencies(StringRef FS);<br>
<br>
+  /// Initialize properties based on the selected processor family.<br>
+  void initializeProperties();<br>
+<br>
 public:<br>
   /// This constructor initializes the data members to match that<br>
   /// of the specified triple.<br>
@@ -123,7 +145,15 @@ public:<br>
   const Triple &getTargetTriple() const { return TargetTriple; }<br>
   bool enableMachineScheduler() const override { return true; }<br>
   bool enablePostRAScheduler() const override {<br>
-    return isGeneric() || isCortexA53() || isCortexA57() || isKryo();<br>
+    return UsePostRAScheduler;<br>
+  }<br>
+<br>
+  /// Returns ARM processor family.<br>
+  /// Avoid this function! CPU specifics should be kept local to this class<br>
+  /// and preferably modeled with SubtargetFeatures or properties in<br>
+  /// initializeProperties().<br>
+  ARMProcFamilyEnum getProcFamily() const {<br>
+    return ARMProcFamily;<br>
   }<br>
<br>
   bool hasV8_1aOps() const { return HasV8_1aOps; }<br>
@@ -140,6 +170,30 @@ public:<br>
   bool hasNEON() const { return HasNEON; }<br>
   bool hasCrypto() const { return HasCrypto; }<br>
   bool hasCRC() const { return HasCRC; }<br>
+  bool mergeNarrowLoads() const { return MergeNarrowLoads; }<br>
+  bool balanceFPOps() const { return BalanceFPOps; }<br>
+  bool predictableSelectIsExpensive() const {<br>
+    return PredictableSelectIsExpensive;<br>
+  }<br>
+  bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }<br>
+  bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }<br>
+  bool avoidQuadLdStPairs() const { return AvoidQuadLdStPairs; }<br>
+  bool useAlternateSExtLoadCVTF32Pattern() const {<br>
+    return UseAlternateSExtLoadCVTF32Pattern;<br>
+  }<br>
+  bool hasMacroOpFusion() const { return HasMacroOpFusion; }<br>
+  bool useRSqrt() const { return UseRSqrt; }<br>
+  unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }<br>
+  unsigned getVectorInsertExtractBaseCost() const {<br>
+    return VectorInsertExtractBaseCost;<br>
+  }<br>
+  unsigned getCacheLineSize() const { return CacheLineSize; }<br>
+  unsigned getPrefetchDistance() const { return PrefetchDistance; }<br>
+  unsigned getMinPrefetchStride() const { return MinPrefetchStride; }<br>
+  unsigned getMaxPrefetchIterationsAhead() const {<br>
+    return MaxPrefetchIterationsAhead;<br>
+  }<br>
+<br>
   /// CPU has TBI (top byte of addresses is ignored during HW address<br>
   /// translation) and OS enables it.<br>
   bool supportsAddressTopByteIgnored() const;<br>
@@ -160,14 +214,7 @@ public:<br>
   bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }<br>
   bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }<br>
<br>
-  bool isGeneric() const { return CPUString == "generic"; }<br>
-  bool isCyclone() const { return CPUString == "cyclone"; }<br>
-  bool isCortexA57() const { return CPUString == "cortex-a57"; }<br>
-  bool isCortexA53() const { return CPUString == "cortex-a53"; }<br>
-  bool isExynosM1() const { return CPUString == "exynos-m1"; }<br>
-  bool isKryo() const { return CPUString == "kryo"; }<br>
-<br>
-  bool useAA() const override { return isCortexA53(); }<br>
+  bool useAA() const override { return UseAA; }<br>
<br>
   /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size<br>
   /// that still makes it profitable to inline the call.<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Thu Jun  2 13:03:53 2016<br>
@@ -147,8 +147,7 @@ static void initReciprocals(AArch64Targe<br>
   // (52 mantissa bits) are 2 and 3, respectively.<br>
   unsigned ExtraStepsF = 2,<br>
            ExtraStepsD = ExtraStepsF + 1;<br>
-  // FIXME: Enable x^-1/2 only for Exynos M1 at the moment.<br>
-  bool UseRsqrt = ST.isExynosM1();<br>
+  bool UseRsqrt = ST.useRSqrt();<br>
<br>
   TM.Options.Reciprocals.setDefaults("sqrtf", UseRsqrt, ExtraStepsF);<br>
   TM.Options.Reciprocals.setDefaults("sqrtd", UseRsqrt, ExtraStepsD);<br>
<br>
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp?rev=271555&r1=271554&r2=271555&view=diff</a><br>
==============================================================================<br>
--- llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (original)<br>
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetTransformInfo.cpp Thu Jun  2 13:03:53 2016<br>
@@ -368,9 +368,7 @@ int AArch64TTIImpl::getVectorInstrCost(u<br>
   }<br>
<br>
   // All other insert/extracts cost this much.<br>
-  if (ST->isKryo())<br>
-    return 2;<br>
-  return 3;<br>
+  return ST->getVectorInsertExtractBaseCost();<br>
 }<br>
<br>
 int AArch64TTIImpl::getArithmeticInstrCost(<br>
@@ -529,9 +527,7 @@ int AArch64TTIImpl::getCostOfKeepingLive<br>
 }<br>
<br>
 unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {<br>
-  if (ST->isCortexA57() || ST->isKryo())<br>
-    return 4;<br>
-  return 2;<br>
+  return ST->getMaxInterleaveFactor();<br>
 }<br>
<br>
 void AArch64TTIImpl::getUnrollingPreferences(Loop *L,<br>
@@ -630,28 +626,17 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(<br>
 }<br>
<br>
 unsigned AArch64TTIImpl::getCacheLineSize() {<br>
-  if (ST->isCyclone())<br>
-    return 64;<br>
-  return BaseT::getCacheLineSize();<br>
+  return ST->getCacheLineSize();<br>
 }<br>
<br>
 unsigned AArch64TTIImpl::getPrefetchDistance() {<br>
-  if (ST->isCyclone())<br>
-    return 280;<br>
-  return BaseT::getPrefetchDistance();<br>
+  return ST->getPrefetchDistance();<br>
 }<br>
<br>
 unsigned AArch64TTIImpl::getMinPrefetchStride() {<br>
-  if (ST->isCyclone())<br>
-    // The HW prefetcher handles accesses with strides up to 2KB.<br>
-    return 2048;<br>
-  return BaseT::getMinPrefetchStride();<br>
+  return ST->getMinPrefetchStride();<br>
 }<br>
<br>
 unsigned AArch64TTIImpl::getMaxPrefetchIterationsAhead() {<br>
-  if (ST->isCyclone())<br>
-    // Be conservative for now and don't prefetch ahead too much since the loop<br>
-    // may terminate early.<br>
-    return 3;<br>
-  return BaseT::getMaxPrefetchIterationsAhead();<br>
+  return ST->getMaxPrefetchIterationsAhead();<br>
 }<br>
<br>
<br>
_______________________________________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits</a><br>
</blockquote></div>