[llvm] r274620 - [ARM] Do not test for CPUs, use SubtargetFeatures. Also remove 2 flags.

Wed Jul 6 04:22:13 PDT 2016

Author: rovka
Date: Wed Jul  6 06:22:11 2016
New Revision: 274620

URL: http://llvm.org/viewvc/llvm-project?rev=274620&view=rev
Log:
[ARM] Do not test for CPUs, use SubtargetFeatures. Also remove 2 flags.

This is a follow-up for r273544.

The end goal is to get rid of the isSwift / isCortexXY / isWhatever methods.

This commit also removes two command-line flags that weren't used in any of the
tests: widen-vmovs and swift-partial-update-clearance. The former may be easily
replaced with the mattr mechanism, but the latter may not (as it is a subtarget
property, and not a proper feature).

Differential Revision: http://reviews.llvm.org/D21797

Modified:
    llvm/trunk/lib/Target/ARM/ARM.td
    llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
    llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
    llvm/trunk/lib/Target/ARM/ARMSubtarget.h
    llvm/trunk/test/CodeGen/ARM/widen-vmovs.ll

Modified: llvm/trunk/lib/Target/ARM/ARM.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARM.td?rev=274620&r1=274619&r2=274620&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARM.td (original)
+++ llvm/trunk/lib/Target/ARM/ARM.td Wed Jul  6 06:22:11 2016
@@ -145,6 +145,10 @@ def FeatureSlowOddRegister : SubtargetFe
 def FeatureSlowLoadDSubreg : SubtargetFeature<"slow-load-D-subreg",
                                               "SlowLoadDSubregister", "true",
                                               "Loading into D subregs is slow">;
+// Some targets (e.g. Cortex-A15) never want VMOVS to be widened to VMOVD.
+def FeatureDontWidenVMOVS : SubtargetFeature<"dont-widen-vmovs",
+                                             "DontWidenVMOVS", "true",
+                                             "Don't widen VMOVS to VMOVD">;
 
 // Some targets (e.g. Cortex-A9) want to convert VMOVRS, VMOVSR and VMOVS from
 // VFP to NEON, as an execution domain optimization.
@@ -612,6 +616,7 @@ def : ProcessorModel<"cortex-a12",  Cort
 
 // FIXME: A15 has currently the same Schedule model as A9.
 def : ProcessorModel<"cortex-a15",  CortexA9Model,      [ARMv7a, ProcA15,
+                                                         FeatureDontWidenVMOVS,
                                                          FeatureHasRetAddrStack,
                                                          FeatureMuxedUnits,
                                                          FeatureTrustZone,

Modified: llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp?rev=274620&r1=274619&r2=274620&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMBaseInstrInfo.cpp Wed Jul  6 06:22:11 2016
@@ -51,15 +51,6 @@ static cl::opt<bool>
 EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
                cl::desc("Enable ARM 2-addr to 3-addr conv"));
 
-static cl::opt<bool>
-WidenVMOVS("widen-vmovs", cl::Hidden, cl::init(true),
-           cl::desc("Widen ARM vmovs to vmovd when possible"));
-
-static cl::opt<unsigned>
-SwiftPartialUpdateClearance("swift-partial-update-clearance",
-     cl::Hidden, cl::init(12),
-     cl::desc("Clearance before partial register updates"));
-
 /// ARM_MLxEntry - Record information about MLA / MLS instructions.
 struct ARM_MLxEntry {
   uint16_t MLxOpc;     // MLA / MLS opcode
@@ -1305,8 +1296,7 @@ bool ARMBaseInstrInfo::expandPostRAPseud
   // copyPhysReg() calls.  Look for VMOVS instructions that can legally be
   // widened to VMOVD.  We prefer the VMOVD when possible because it may be
   // changed into a VORR that can go down the NEON pipeline.
-  if (!WidenVMOVS || !MI.isCopy() || Subtarget.isCortexA15() ||
-      Subtarget.isFPOnlySP())
+  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || Subtarget.isFPOnlySP())
     return false;
 
   // Look for a copy between even S-registers.  That is where we keep floats
@@ -4492,8 +4482,8 @@ void ARMBaseInstrInfo::setExecutionDomai
 unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
     const MachineInstr &MI, unsigned OpNum,
     const TargetRegisterInfo *TRI) const {
-  if (!SwiftPartialUpdateClearance ||
-      !(Subtarget.isSwift() || Subtarget.isCortexA15()))
+  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
+  if (!PartialUpdateClearance)
     return 0;
 
   assert(TRI && "Need TRI instance");
@@ -4545,7 +4535,7 @@ unsigned ARMBaseInstrInfo::getPartialReg
 
   // MI has an unwanted D-register dependency.
   // Avoid defs in the previous N instructrions.
-  return SwiftPartialUpdateClearance;
+  return PartialUpdateClearance;
 }
 
 // Break a partial register dependency after getPartialRegUpdateClearance

Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp?rev=274620&r1=274619&r2=274620&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.cpp Wed Jul  6 06:22:11 2016
@@ -219,6 +219,7 @@ void ARMSubtarget::initSubtargetFeatures
   case CortexA15:
     MaxInterleaveFactor = 2;
     PreISelOperandLatencyAdjustment = 1;
+    PartialUpdateClearance = 12;
     break;
   case CortexA17:
   case CortexA32:
@@ -241,6 +242,7 @@ void ARMSubtarget::initSubtargetFeatures
     MaxInterleaveFactor = 2;
     LdStMultipleTiming = SingleIssuePlusExtras;
     PreISelOperandLatencyAdjustment = 1;
+    PartialUpdateClearance = 12;
     break;
   }
 }

Modified: llvm/trunk/lib/Target/ARM/ARMSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMSubtarget.h?rev=274620&r1=274619&r2=274620&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMSubtarget.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMSubtarget.h Wed Jul  6 06:22:11 2016
@@ -259,6 +259,9 @@ protected:
   /// If true, the AGU and NEON/FPU units are multiplexed.
   bool HasMuxedUnits = false;
 
+  /// If true, VMOVS will never be widened to VMOVD
+  bool DontWidenVMOVS = false;
+
   /// If true, VMOVRS, VMOVSR and VMOVS will be converted from VFP to NEON.
   bool UseNEONForFPMovs = false;
 
@@ -302,6 +305,9 @@ protected:
 
   unsigned MaxInterleaveFactor = 1;
 
+  /// Clearance before partial register updates (in number of instructions)
+  unsigned PartialUpdateClearance = 0;
+
   /// What kind of timing do load multiple/store multiple have (double issue,
   /// single issue etc).
   ARMLdStMultipleTiming LdStMultipleTiming = SingleIssue;
@@ -448,6 +454,7 @@ public:
   bool hasSlowOddRegister() const { return SlowOddRegister; }
   bool hasSlowLoadDSubregister() const { return SlowLoadDSubregister; }
   bool hasMuxedUnits() const { return HasMuxedUnits; }
+  bool dontWidenVMOVS() const { return DontWidenVMOVS; }
   bool useNEONForFPMovs() const { return UseNEONForFPMovs; }
   bool checkVLDnAccessAlignment() const { return CheckVLDnAlign; }
   bool nonpipelinedVFP() const { return NonpipelinedVFP; }
@@ -591,6 +598,8 @@ public:
 
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
 
+  unsigned getPartialUpdateClearance() const { return PartialUpdateClearance; }
+
   ARMLdStMultipleTiming getLdStMultipleTiming() const {
     return LdStMultipleTiming;
   }

Modified: llvm/trunk/test/CodeGen/ARM/widen-vmovs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/widen-vmovs.ll?rev=274620&r1=274619&r2=274620&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/widen-vmovs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/widen-vmovs.ll Wed Jul  6 06:22:11 2016
@@ -1,4 +1,4 @@
-; RUN: llc < %s -widen-vmovs -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s
+; RUN: llc < %s -mcpu=cortex-a8 -verify-machineinstrs -disable-block-placement | FileCheck %s
 target triple = "thumbv7-apple-ios"
 
 ; The 1.0e+10 constant is loaded from the constant pool and kept in a register.