[llvm-commits] [llvm] r62074 - in /llvm/trunk: include/llvm/CodeGen/ScheduleDAG.h lib/CodeGen/ScheduleDAGEmit.cpp lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp test/CodeGen/X86/pr3244.ll

Evan Cheng evan.cheng at apple.com
Sun Jan 11 19:19:55 PST 2009


Author: evancheng
Date: Sun Jan 11 21:19:55 2009
New Revision: 62074

URL: http://llvm.org/viewvc/llvm-project?rev=62074&view=rev
Log:
Fix PR3241: Currently EmitCopyFromReg emits a copy from the physical register to a virtual register unless it requires an expensive cross class copy. That means we are only treating "expensive to copy" register dependency as physical register dependency.
Also future proof the scheduler to handle "normal" physical register dependencies. The code is not exercised yet.

Added:
    llvm/trunk/test/CodeGen/X86/pr3244.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
    llvm/trunk/lib/CodeGen/ScheduleDAGEmit.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp

Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=62074&r1=62073&r2=62074&view=diff

==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Sun Jan 11 21:19:55 2009
@@ -485,7 +485,7 @@
   protected:
     void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO);
 
-    void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
+    void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
 
     /// ForceUnitLatencies - Return true if all scheduling edges should be given a
     /// latency value of one.  The default is to return false; schedulers may

Modified: llvm/trunk/lib/CodeGen/ScheduleDAGEmit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/ScheduleDAGEmit.cpp?rev=62074&r1=62073&r2=62074&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/ScheduleDAGEmit.cpp (original)
+++ llvm/trunk/lib/CodeGen/ScheduleDAGEmit.cpp Sun Jan 11 21:19:55 2009
@@ -36,7 +36,7 @@
   TII->insertNoop(*BB, BB->end());
 }
 
-void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
+void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
                                   DenseMap<SUnit*, unsigned> &VRBaseMap) {
   for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
        I != E; ++I) {
@@ -49,12 +49,11 @@
       unsigned Reg = 0;
       for (SUnit::const_succ_iterator II = SU->Succs.begin(),
              EE = SU->Succs.end(); II != EE; ++II) {
-        if (I->getReg()) {
-          Reg = I->getReg();
+        if (II->getReg()) {
+          Reg = II->getReg();
           break;
         }
       }
-      assert(I->getReg() && "Unknown physical register!");
       TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
                         SU->CopyDstRC, SU->CopySrcRC);
     } else {

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp?rev=62074&r1=62073&r2=62074&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp Sun Jan 11 21:19:55 2009
@@ -28,7 +28,7 @@
 
 STATISTIC(NumUnfolds,    "Number of nodes unfolded");
 STATISTIC(NumDups,       "Number of duplicated nodes");
-STATISTIC(NumCCCopies,   "Number of cross class copies");
+STATISTIC(NumPRCopies,   "Number of physical copies");
 
 static RegisterScheduler
   fastDAGScheduler("fast", "Fast suboptimal list scheduling",
@@ -93,10 +93,10 @@
   void ReleasePred(SUnit *SU, SDep *PredEdge);
   void ScheduleNodeBottomUp(SUnit*, unsigned);
   SUnit *CopyAndMoveSuccessors(SUnit*);
-  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
-                                  const TargetRegisterClass*,
-                                  const TargetRegisterClass*,
-                                  SmallVector<SUnit*, 2>&);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
   void ListScheduleBottomUp();
 
@@ -361,17 +361,16 @@
       DelDeps.push_back(std::make_pair(SuccSU, D));
     }
   }
-  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
     RemovePred(DelDeps[i].first, DelDeps[i].second);
-  }
 
   ++NumDups;
   return NewSU;
 }
 
-/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
-/// and move all scheduled successors of the given SUnit to the last copy.
-void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
                                               const TargetRegisterClass *DestRC,
                                               const TargetRegisterClass *SrcRC,
                                                SmallVector<SUnit*, 2> &Copies) {
@@ -408,7 +407,7 @@
   Copies.push_back(CopyFromSU);
   Copies.push_back(CopyToSU);
 
-  ++NumCCCopies;
+  ++NumPRCopies;
 }
 
 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
@@ -524,19 +523,22 @@
         assert(LRegs.size() == 1 && "Can't handle this yet!");
         unsigned Reg = LRegs[0];
         SUnit *LRDef = LiveRegDefs[Reg];
-        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If cross copy register class is null, then it must be possible copy
+        // the value directly. Do not try duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
         if (!NewDef) {
-          // Issue expensive cross register class copies.
-          MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
-          const TargetRegisterClass *RC =
-            TRI->getPhysicalRegisterRegClass(Reg, VT);
-          const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-          if (!DestRC) {
-            assert(false && "Don't know how to copy this physical register!");
-            abort();
-          }
+          // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
-          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
           DOUT << "Adding an edge from SU # " << TrySU->NodeNum
                << " to SU #" << Copies.front()->NodeNum << "\n";
           AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp?rev=62074&r1=62073&r2=62074&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Sun Jan 11 21:19:55 2009
@@ -35,7 +35,7 @@
 STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
 STATISTIC(NumUnfolds,    "Number of nodes unfolded");
 STATISTIC(NumDups,       "Number of duplicated nodes");
-STATISTIC(NumCCCopies,   "Number of cross class copies");
+STATISTIC(NumPRCopies,   "Number of physical register copies");
 
 static RegisterScheduler
   burrListDAGScheduler("list-burr",
@@ -121,10 +121,10 @@
   void UnscheduleNodeBottomUp(SUnit*);
   void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
   SUnit *CopyAndMoveSuccessors(SUnit*);
-  void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
-                                  const TargetRegisterClass*,
-                                  const TargetRegisterClass*,
-                                  SmallVector<SUnit*, 2>&);
+  void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
+                                const TargetRegisterClass*,
+                                const TargetRegisterClass*,
+                                SmallVector<SUnit*, 2>&);
   bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
   void ListScheduleTopDown();
   void ListScheduleBottomUp();
@@ -517,11 +517,11 @@
   return NewSU;
 }
 
-/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
-/// and move all scheduled successors of the given SUnit to the last copy.
-void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
-                                              const TargetRegisterClass *DestRC,
-                                              const TargetRegisterClass *SrcRC,
+/// InsertCopiesAndMoveSuccs - Insert register copies and move all
+/// scheduled successors of the given SUnit to the last copy.
+void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
+                                               const TargetRegisterClass *DestRC,
+                                               const TargetRegisterClass *SrcRC,
                                                SmallVector<SUnit*, 2> &Copies) {
   SUnit *CopyFromSU = CreateNewSUnit(NULL);
   CopyFromSU->CopySrcRC = SrcRC;
@@ -546,9 +546,8 @@
       DelDeps.push_back(std::make_pair(SuccSU, *I));
     }
   }
-  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
+  for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
     RemovePred(DelDeps[i].first, DelDeps[i].second);
-  }
 
   AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
   AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
@@ -559,7 +558,7 @@
   Copies.push_back(CopyFromSU);
   Copies.push_back(CopyToSU);
 
-  ++NumCCCopies;
+  ++NumPRCopies;
 }
 
 /// getPhysicalRegisterVT - Returns the ValueType of the physical register
@@ -705,27 +704,32 @@
       }
 
       if (!CurSU) {
-        // Can't backtrack. Try duplicating the nodes that produces these
-        // "expensive to copy" values to break the dependency. In case even
-        // that doesn't work, insert cross class copies.
+        // Can't backtrack. If it's too expensive to copy the value, then try
+        // duplicate the nodes that produces these "too expensive to copy"
+        // values to break the dependency. In case even that doesn't work,
+        // insert cross class copies.
+        // If it's not too expensive, i.e. cost != -1, issue copies.
         SUnit *TrySU = NotReady[0];
         SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
         assert(LRegs.size() == 1 && "Can't handle this yet!");
         unsigned Reg = LRegs[0];
         SUnit *LRDef = LiveRegDefs[Reg];
-        SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
+        MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+        const TargetRegisterClass *RC =
+          TRI->getPhysicalRegisterRegClass(Reg, VT);
+        const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+        // If cross copy register class is null, then it must be possible copy
+        // the value directly. Do not try duplicate the def.
+        SUnit *NewDef = 0;
+        if (DestRC)
+          NewDef = CopyAndMoveSuccessors(LRDef);
+        else
+          DestRC = RC;
         if (!NewDef) {
-          // Issue expensive cross register class copies.
-          MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
-          const TargetRegisterClass *RC =
-            TRI->getPhysicalRegisterRegClass(Reg, VT);
-          const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-          if (!DestRC) {
-            assert(false && "Don't know how to copy this physical register!");
-            abort();
-          }
+          // Issue copies, these can be expensive cross register class copies.
           SmallVector<SUnit*, 2> Copies;
-          InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+          InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
           DOUT << "Adding an edge from SU #" << TrySU->NodeNum
                << " to SU #" << Copies.front()->NodeNum << "\n";
           AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=62074&r1=62073&r2=62074&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Sun Jan 11 21:19:55 2009
@@ -39,11 +39,11 @@
 
 /// CheckForPhysRegDependency - Check if the dependency between def and use of
 /// a specified operand is a physical register dependency. If so, returns the
-/// register.
+/// register and the cost of copying the register.
 static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
                                       const TargetRegisterInfo *TRI, 
                                       const TargetInstrInfo *TII,
-                                      unsigned &PhysReg) {
+                                      unsigned &PhysReg, int &Cost) {
   if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
     return;
 
@@ -55,8 +55,12 @@
   if (Def->isMachineOpcode()) {
     const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
     if (ResNo >= II.getNumDefs() &&
-        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
+        II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
       PhysReg = Reg;
+      const TargetRegisterClass *RC =
+        TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
+      Cost = RC->getCopyCost();
+    }
   }
 }
 
@@ -179,10 +183,18 @@
         bool isChain = OpVT == MVT::Other;
 
         unsigned PhysReg = 0;
+        int Cost = 1;
         // Determine if this is a physical register dependency.
-        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg);
+        CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
         assert((PhysReg == 0 || !isChain) &&
                "Chain dependence via physreg data?");
+        // FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
+        // emits a copy from the physical register to a virtual register unless
+        // it requires a cross class copy (cost < 0). That means we are only
+        // treating "expensive to copy" register dependency as physical register
+        // dependency. This may change in the future though.
+        if (Cost >= 0)
+          PhysReg = 0;
         SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
                          OpSU->Latency, PhysReg));
       }
@@ -252,10 +264,12 @@
 
 
 void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
-  if (SU->getNode())
-    SU->getNode()->dump(DAG);
-  else
-    cerr << "CROSS RC COPY ";
+  if (!SU->getNode()) {
+    cerr << "PHYS REG COPY\n";
+    return;
+  }
+
+  SU->getNode()->dump(DAG);
   cerr << "\n";
   SmallVector<SDNode *, 4> FlaggedNodes;
   for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp?rev=62074&r1=62073&r2=62074&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodesEmit.cpp Sun Jan 11 21:19:55 2009
@@ -629,6 +629,12 @@
 
     // For pre-regalloc scheduling, create instructions corresponding to the
     // SDNode and any flagged SDNodes and append them to the block.
+    if (!SU->getNode()) {
+      // Emit a copy.
+      EmitPhysRegCopy(SU, CopyVRBaseMap);
+      continue;
+    }
+
     SmallVector<SDNode *, 4> FlaggedNodes;
     for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
       FlaggedNodes.push_back(N);
@@ -636,10 +642,7 @@
       EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap);
       FlaggedNodes.pop_back();
     }
-    if (!SU->getNode())
-      EmitCrossRCCopy(SU, CopyVRBaseMap);
-    else
-      EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
+    EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
   }
 
   return BB;

Added: llvm/trunk/test/CodeGen/X86/pr3244.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr3244.ll?rev=62074&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr3244.ll (added)
+++ llvm/trunk/test/CodeGen/X86/pr3244.ll Sun Jan 11 21:19:55 2009
@@ -0,0 +1,26 @@
+; RUN: llvm-as < %s | llc -march=x86
+; PR3244
+
+ at g_62 = external global i16             ; <i16*> [#uses=1]
+ at g_487 = external global i32            ; <i32*> [#uses=1]
+
+define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
+entry:
+        %0 = load i16* @g_62, align 2           ; <i16> [#uses=1]
+        %1 = load i32* @g_487, align 4          ; <i32> [#uses=1]
+        %2 = trunc i16 %0 to i8         ; <i8> [#uses=1]
+        %3 = trunc i32 %1 to i8         ; <i8> [#uses=1]
+        %4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
+nounwind             ; <i32> [#uses=1]
+        %5 = trunc i32 %4 to i8         ; <i8> [#uses=1]
+        %6 = mul i8 %3, %2              ; <i8> [#uses=1]
+        %7 = mul i8 %6, %5              ; <i8> [#uses=1]
+        %8 = sext i8 %7 to i16          ; <i16> [#uses=1]
+        %9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind     
+        ; <i32> [#uses=0]
+        ret i32 undef
+}
+
+declare i32 @func_7(...)
+
+declare i32 @func_85(i16 signext, i32, i32)





More information about the llvm-commits mailing list