[llvm-commits] [llvm] r130245 - in /llvm/trunk: include/llvm/CodeGen/ScheduleDAG.h lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp test/CodeGen/ARM/2011-04-26-SchedTweak.ll test/CodeGen/ARM/shifter_operand.ll test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll test/CodeGen/X86/pic.ll

Evan Cheng evan.cheng at apple.com
Tue Apr 26 14:31:36 PDT 2011


Author: evancheng
Date: Tue Apr 26 16:31:35 2011
New Revision: 130245

URL: http://llvm.org/viewvc/llvm-project?rev=130245&view=rev
Log:
Be careful about scheduling nodes above previous calls. Doing so increases the
use of callee-saved registers and introduces copies. Only allow it if scheduling
a node above calls would end up lessening register pressure.

Call operands also have added ABI restrictions for register allocation, so be
extra careful when hoisting them above calls.

rdar://9329627

Added:
    llvm/trunk/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
Removed:
    llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
Modified:
    llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
    llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
    llvm/trunk/test/CodeGen/ARM/shifter_operand.ll
    llvm/trunk/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
    llvm/trunk/test/CodeGen/X86/pic.ll

Modified: llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h?rev=130245&r1=130244&r2=130245&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/ScheduleDAG.h Tue Apr 26 16:31:35 2011
@@ -252,6 +252,7 @@
     unsigned short Latency;             // Node latency.
     bool isVRegCycle      : 1;          // May use and def the same vreg.
     bool isCall           : 1;          // Is a function call.
+    bool isCallOp         : 1;          // Is a function call operand.
     bool isTwoAddress     : 1;          // Is a two-address instruction.
     bool isCommutable     : 1;          // Is a commutable instruction.
     bool hasPhysRegDefs   : 1;          // Has physreg defs that are being used.
@@ -280,7 +281,7 @@
       : Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-        isVRegCycle(false), isCall(false), isTwoAddress(false),
+        isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
         isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isScheduleLow(false), isCloned(false),
@@ -294,7 +295,7 @@
       : Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-        isVRegCycle(false), isCall(false), isTwoAddress(false),
+        isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
         isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isScheduleLow(false), isCloned(false),
@@ -307,7 +308,7 @@
       : Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
         NodeQueueId(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
         NumSuccsLeft(0), NumRegDefsLeft(0), Latency(0),
-        isVRegCycle(false), isCall(false), isTwoAddress(false),
+        isVRegCycle(false), isCall(false), isCallOp(false), isTwoAddress(false),
         isCommutable(false), hasPhysRegDefs(false), hasPhysRegClobbers(false),
         isPending(false), isAvailable(false), isScheduled(false),
         isScheduleHigh(false), isScheduleLow(false), isCloned(false),

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp?rev=130245&r1=130244&r2=130245&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp Tue Apr 26 16:31:35 2011
@@ -1732,7 +1732,17 @@
     // If SU does not have a register def, schedule it close to its uses
     // because it does not lengthen any live ranges.
     return 0;
+#if 1
   return SethiUllmanNumbers[SU->NodeNum];
+#else
+  unsigned Priority = SethiUllmanNumbers[SU->NodeNum];
+  if (SU->isCallOp) {
+    // FIXME: This assumes all of the defs are used as call operands.
+    int NP = (int)Priority - SU->getNode()->getNumValues();
+    return (NP > 0) ? NP : 0;
+  }
+  return Priority;
+#endif
 }
 
 //===----------------------------------------------------------------------===//
@@ -2238,11 +2248,35 @@
   // Prioritize by Sethi-Ulmann number and push CopyToReg nodes down.
   unsigned LPriority = SPQ->getNodePriority(left);
   unsigned RPriority = SPQ->getNodePriority(right);
+
+  // Be really careful about hoisting call operands above previous calls.
+  // Only allows it if it would reduce register pressure.
+  if (left->isCall && right->isCallOp) {
+    unsigned RNumVals = right->getNode()->getNumValues();
+    RPriority = (RPriority > RNumVals) ? (RPriority - RNumVals) : 0;
+  }
+  if (right->isCall && left->isCallOp) {
+    unsigned LNumVals = left->getNode()->getNumValues();
+    LPriority = (LPriority > LNumVals) ? (LPriority - LNumVals) : 0;
+  }
+
   if (LPriority != RPriority) {
     DEBUG(++FactorCount[FactStatic]);
     return LPriority > RPriority;
   }
 
+  // One or both of the nodes are calls and their sethi-ullman numbers are the
+  // same, then keep source order.
+  if (left->isCall || right->isCall) {
+    unsigned LOrder = SPQ->getNodeOrdering(left);
+    unsigned ROrder = SPQ->getNodeOrdering(right);
+
+    // Prefer an ordering where the lower the non-zero order number, the higher
+    // the preference.
+    if ((LOrder || ROrder) && LOrder != ROrder)
+      return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+  }
+
   // Try schedule def + use closer when Sethi-Ullman numbers are the same.
   // e.g.
   // t1 = op t2, c1
@@ -2275,7 +2309,14 @@
     return LScratch > RScratch;
   }
 
-  if (!DisableSchedCycles) {
+  // Comparing latency against a call makes little sense unless the node
+  // is register pressure-neutral.
+  if ((left->isCall && RPriority > 0) || (right->isCall && LPriority > 0))
+    return (left->NodeQueueId > right->NodeQueueId);
+
+  // Do not compare latencies when one or both of the nodes are calls.
+  if (!DisableSchedCycles &&
+      !(left->isCall || right->isCall)) {
     int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
     if (result != 0)
       return result > 0;

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp?rev=130245&r1=130244&r2=130245&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp Tue Apr 26 16:31:35 2011
@@ -83,6 +83,7 @@
   SU->Latency = Old->Latency;
   SU->isVRegCycle = Old->isVRegCycle;
   SU->isCall = Old->isCall;
+  SU->isCallOp = Old->isCallOp;
   SU->isTwoAddress = Old->isTwoAddress;
   SU->isCommutable = Old->isCommutable;
   SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -285,6 +286,7 @@
   Worklist.push_back(DAG->getRoot().getNode());
   Visited.insert(DAG->getRoot().getNode());
 
+  SmallVector<SUnit*, 8> CallSUnits;
   while (!Worklist.empty()) {
     SDNode *NI = Worklist.pop_back_val();
 
@@ -337,6 +339,9 @@
       if (!HasGlueUse) break;
     }
 
+    if (NodeSUnit->isCall)
+      CallSUnits.push_back(NodeSUnit);
+
     // Schedule zero-latency TokenFactor below any nodes that may increase the
     // schedule height. Otherwise, ancestors of the TokenFactor may appear to
     // have false stalls.
@@ -356,6 +361,20 @@
     // Assign the Latency field of NodeSUnit using target-provided information.
     ComputeLatency(NodeSUnit);
   }
+
+  // Find all call operands.
+  while (!CallSUnits.empty()) {
+    SUnit *SU = CallSUnits.pop_back_val();
+    for (const SDNode *SUNode = SU->getNode(); SUNode;
+         SUNode = SUNode->getGluedNode()) {
+      if (SUNode->getOpcode() != ISD::CopyToReg)
+        continue;
+      SDNode *SrcN = SUNode->getOperand(2).getNode();
+      if (isPassiveNode(SrcN)) continue;   // Not scheduled.
+      SUnit *SrcSU = &SUnits[SrcN->getNodeId()];
+      SrcSU->isCallOp = true;
+    }
+  }
 }
 
 void ScheduleDAGSDNodes::AddSchedEdges() {

Added: llvm/trunk/test/CodeGen/ARM/2011-04-26-SchedTweak.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2011-04-26-SchedTweak.ll?rev=130245&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/2011-04-26-SchedTweak.ll (added)
+++ llvm/trunk/test/CodeGen/ARM/2011-04-26-SchedTweak.ll Tue Apr 26 16:31:35 2011
@@ -0,0 +1,70 @@
+; RUN: llc < %s -mtriple=thumbv7-apple-ios -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
+
+; Do not move the umull above previous call which would require use of
+; more callee-saved registers and introduce copies.
+; rdar://9329627
+
+%struct.FF = type { i32 (i32*)*, i32 (i32*, i32*, i32, i32, i32, i32)*, i32 (i32, i32, i8*)*, void ()*, i32 (i32, i8*, i32*)*, i32 ()* }
+%struct.BD = type { %struct.BD*, i32, i32, i32, i32, i64, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i8*, i64, i32)*, i32 (%struct.BD*, i8*, i32, i32)*, i32 (%struct.BD*, i64, i32)*, [16 x i8], i64, i64 }
+
+ at FuncPtr = external hidden unnamed_addr global %struct.FF*
+ at .str1 = external hidden unnamed_addr constant [6 x i8], align 4
+ at G = external unnamed_addr global i32
+ at .str2 = external hidden unnamed_addr constant [58 x i8], align 4
+ at .str3 = external hidden unnamed_addr constant [58 x i8], align 4
+
+define i32 @test() nounwind optsize ssp {
+entry:
+; CHECK: test:
+; CHECK: push
+; CHECK-NOT: push
+  %block_size = alloca i32, align 4
+  %block_count = alloca i32, align 4
+  %index_cache = alloca i32, align 4
+  store i32 0, i32* %index_cache, align 4
+  %tmp = load i32* @G, align 4
+  %tmp1 = call i32 @bar(i32 0, i32 0, i32 %tmp) nounwind
+  switch i32 %tmp1, label %bb8 [
+    i32 0, label %bb
+    i32 536870913, label %bb4
+    i32 536870914, label %bb6
+  ]
+
+bb:
+  %tmp2 = load i32* @G, align 4
+  %tmp4 = icmp eq i32 %tmp2, 0
+  br i1 %tmp4, label %bb1, label %bb8
+
+bb1:
+; CHECK: %bb1
+; CHECK-NOT: umull
+; CHECK: blx _Get
+; CHECK: umull
+; CHECK: blx _foo
+  %tmp5 = load i32* %block_size, align 4
+  %tmp6 = load i32* %block_count, align 4
+  %tmp7 = call %struct.FF* @Get() nounwind
+  store %struct.FF* %tmp7, %struct.FF** @FuncPtr, align 4
+  %tmp10 = zext i32 %tmp6 to i64
+  %tmp11 = zext i32 %tmp5 to i64
+  %tmp12 = mul nsw i64 %tmp10, %tmp11
+  %tmp13 = call i32 @foo(i8* getelementptr inbounds ([6 x i8]* @.str1, i32 0, i32 0), i64 %tmp12, i32 %tmp5) nounwind
+  br label %bb8
+
+bb4:
+  ret i32 0
+
+bb6:
+  ret i32 1
+
+bb8:
+  ret i32 -1
+}
+
+declare i32 @printf(i8*, ...)
+
+declare %struct.FF* @Get()
+
+declare i32 @foo(i8*, i64, i32)
+
+declare i32 @bar(i32, i32, i32)

Modified: llvm/trunk/test/CodeGen/ARM/shifter_operand.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/shifter_operand.ll?rev=130245&r1=130244&r2=130245&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/shifter_operand.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/shifter_operand.ll Tue Apr 26 16:31:35 2011
@@ -58,7 +58,7 @@
 ; A8: str r2, [r0, r1, lsl #2]
 
 ; A9: test4:
-; A9: add r0, r0, r4, lsl #2
+; A9: add r0, r0, r{{[0-9]+}}, lsl #2
 ; A9: ldr r1, [r0]
 ; A9: str r1, [r0]
   %0 = tail call i8* (...)* @malloc(i32 undef) nounwind

Removed: llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll?rev=130244&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb/2010-01-15-local-alloc-spill-physical.ll (removed)
@@ -1,20 +0,0 @@
-; RUN: llc < %s -regalloc=fast -relocation-model=pic | FileCheck %s
-
-target triple = "thumbv6-apple-darwin10"
-
- at fred = internal global i32 0              ; <i32*> [#uses=1]
-
-define void @foo() nounwind {
-entry:
-; CHECK: str r0, [sp
-  %0 = call  i32 (...)* @bar() nounwind ; <i32> [#uses=1]
-; CHECK: blx _bar
-; CHECK: ldr r1, [sp
-  store i32 %0, i32* @fred, align 4
-  br label %return
-
-return:                                           ; preds = %entry
-  ret void
-}
-
-declare i32 @bar(...)

Modified: llvm/trunk/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll?rev=130245&r1=130244&r2=130245&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/2009-10-15-ITBlockBranch.ll Tue Apr 26 16:31:35 2011
@@ -13,7 +13,7 @@
 ; CHECK: _ZNKSs7compareERKSs:
 ; CHECK:      it  eq
 ; CHECK-NEXT: subeq r0, r{{[0-9]+}}, r{{[0-9]+}}
-; CHECK-NEXT: ldmia.w sp!, {r4, r5, r6, r7, r8, pc}
+; CHECK-NEXT: ldmia.w sp!,
 entry:
   %0 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %this) ; <i32> [#uses=3]
   %1 = tail call arm_aapcs_vfpcc  i32 @_ZNKSs4sizeEv(%"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >"* %__str) ; <i32> [#uses=3]

Modified: llvm/trunk/test/CodeGen/X86/pic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pic.ll?rev=130245&r1=130244&r2=130245&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pic.ll Tue Apr 26 16:31:35 2011
@@ -79,8 +79,8 @@
 ; LINUX-NEXT: .L3$pb:
 ; LINUX: 	popl
 ; LINUX: 	addl	$_GLOBAL_OFFSET_TABLE_+(.L{{.*}}-.L3$pb), %[[REG3:e..]]
-; LINUX: 	movl	pfoo at GOT(%[[REG3]]),
 ; LINUX: 	calll	afoo at PLT
+; LINUX: 	movl	pfoo at GOT(%[[REG3]]),
 ; LINUX: 	calll	*
 }
 





More information about the llvm-commits mailing list