[llvm-commits] [llvm] r69585 - in /llvm/trunk: lib/CodeGen/LiveIntervalAnalysis.cpp lib/CodeGen/RegAllocLinearScan.cpp test/CodeGen/X86/2008-08-05-SpillerBug.ll test/CodeGen/X86/2008-10-16-SpillerBug.ll test/CodeGen/X86/2009-04-20-LinearScanOpt.ll

Evan Cheng evan.cheng at apple.com
Mon Apr 20 01:01:14 PDT 2009


Author: evancheng
Date: Mon Apr 20 03:01:12 2009
New Revision: 69585

URL: http://llvm.org/viewvc/llvm-project?rev=69585&view=rev
Log:
Added a linearscan register allocation optimization. When the register allocator spills an interval with multiple uses in the same basic block, it creates a different virtual register for each of the reloads, e.g.

        %reg1498<def> = MOV32rm %reg1024, 1, %reg0, 12, %reg0, Mem:LD(4,4) [sunkaddr39 + 0]
        %reg1506<def> = MOV32rm %reg1024, 1, %reg0, 8, %reg0, Mem:LD(4,4) [sunkaddr42 + 0]
        %reg1486<def> = MOV32rr %reg1506
        %reg1486<def> = XOR32rr %reg1486, %reg1498, %EFLAGS<imp-def,dead>
        %reg1510<def> = MOV32rm %reg1024, 1, %reg0, 4, %reg0, Mem:LD(4,4) [sunkaddr45 + 0]

=>

        %reg1498<def> = MOV32rm %reg2036, 1, %reg0, 12, %reg0, Mem:LD(4,4) [sunkaddr39 + 0]
        %reg1506<def> = MOV32rm %reg2037, 1, %reg0, 8, %reg0, Mem:LD(4,4) [sunkaddr42 + 0]
        %reg1486<def> = MOV32rr %reg1506
        %reg1486<def> = XOR32rr %reg1486, %reg1498, %EFLAGS<imp-def,dead>
        %reg1510<def> = MOV32rm %reg2038, 1, %reg0, 4, %reg0, Mem:LD(4,4) [sunkaddr45 + 0]

From linearscan's point of view, each of reg2036, 2037, and 2038 is a separate register, and each is "killed" after a single use. The register holding the reloaded value thus becomes available again and is often clobbered right away. In this case, for example, reg1498 is allocated EAX while reg2036 is allocated RAX. This means we end up with multiple reloads from the same stack slot in the same basic block.

Now linearscan recognizes that there are other reloads from the same SS in the same BB, so it "downgrades" RAX (and its aliases) after reg2036 is allocated until the next reload (reg2037) is done. This greatly increases the likelihood that reloads from the same SS are reused.
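
Roughly, the bookkeeping works like the following sketch (illustrative C++ only, not the code in this patch; the helper names are invented and std::map/std::set stand in for LLVM's DenseMap/SmallSet):

// Illustrative sketch of the downgrade bookkeeping (not the patch's code).
#include <map>
#include <set>

// Register defined by a reload -> next register defined by a reload from
// the same stack slot in the same basic block.
std::map<unsigned, unsigned> NextReloadMap;

// Physical registers currently un-favored ("downgraded") for allocation.
std::set<unsigned> DowngradedRegs;

// Called right after PhysReg has been assigned to the reload-defined
// virtual register VirtReg.
void noteReloadAssigned(unsigned VirtReg, unsigned PhysReg) {
  // PhysReg was just allocated, so any earlier downgrade on it is lifted.
  DowngradedRegs.erase(PhysReg);

  std::map<unsigned, unsigned>::iterator I = NextReloadMap.find(VirtReg);
  if (I != NextReloadMap.end()) {
    // Another reload from the same stack slot follows in this block.
    // Downgrade PhysReg (the patch also downgrades its aliases) so the
    // intervals allocated in between avoid it, keeping the reloaded value
    // live in PhysReg for the next reload to reuse.
    DowngradedRegs.insert(PhysReg);
  }
}

// When picking a free register, scan the non-downgraded ones first and
// fall back to downgraded registers only if nothing else is available.
bool isPreferredForAllocation(unsigned PhysReg) {
  return DowngradedRegs.count(PhysReg) == 0;
}

The actual patch additionally clears all of this state when backtracking undoes an allocation or when a physical register is spilled around its defs and uses.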

This speeds up sha1 from OpenSSL by 5.8%. It is also an across-the-board win for SPEC2000 and SPEC2006.

Added:
    llvm/trunk/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
Modified:
    llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
    llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp
    llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll
    llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll

Modified: llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp?rev=69585&r1=69584&r2=69585&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp (original)
+++ llvm/trunk/lib/CodeGen/LiveIntervalAnalysis.cpp Mon Apr 20 03:01:12 2009
@@ -1734,14 +1734,6 @@
   }
 }
 
-namespace {
-  struct LISorter {
-    bool operator()(LiveInterval* A, LiveInterval* B) {
-      return A->beginNumber() < B->beginNumber();
-    }
-  };
-}
-
 std::vector<LiveInterval*> LiveIntervals::
 addIntervalsForSpillsFast(const LiveInterval &li,
                           const MachineLoopInfo *loopInfo,
@@ -1838,9 +1830,6 @@
     RI = mri_->reg_begin(li.reg);
   }
 
-  // Clients expect the new intervals to be returned in sorted order.
-  std::sort(added.begin(), added.end(), LISorter());
-
   return added;
 }
 

Modified: llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp?rev=69585&r1=69584&r2=69585&view=diff

==============================================================================
--- llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp (original)
+++ llvm/trunk/lib/CodeGen/RegAllocLinearScan.cpp Mon Apr 20 03:01:12 2009
@@ -45,6 +45,7 @@
 STATISTIC(NumIters     , "Number of iterations performed");
 STATISTIC(NumBacktracks, "Number of times we had to backtrack");
 STATISTIC(NumCoalesce,   "Number of copies coalesced");
+STATISTIC(NumDowngrade,  "Number of registers downgraded");
 
 static cl::opt<bool>
 NewHeuristic("new-spilling-heuristic",
@@ -74,6 +75,19 @@
     EquivalenceClasses<const TargetRegisterClass*> RelatedRegClasses;
     DenseMap<unsigned, const TargetRegisterClass*> OneClassForEachPhysReg;
 
+    // NextReloadMap - For each register in the map, it maps to another
+    // register which is defined by a reload from the same stack slot and
+    // both reloads are in the same basic block.
+    DenseMap<unsigned, unsigned> NextReloadMap;
+
+    // DowngradedRegs - A set of registers which are being "downgraded", i.e.
+    // un-favored for allocation.
+    SmallSet<unsigned, 8> DowngradedRegs;
+
+    // DowngradeMap - A map from virtual registers to physical registers being
+    // downgraded for the virtual registers.
+    DenseMap<unsigned, unsigned> DowngradeMap;
+
     MachineFunction* mf_;
     MachineRegisterInfo* mri_;
     const TargetMachine* tm_;
@@ -151,6 +165,16 @@
     /// ones to the active list.
     void processInactiveIntervals(unsigned CurPoint);
 
+    /// hasNextReloadInterval - Return the next live interval that's being
+    /// defined by a reload from the same SS as the specified one.
+    LiveInterval *hasNextReloadInterval(LiveInterval *cur);
+
+    /// DowngradeRegister - Downgrade a register for allocation.
+    void DowngradeRegister(LiveInterval *li, unsigned Reg);
+
+    /// UpgradeRegister - Upgrade a register for allocation.
+    void UpgradeRegister(unsigned Reg);
+
     /// assignRegOrStackSlotAtInterval - assign a register if one
     /// is available, or spill.
     void assignRegOrStackSlotAtInterval(LiveInterval* cur);
@@ -184,6 +208,10 @@
     /// getFreePhysReg - return a free physical register for this virtual
     /// register interval if we have one, otherwise return 0.
     unsigned getFreePhysReg(LiveInterval* cur);
+    unsigned getFreePhysReg(const TargetRegisterClass *RC,
+                            unsigned MaxInactiveCount,
+                            SmallVector<unsigned, 256> &inactiveCounts,
+                            bool SkipDGRegs);
 
     /// assignVirt2StackSlot - assigns this virtual register to a
     /// stack slot. returns the stack slot
@@ -211,17 +239,15 @@
 X("linearscan-regalloc", "Linear Scan Register Allocator");
 
 void RALinScan::ComputeRelatedRegClasses() {
-  const TargetRegisterInfo &TRI = *tri_;
-  
   // First pass, add all reg classes to the union, and determine at least one
   // reg class that each register is in.
   bool HasAliases = false;
-  for (TargetRegisterInfo::regclass_iterator RCI = TRI.regclass_begin(),
-       E = TRI.regclass_end(); RCI != E; ++RCI) {
+  for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(),
+       E = tri_->regclass_end(); RCI != E; ++RCI) {
     RelatedRegClasses.insert(*RCI);
     for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end();
          I != E; ++I) {
-      HasAliases = HasAliases || *TRI.getAliasSet(*I) != 0;
+      HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0;
       
       const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I];
       if (PRC) {
@@ -241,7 +267,7 @@
     for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
          I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
          I != E; ++I)
-      for (const unsigned *AS = TRI.getAliasSet(I->first); *AS; ++AS)
+      for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
         RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
 }
 
@@ -326,6 +352,9 @@
   active_.clear();
   inactive_.clear();
   handled_.clear();
+  NextReloadMap.clear();
+  DowngradedRegs.clear();
+  DowngradeMap.clear();
 
   return true;
 }
@@ -525,6 +554,9 @@
   SmallSet<unsigned, 4> Processed;
   SmallSet<unsigned, 4> SuperAdded;
   SmallVector<unsigned, 4> Supers;
+  // Unfavor downgraded registers for spilling.
+  if (DowngradedRegs.count(reg))
+    weight *= 2.0f;
   Weights[reg] += weight;
   Processed.insert(reg);
   for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) {
@@ -692,6 +724,43 @@
   return (diff / w2) <= 0.05f;  // Within 5%.
 }
 
+LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
+  DenseMap<unsigned, unsigned>::iterator I = NextReloadMap.find(cur->reg);
+  if (I == NextReloadMap.end())
+    return 0;
+  return &li_->getInterval(I->second);
+}
+
+void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
+  bool isNew = DowngradedRegs.insert(Reg);
+  isNew = isNew; // Silence compiler warning.
+  assert(isNew && "Multiple reloads holding the same register?");
+  DowngradeMap.insert(std::make_pair(li->reg, Reg));
+  for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
+    isNew = DowngradedRegs.insert(*AS);
+    isNew = isNew; // Silence compiler warning.
+    assert(isNew && "Multiple reloads holding the same register?");
+    DowngradeMap.insert(std::make_pair(li->reg, *AS));
+  }
+  ++NumDowngrade;
+}
+
+void RALinScan::UpgradeRegister(unsigned Reg) {
+  if (Reg) {
+    DowngradedRegs.erase(Reg);
+    for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS)
+      DowngradedRegs.erase(*AS);
+  }
+}
+
+namespace {
+  struct LISorter {
+    bool operator()(LiveInterval* A, LiveInterval* B) {
+      return A->beginNumber() < B->beginNumber();
+    }
+  };
+}
+
 /// assignRegOrStackSlotAtInterval - assign a register if one is available, or
 /// spill.
 void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur)
@@ -836,6 +905,15 @@
     prt_->addRegUse(physReg);
     active_.push_back(std::make_pair(cur, cur->begin()));
     handled_.push_back(cur);
+
+    // "Upgrade" the physical register since it has been allocated.
+    UpgradeRegister(physReg);
+    if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) {
+      // "Downgrade" physReg to try to keep physReg from being allocated until
+      // the next reload from the same SS is allocated. 
+      NextReloadLI->preference = physReg;
+      DowngradeRegister(cur, physReg);
+    }
     return;
   }
   DOUT << "no free registers\n";
@@ -896,9 +974,10 @@
     if (cur->weight == HUGE_VALF ||
         li_->getApproximateInstructionCount(*cur) == 0) {
       // Spill a physical register around defs and uses.
-      if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_))
+      if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) {
+        DowngradedRegs.clear();
         assignRegOrStackSlotAtInterval(cur);
-      else {
+      } else {
         cerr << "Ran out of registers during register allocation!\n";
         exit(1);
       }
@@ -919,7 +998,7 @@
           DOUT << tri_->getName(RegsWeights[i].first)
                << " (" << RegsWeights[i].second << ")\n");
 
-  // if the current has the minimum weight, we need to spill it and
+  // If the current has the minimum weight, we need to spill it and
   // add any added intervals back to unhandled, and restart
   // linearscan.
   if (cur->weight != HUGE_VALF && cur->weight <= minWeight) {
@@ -928,12 +1007,13 @@
     SmallVector<LiveInterval*, 8> spillIs;
     std::vector<LiveInterval*> added =
       li_->addIntervalsForSpills(*cur, spillIs, loopInfo, *vrm_, SSWeight);
+    std::sort(added.begin(), added.end(), LISorter());
     addStackInterval(cur, ls_, li_, SSWeight, *vrm_);
     if (added.empty())
       return;  // Early exit if all spills were folded.
 
-    // Merge added with unhandled.  Note that we know that
-    // addIntervalsForSpills returns intervals sorted by their starting
+    // Merge added with unhandled.  Note that we have already sorted
+    // intervals returned by addIntervalsForSpills by their starting
     // point.
     for (unsigned i = 0, e = added.size(); i != e; ++i)
       unhandled_.push(added[i]);
@@ -942,7 +1022,7 @@
 
   ++NumBacktracks;
 
-  // push the current interval back to unhandled since we are going
+  // Push the current interval back to unhandled since we are going
   // to re-run at least this iteration. Since we didn't modify it it
   // should go back right in the front of the list
   unhandled_.push(cur);
@@ -1020,10 +1100,15 @@
       unhandled_.push(i);
     }
 
-    // It interval has a preference, it must be defined by a copy. Clear the
-    // preference now since the source interval allocation may have been undone
-    // as well.
-    i->preference = 0;
+    DenseMap<unsigned, unsigned>::iterator ii = DowngradeMap.find(i->reg);
+    if (ii == DowngradeMap.end())
+      // If the interval has a preference, it must be defined by a copy.
+      // Clear the preference now since the source interval allocation may
+      // have been undone as well.
+      i->preference = 0;
+    else {
+      UpgradeRegister(ii->second);
+    }
   }
 
   // Rewind the iterators in the active, inactive, and fixed lists back to the
@@ -1032,7 +1117,7 @@
   RevertVectorIteratorsTo(inactive_, earliestStart);
   RevertVectorIteratorsTo(fixed_, earliestStart);
 
-  // scan the rest and undo each interval that expired after t and
+  // Scan the rest and undo each interval that expired after t and
   // insert it in active (the next iteration of the algorithm will
   // put it in inactive if required)
   for (unsigned i = 0, e = handled_.size(); i != e; ++i) {
@@ -1046,9 +1131,87 @@
     }
   }
 
-  // merge added with unhandled
-  for (unsigned i = 0, e = added.size(); i != e; ++i)
-    unhandled_.push(added[i]);
+  // Merge added with unhandled.
+  // This also updates the NextReloadMap. That is, it adds a mapping from a
+  // register defined by a reload from a stack slot to the next reload from
+  // the same stack slot in the same basic block.
+  MachineBasicBlock *LastReloadMBB = 0;
+  LiveInterval *LastReload = 0;
+  int LastReloadSS = VirtRegMap::NO_STACK_SLOT;
+  std::sort(added.begin(), added.end(), LISorter());
+  for (unsigned i = 0, e = added.size(); i != e; ++i) {
+    LiveInterval *ReloadLi = added[i];
+    if (ReloadLi->weight == HUGE_VALF &&
+        li_->getApproximateInstructionCount(*ReloadLi) == 0) {
+      unsigned ReloadIdx = ReloadLi->beginNumber();
+      MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx);
+      int ReloadSS = vrm_->getStackSlot(ReloadLi->reg);
+      if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) {
+        // Last reload of same SS is in the same MBB. We want to try to
+        // allocate both reloads the same register and make sure the reg
+        // isn't clobbered in between if at all possible.
+        assert(LastReload->beginNumber() < ReloadIdx);
+        NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg));
+      }
+      LastReloadMBB = ReloadMBB;
+      LastReload = ReloadLi;
+      LastReloadSS = ReloadSS;
+    }
+    unhandled_.push(ReloadLi);
+  }
+}
+
+unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC,
+                                   unsigned MaxInactiveCount,
+                                   SmallVector<unsigned, 256> &inactiveCounts,
+                                   bool SkipDGRegs) {
+  unsigned FreeReg = 0;
+  unsigned FreeRegInactiveCount = 0;
+
+  TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
+  TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
+  assert(I != E && "No allocatable register in this register class!");
+
+  // Scan for the first available register.
+  for (; I != E; ++I) {
+    unsigned Reg = *I;
+    // Ignore "downgraded" registers.
+    if (SkipDGRegs && DowngradedRegs.count(Reg))
+      continue;
+    if (prt_->isRegAvail(Reg)) {
+      FreeReg = Reg;
+      if (FreeReg < inactiveCounts.size())
+        FreeRegInactiveCount = inactiveCounts[FreeReg];
+      else
+        FreeRegInactiveCount = 0;
+      break;
+    }
+  }
+
+  // If there are no free regs, or if this reg has the max inactive count,
+  // return this register.
+  if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount)
+    return FreeReg;
+  
+  // Continue scanning the registers, looking for the one with the highest
+  // inactive count.  Alkis found that this reduced register pressure very
+  // slightly on X86 (in rev 1.94 of this file), though this should probably be
+  // reevaluated now.
+  for (; I != E; ++I) {
+    unsigned Reg = *I;
+    // Ignore "downgraded" registers.
+    if (SkipDGRegs && DowngradedRegs.count(Reg))
+      continue;
+    if (prt_->isRegAvail(Reg) && Reg < inactiveCounts.size() &&
+        FreeRegInactiveCount < inactiveCounts[Reg]) {
+      FreeReg = Reg;
+      FreeRegInactiveCount = inactiveCounts[Reg];
+      if (FreeRegInactiveCount == MaxInactiveCount)
+        break;    // We found the one with the max inactive count.
+    }
+  }
+
+  return FreeReg;
 }
 
 /// getFreePhysReg - return a free physical register for this virtual register
@@ -1078,9 +1241,6 @@
     }
   }
 
-  unsigned FreeReg = 0;
-  unsigned FreeRegInactiveCount = 0;
-
   // If copy coalescer has assigned a "preferred" register, check if it's
   // available first.
   if (cur->preference) {
@@ -1094,40 +1254,13 @@
            << tri_->getName(cur->preference) << "\n";
   }
 
-  // Scan for the first available register.
-  TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_);
-  TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_);
-  assert(I != E && "No allocatable register in this register class!");
-  for (; I != E; ++I)
-    if (prt_->isRegAvail(*I)) {
-      FreeReg = *I;
-      if (FreeReg < inactiveCounts.size())
-        FreeRegInactiveCount = inactiveCounts[FreeReg];
-      else
-        FreeRegInactiveCount = 0;
-      break;
-    }
-
-  // If there are no free regs, or if this reg has the max inactive count,
-  // return this register.
-  if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg;
-  
-  // Continue scanning the registers, looking for the one with the highest
-  // inactive count.  Alkis found that this reduced register pressure very
-  // slightly on X86 (in rev 1.94 of this file), though this should probably be
-  // reevaluated now.
-  for (; I != E; ++I) {
-    unsigned Reg = *I;
-    if (prt_->isRegAvail(Reg) && Reg < inactiveCounts.size() &&
-        FreeRegInactiveCount < inactiveCounts[Reg]) {
-      FreeReg = Reg;
-      FreeRegInactiveCount = inactiveCounts[Reg];
-      if (FreeRegInactiveCount == MaxInactiveCount)
-        break;    // We found the one with the max inactive count.
-    }
+  if (!DowngradedRegs.empty()) {
+    unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts,
+                                      true);
+    if (FreeReg)
+      return FreeReg;
   }
-  
-  return FreeReg;
+  return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false);
 }
 
 FunctionPass* llvm::createLinearScanRegisterAllocator() {

Modified: llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll?rev=69585&r1=69584&r2=69585&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-08-05-SpillerBug.ll Mon Apr 20 03:01:12 2009
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 57
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -disable-fp-elim -stats |& grep asm-printer | grep 56
 ; PR2568
 
 @g_3 = external global i16		; <i16*> [#uses=1]

Modified: llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll?rev=69585&r1=69584&r2=69585&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-10-16-SpillerBug.ll Mon Apr 20 03:01:12 2009
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*ecx}
+; RUN: llvm-as < %s | llc -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edx}
 
 	%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
 	%struct.XXDAlphaTest = type { float, i16, i8, i8 }

Added: llvm/trunk/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll?rev=69585&view=auto

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll (added)
+++ llvm/trunk/test/CodeGen/X86/2009-04-20-LinearScanOpt.ll Mon Apr 20 03:01:12 2009
@@ -0,0 +1,121 @@
+; RUN: llvm-as < %s | llc -mtriple=x86_64-apple-darwin10.0 -relocation-model=pic -disable-fp-elim -stats |& grep {Number of registers downgraded}
+; rdar://6802189
+
+; Test if linearscan is unfavoring registers for allocation to allow more reuse
+; of reloads from stack slots.
+
+	%struct.SHA_CTX = type { i32, i32, i32, i32, i32, i32, i32, [16 x i32], i32 }
+
+define fastcc void @sha1_block_data_order(%struct.SHA_CTX* nocapture %c, i8* %p, i64 %num) nounwind {
+entry:
+	br label %bb
+
+bb:		; preds = %bb, %entry
+	%asmtmp511 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=3]
+	%asmtmp513 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind		; <i32> [#uses=2]
+	%asmtmp516 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind		; <i32> [#uses=1]
+	%asmtmp517 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=2]
+	%0 = xor i32 0, %asmtmp513		; <i32> [#uses=0]
+	%1 = add i32 0, %asmtmp517		; <i32> [#uses=1]
+	%2 = add i32 %1, 0		; <i32> [#uses=1]
+	%3 = add i32 %2, 0		; <i32> [#uses=1]
+	%asmtmp519 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 0) nounwind		; <i32> [#uses=1]
+	%4 = xor i32 0, %asmtmp511		; <i32> [#uses=1]
+	%asmtmp520 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %4) nounwind		; <i32> [#uses=2]
+	%5 = xor i32 0, %asmtmp516		; <i32> [#uses=1]
+	%6 = xor i32 %5, %asmtmp519		; <i32> [#uses=1]
+	%7 = add i32 %asmtmp513, -899497514		; <i32> [#uses=1]
+	%8 = add i32 %7, %asmtmp520		; <i32> [#uses=1]
+	%9 = add i32 %8, %6		; <i32> [#uses=1]
+	%10 = add i32 %9, 0		; <i32> [#uses=1]
+	%asmtmp523 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=1]
+	%asmtmp525 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %3) nounwind		; <i32> [#uses=2]
+	%11 = xor i32 0, %asmtmp525		; <i32> [#uses=1]
+	%12 = add i32 0, %11		; <i32> [#uses=1]
+	%13 = add i32 %12, 0		; <i32> [#uses=2]
+	%asmtmp528 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %10) nounwind		; <i32> [#uses=1]
+	%14 = xor i32 0, %asmtmp520		; <i32> [#uses=1]
+	%asmtmp529 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %14) nounwind		; <i32> [#uses=1]
+	%asmtmp530 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %13) nounwind		; <i32> [#uses=1]
+	%15 = add i32 0, %asmtmp530		; <i32> [#uses=1]
+	%16 = xor i32 0, %asmtmp523		; <i32> [#uses=1]
+	%asmtmp532 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %16) nounwind		; <i32> [#uses=2]
+	%asmtmp533 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %15) nounwind		; <i32> [#uses=1]
+	%17 = xor i32 %13, %asmtmp528		; <i32> [#uses=1]
+	%18 = xor i32 %17, 0		; <i32> [#uses=1]
+	%19 = add i32 %asmtmp525, -899497514		; <i32> [#uses=1]
+	%20 = add i32 %19, %asmtmp532		; <i32> [#uses=1]
+	%21 = add i32 %20, %18		; <i32> [#uses=1]
+	%22 = add i32 %21, %asmtmp533		; <i32> [#uses=1]
+	%23 = xor i32 0, %asmtmp511		; <i32> [#uses=1]
+	%24 = xor i32 %23, 0		; <i32> [#uses=1]
+	%asmtmp535 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %24) nounwind		; <i32> [#uses=3]
+	%25 = add i32 0, %asmtmp535		; <i32> [#uses=1]
+	%26 = add i32 %25, 0		; <i32> [#uses=1]
+	%27 = add i32 %26, 0		; <i32> [#uses=1]
+	%28 = xor i32 0, %asmtmp529		; <i32> [#uses=0]
+	%29 = xor i32 %22, 0		; <i32> [#uses=1]
+	%30 = xor i32 %29, 0		; <i32> [#uses=1]
+	%31 = add i32 0, %30		; <i32> [#uses=1]
+	%32 = add i32 %31, 0		; <i32> [#uses=3]
+	%asmtmp541 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=2]
+	%asmtmp542 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %32) nounwind		; <i32> [#uses=1]
+	%33 = add i32 0, %asmtmp541		; <i32> [#uses=1]
+	%34 = add i32 %33, 0		; <i32> [#uses=1]
+	%35 = add i32 %34, %asmtmp542		; <i32> [#uses=1]
+	%asmtmp543 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %27) nounwind		; <i32> [#uses=2]
+	%36 = xor i32 0, %asmtmp535		; <i32> [#uses=0]
+	%37 = xor i32 %32, 0		; <i32> [#uses=1]
+	%38 = xor i32 %37, %asmtmp543		; <i32> [#uses=1]
+	%39 = add i32 0, %38		; <i32> [#uses=1]
+	%40 = add i32 %39, 0		; <i32> [#uses=2]
+	%asmtmp546 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %32) nounwind		; <i32> [#uses=1]
+	%asmtmp547 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 0) nounwind		; <i32> [#uses=2]
+	%41 = add i32 0, -899497514		; <i32> [#uses=1]
+	%42 = add i32 %41, %asmtmp547		; <i32> [#uses=1]
+	%43 = add i32 %42, 0		; <i32> [#uses=1]
+	%44 = add i32 %43, 0		; <i32> [#uses=3]
+	%asmtmp549 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %35) nounwind		; <i32> [#uses=2]
+	%45 = xor i32 0, %asmtmp541		; <i32> [#uses=1]
+	%asmtmp550 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %45) nounwind		; <i32> [#uses=2]
+	%asmtmp551 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 5, i32 %44) nounwind		; <i32> [#uses=1]
+	%46 = xor i32 %40, %asmtmp546		; <i32> [#uses=1]
+	%47 = xor i32 %46, %asmtmp549		; <i32> [#uses=1]
+	%48 = add i32 %asmtmp543, -899497514		; <i32> [#uses=1]
+	%49 = add i32 %48, %asmtmp550		; <i32> [#uses=1]
+	%50 = add i32 %49, %47		; <i32> [#uses=1]
+	%51 = add i32 %50, %asmtmp551		; <i32> [#uses=1]
+	%asmtmp552 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %40) nounwind		; <i32> [#uses=2]
+	%52 = xor i32 %44, %asmtmp549		; <i32> [#uses=1]
+	%53 = xor i32 %52, %asmtmp552		; <i32> [#uses=1]
+	%54 = add i32 0, %53		; <i32> [#uses=1]
+	%55 = add i32 %54, 0		; <i32> [#uses=2]
+	%asmtmp555 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %44) nounwind		; <i32> [#uses=2]
+	%56 = xor i32 0, %asmtmp532		; <i32> [#uses=1]
+	%57 = xor i32 %56, %asmtmp547		; <i32> [#uses=1]
+	%asmtmp556 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %57) nounwind		; <i32> [#uses=1]
+	%58 = add i32 0, %asmtmp556		; <i32> [#uses=1]
+	%59 = add i32 %58, 0		; <i32> [#uses=1]
+	%60 = add i32 %59, 0		; <i32> [#uses=1]
+	%asmtmp558 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %51) nounwind		; <i32> [#uses=1]
+	%61 = xor i32 %asmtmp517, %asmtmp511		; <i32> [#uses=1]
+	%62 = xor i32 %61, %asmtmp535		; <i32> [#uses=1]
+	%63 = xor i32 %62, %asmtmp550		; <i32> [#uses=1]
+	%asmtmp559 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 1, i32 %63) nounwind		; <i32> [#uses=1]
+	%64 = xor i32 %55, %asmtmp555		; <i32> [#uses=1]
+	%65 = xor i32 %64, %asmtmp558		; <i32> [#uses=1]
+	%asmtmp561 = tail call i32 asm "roll $1,$0", "=r,I,0,~{dirflag},~{fpsr},~{flags},~{cc}"(i32 30, i32 %55) nounwind		; <i32> [#uses=1]
+	%66 = add i32 %asmtmp552, -899497514		; <i32> [#uses=1]
+	%67 = add i32 %66, %65		; <i32> [#uses=1]
+	%68 = add i32 %67, %asmtmp559		; <i32> [#uses=1]
+	%69 = add i32 %68, 0		; <i32> [#uses=1]
+	%70 = add i32 %69, 0		; <i32> [#uses=1]
+	store i32 %70, i32* null, align 4
+	%71 = add i32 0, %60		; <i32> [#uses=1]
+	store i32 %71, i32* null, align 4
+	%72 = add i32 0, %asmtmp561		; <i32> [#uses=1]
+	store i32 %72, i32* null, align 4
+	%73 = add i32 0, %asmtmp555		; <i32> [#uses=1]
+	store i32 %73, i32* null, align 4
+	br label %bb
+}




