[llvm] ce61def - [regalloc] Ensure Query::collectInterferringVregs is called before interval iteration

Mircea Trofin via llvm-commits llvm-commits at lists.llvm.org
Thu Apr 1 08:33:36 PDT 2021


Author: Mircea Trofin
Date: 2021-04-01T08:33:28-07:00
New Revision: ce61def529e2d9ef46b79c9d1f489d69b45b95bf

URL: https://github.com/llvm/llvm-project/commit/ce61def529e2d9ef46b79c9d1f489d69b45b95bf
DIFF: https://github.com/llvm/llvm-project/commit/ce61def529e2d9ef46b79c9d1f489d69b45b95bf.diff

LOG: [regalloc] Ensure Query::collectInterferringVregs is called before interval iteration

The main part of the patch is the change in RegAllocGreedy.cpp: Q.collectInterferingVRegs()
needs to be called before iterating over the interfering live ranges.

The rest of the patch helps ensure that this is the case: instead of clearing the query's
InterferingVRegs field, we invalidate it. The clearing happens when the live reg matrix
is invalidated (existing triggering mechanism).

Without the change in RegAllocGreedy.cpp, the compiler ICEs (hits an internal compiler error).

This patch should make it easier for developers to discover that
collectInterferingVRegs needs to be called before iterating.

I will follow up with a subsequent patch to improve the usability and maintainability of Query.

Differential Revision: https://reviews.llvm.org/D98232

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/LiveIntervalUnion.h
    llvm/lib/CodeGen/LiveIntervalUnion.cpp
    llvm/lib/CodeGen/LiveRegMatrix.cpp
    llvm/lib/CodeGen/RegAllocGreedy.cpp
    llvm/lib/Target/AArch64/AArch64Subtarget.h
    llvm/lib/Target/X86/X86Subtarget.h
    llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
    llvm/test/CodeGen/X86/bug26810.ll
    llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll
    llvm/test/CodeGen/X86/i128-mul.ll
    llvm/test/CodeGen/X86/mmx-arith.ll
    llvm/test/CodeGen/X86/optimize-max-0.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
index ad9e06d2bcf01..4ebe0f2dcfd88 100644
--- a/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
+++ b/llvm/include/llvm/CodeGen/LiveIntervalUnion.h
@@ -114,30 +114,30 @@ class LiveIntervalUnion {
     const LiveRange *LR = nullptr;
     LiveRange::const_iterator LRI;  ///< current position in LR
     ConstSegmentIter LiveUnionI;    ///< current position in LiveUnion
-    SmallVector<LiveInterval*,4> InterferingVRegs;
+    Optional<SmallVector<LiveInterval *, 4>> InterferingVRegs;
     bool CheckedFirstInterference = false;
     bool SeenAllInterferences = false;
     unsigned Tag = 0;
     unsigned UserTag = 0;
 
+  public:
+    Query() = default;
+    Query(const LiveRange &LR, const LiveIntervalUnion &LIU)
+        : LiveUnion(&LIU), LR(&LR) {}
+    Query(const Query &) = delete;
+    Query &operator=(const Query &) = delete;
+
     void reset(unsigned NewUserTag, const LiveRange &NewLR,
                const LiveIntervalUnion &NewLiveUnion) {
       LiveUnion = &NewLiveUnion;
       LR = &NewLR;
-      InterferingVRegs.clear();
+      InterferingVRegs = None;
       CheckedFirstInterference = false;
       SeenAllInterferences = false;
       Tag = NewLiveUnion.getTag();
       UserTag = NewUserTag;
     }
 
-  public:
-    Query() = default;
-    Query(const LiveRange &LR, const LiveIntervalUnion &LIU):
-      LiveUnion(&LIU), LR(&LR) {}
-    Query(const Query &) = delete;
-    Query &operator=(const Query &) = delete;
-
     void init(unsigned NewUserTag, const LiveRange &NewLR,
               const LiveIntervalUnion &NewLiveUnion) {
       if (UserTag == NewUserTag && LR == &NewLR && LiveUnion == &NewLiveUnion &&
@@ -164,7 +164,7 @@ class LiveIntervalUnion {
 
     // Vector generated by collectInterferingVRegs.
     const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
-      return InterferingVRegs;
+      return *InterferingVRegs;
     }
   };
 

diff  --git a/llvm/lib/CodeGen/LiveIntervalUnion.cpp b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
index 7ccb8df4bc055..dfa523d4bf41c 100644
--- a/llvm/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/llvm/lib/CodeGen/LiveIntervalUnion.cpp
@@ -112,7 +112,7 @@ LiveInterval *LiveIntervalUnion::getOneVReg() const {
 // Scan the vector of interfering virtual registers in this union. Assume it's
 // quite small.
 bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
-  return is_contained(InterferingVRegs, VirtReg);
+  return is_contained(*InterferingVRegs, VirtReg);
 }
 
 // Collect virtual registers in this union that interfere with this
@@ -126,9 +126,12 @@ bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
 //
 unsigned LiveIntervalUnion::Query::
 collectInterferingVRegs(unsigned MaxInterferingRegs) {
+  if (!InterferingVRegs)
+    InterferingVRegs.emplace();
+
   // Fast path return if we already have the desired information.
-  if (SeenAllInterferences || InterferingVRegs.size() >= MaxInterferingRegs)
-    return InterferingVRegs.size();
+  if (SeenAllInterferences || InterferingVRegs->size() >= MaxInterferingRegs)
+    return InterferingVRegs->size();
 
   // Set up iterators on the first call.
   if (!CheckedFirstInterference) {
@@ -157,14 +160,14 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
       LiveInterval *VReg = LiveUnionI.value();
       if (VReg != RecentReg && !isSeenInterference(VReg)) {
         RecentReg = VReg;
-        InterferingVRegs.push_back(VReg);
-        if (InterferingVRegs.size() >= MaxInterferingRegs)
-          return InterferingVRegs.size();
+        InterferingVRegs->push_back(VReg);
+        if (InterferingVRegs->size() >= MaxInterferingRegs)
+          return InterferingVRegs->size();
       }
       // This LiveUnion segment is no longer interesting.
       if (!(++LiveUnionI).valid()) {
         SeenAllInterferences = true;
-        return InterferingVRegs.size();
+        return InterferingVRegs->size();
       }
     }
 
@@ -185,7 +188,7 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) {
     LiveUnionI.advanceTo(LRI->start);
   }
   SeenAllInterferences = true;
-  return InterferingVRegs.size();
+  return InterferingVRegs->size();
 }
 
 void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc,

diff  --git a/llvm/lib/CodeGen/LiveRegMatrix.cpp b/llvm/lib/CodeGen/LiveRegMatrix.cpp
index a69aa6557e462..4c0172a930b59 100644
--- a/llvm/lib/CodeGen/LiveRegMatrix.cpp
+++ b/llvm/lib/CodeGen/LiveRegMatrix.cpp
@@ -216,7 +216,21 @@ bool LiveRegMatrix::checkInterference(SlotIndex Start, SlotIndex End,
 
   // Check for interference with that segment
   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
-    if (query(LR, *Units).checkInterference())
+    // LR is stack-allocated. LiveRegMatrix caches queries by a key that
+    // includes the address of the live range. If (for the same reg unit) this
+    // checkInterference overload is called twice, without any other query()
+    // calls in between (on heap-allocated LiveRanges)  - which would invalidate
+    // the cached query - the LR address seen the second time may well be the
+    // same as that seen the first time, while the Start/End/valno may not - yet
+    // the same cached result would be fetched. To avoid that, we don't cache
+    // this query.
+    //
+    // FIXME: the usability of the Query API needs to be improved to avoid
+    // subtle bugs due to query identity. Avoiding caching, for example, would
+    // greatly simplify things.
+    LiveIntervalUnion::Query Q;
+    Q.reset(UserTag, LR, Matrix[*Units]);
+    if (Q.checkInterference())
       return true;
   }
   return false;

diff  --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp
index 4ec275fdc0bf8..26e7a1f17a22e 100644
--- a/llvm/lib/CodeGen/RegAllocGreedy.cpp
+++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp
@@ -471,12 +471,13 @@ class RAGreedy : public MachineFunctionPass,
   bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool) const;
   bool canEvictInterference(LiveInterval &, MCRegister, bool, EvictionCost &,
                             const SmallVirtRegSet &) const;
-  bool canEvictInterferenceInRange(LiveInterval &VirtReg, MCRegister PhysReg,
-                                   SlotIndex Start, SlotIndex End,
-                                   EvictionCost &MaxCost) const;
+  bool canEvictInterferenceInRange(const LiveInterval &VirtReg,
+                                   MCRegister PhysReg, SlotIndex Start,
+                                   SlotIndex End, EvictionCost &MaxCost) const;
   MCRegister getCheapestEvicteeWeight(const AllocationOrder &Order,
-                                      LiveInterval &VirtReg, SlotIndex Start,
-                                      SlotIndex End, float *BestEvictWeight);
+                                      const LiveInterval &VirtReg,
+                                      SlotIndex Start, SlotIndex End,
+                                      float *BestEvictWeight) const;
   void evictInterference(LiveInterval &, MCRegister,
                          SmallVectorImpl<Register> &);
   bool mayRecolorAllInterferences(MCRegister PhysReg, LiveInterval &VirtReg,
@@ -979,7 +980,7 @@ bool RAGreedy::canEvictInterference(
 /// \param MaxCost Only look for cheaper candidates and update with new cost
 ///                when returning true.
 /// \return True when interference can be evicted cheaper than MaxCost.
-bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
+bool RAGreedy::canEvictInterferenceInRange(const LiveInterval &VirtReg,
                                            MCRegister PhysReg, SlotIndex Start,
                                            SlotIndex End,
                                            EvictionCost &MaxCost) const {
@@ -987,6 +988,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
 
   for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) {
     LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units);
+    Q.collectInterferingVRegs();
 
     // Check if any interfering live range is heavier than MaxWeight.
     for (const LiveInterval *Intf : reverse(Q.interferingVRegs())) {
@@ -1031,9 +1033,9 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
 /// \return The PhysReg which is the best candidate for eviction and the
 /// eviction cost in BestEvictweight
 MCRegister RAGreedy::getCheapestEvicteeWeight(const AllocationOrder &Order,
-                                              LiveInterval &VirtReg,
+                                              const LiveInterval &VirtReg,
                                               SlotIndex Start, SlotIndex End,
-                                              float *BestEvictweight) {
+                                              float *BestEvictweight) const {
   EvictionCost BestEvictCost;
   BestEvictCost.setMax();
   BestEvictCost.MaxWeight = VirtReg.weight();
@@ -1556,25 +1558,9 @@ bool RAGreedy::splitCanCauseLocalSpill(unsigned VirtRegToSplit,
       return false;
   }
 
-  // Check if the local interval will evict a cheaper interval.
-  float CheapestEvictWeight = 0;
-  MCRegister FutureEvictedPhysReg = getCheapestEvicteeWeight(
-      Order, LIS->getInterval(VirtRegToSplit), Cand.Intf.first(),
-      Cand.Intf.last(), &CheapestEvictWeight);
-
-  // Have we found an interval that can be evicted?
-  if (FutureEvictedPhysReg) {
-    float splitArtifactWeight =
-        VRAI->futureWeight(LIS->getInterval(VirtRegToSplit),
-                           Cand.Intf.first().getPrevIndex(), Cand.Intf.last());
-    // Will the weight of the local interval be higher than the cheapest evictee
-    // weight? If so it will evict it and will not cause a spill.
-    if (splitArtifactWeight >= 0 && splitArtifactWeight > CheapestEvictWeight)
-      return false;
-  }
-
-  // The local interval is not able to find non interferencing assignment and
-  // not able to evict a less worthy interval, therfore, it can cause a spill.
+  // The local interval is not able to find non interferencing assignment
+  // and not able to evict a less worthy interval, therfore, it can cause a
+  // spill.
   return true;
 }
 

diff  --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 8fe2f125982fb..6447103128a58 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -557,7 +557,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   bool enableEarlyIfConversion() const override;
 
-  bool enableAdvancedRASplitCost() const override { return true; }
+  bool enableAdvancedRASplitCost() const override { return false; }
 
   std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
 

diff  --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index fa2622333d605..96bb960605432 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -941,7 +941,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
     return TargetSubtargetInfo::ANTIDEP_CRITICAL;
   }
 
-  bool enableAdvancedRASplitCost() const override { return true; }
+  bool enableAdvancedRASplitCost() const override { return false; }
 };
 
 } // end namespace llvm

diff  --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
index f3bd66ceae8cf..1a9bff7915bac 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
+; RUN: llc -consider-local-interval-cost -mtriple=aarch64-arm-none-eabi < %s | FileCheck %s
 
 @A = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8
 @B = external dso_local local_unnamed_addr global [8 x [8 x i64]], align 8

diff  --git a/llvm/test/CodeGen/X86/bug26810.ll b/llvm/test/CodeGen/X86/bug26810.ll
index 7146f4cc38506..f0ea14e97edaa 100644
--- a/llvm/test/CodeGen/X86/bug26810.ll
+++ b/llvm/test/CodeGen/X86/bug26810.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
+; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
 ; Make sure bad eviction sequence doesnt occur
 
 ; Fix for bugzilla 26810.

diff  --git a/llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll b/llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll
index f300c8ce3d819..053225c1b547e 100644
--- a/llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll
+++ b/llvm/test/CodeGen/X86/greedy_regalloc_bad_eviction_sequence.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
+; RUN: llc -consider-local-interval-cost < %s -march=x86 -regalloc=greedy -stop-after=greedy | FileCheck %s
 ; Make sure bad eviction sequence doesnt occur
 
 ; Part of the fix for bugzilla 26810.

diff  --git a/llvm/test/CodeGen/X86/i128-mul.ll b/llvm/test/CodeGen/X86/i128-mul.ll
index 552a383e6da75..1142c31b69ec9 100644
--- a/llvm/test/CodeGen/X86/i128-mul.ll
+++ b/llvm/test/CodeGen/X86/i128-mul.ll
@@ -162,9 +162,9 @@ define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind
 ; X86-NOBMI-NEXT:    movl (%esp), %edi # 4-byte Reload
 ; X86-NOBMI-NEXT:    adcl $0, %edi
 ; X86-NOBMI-NEXT:    movl %ebp, %esi
-; X86-NOBMI-NEXT:    xorl %ebx, %esi
+; X86-NOBMI-NEXT:    xorl {{[0-9]+}}(%esp), %esi
 ; X86-NOBMI-NEXT:    movl %edi, (%esp) # 4-byte Spill
-; X86-NOBMI-NEXT:    xorl {{[0-9]+}}(%esp), %edi
+; X86-NOBMI-NEXT:    xorl %ebx, %edi
 ; X86-NOBMI-NEXT:    orl %esi, %edi
 ; X86-NOBMI-NEXT:    jne .LBB1_2
 ; X86-NOBMI-NEXT:  .LBB1_3: # %for.end

diff  --git a/llvm/test/CodeGen/X86/mmx-arith.ll b/llvm/test/CodeGen/X86/mmx-arith.ll
index c81520b98cdb2..36dcdb967f1eb 100644
--- a/llvm/test/CodeGen/X86/mmx-arith.ll
+++ b/llvm/test/CodeGen/X86/mmx-arith.ll
@@ -390,25 +390,28 @@ define <1 x i64> @test3(<1 x i64>* %a, <1 x i64>* %b, i32 %count) nounwind {
 ; X32-NEXT:    pushl %ebx
 ; X32-NEXT:    pushl %edi
 ; X32-NEXT:    pushl %esi
-; X32-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    testl %ecx, %ecx
 ; X32-NEXT:    je .LBB3_1
 ; X32-NEXT:  # %bb.2: # %bb26.preheader
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT:    xorl %ebx, %ebx
 ; X32-NEXT:    xorl %eax, %eax
 ; X32-NEXT:    xorl %edx, %edx
 ; X32-NEXT:    .p2align 4, 0x90
 ; X32-NEXT:  .LBB3_3: # %bb26
 ; X32-NEXT:    # =>This Inner Loop Header: Depth=1
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X32-NEXT:    movl (%edi,%ebx,8), %ebp
+; X32-NEXT:    movl %ecx, %esi
 ; X32-NEXT:    movl 4(%edi,%ebx,8), %ecx
-; X32-NEXT:    addl (%esi,%ebx,8), %ebp
-; X32-NEXT:    adcl 4(%esi,%ebx,8), %ecx
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT:    addl (%edi,%ebx,8), %ebp
+; X32-NEXT:    adcl 4(%edi,%ebx,8), %ecx
 ; X32-NEXT:    addl %ebp, %eax
 ; X32-NEXT:    adcl %ecx, %edx
+; X32-NEXT:    movl %esi, %ecx
 ; X32-NEXT:    incl %ebx
-; X32-NEXT:    cmpl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT:    cmpl %esi, %ebx
 ; X32-NEXT:    jb .LBB3_3
 ; X32-NEXT:    jmp .LBB3_4
 ; X32-NEXT:  .LBB3_1:

diff  --git a/llvm/test/CodeGen/X86/optimize-max-0.ll b/llvm/test/CodeGen/X86/optimize-max-0.ll
index 5367f390d1cb0..b001e1cb04375 100644
--- a/llvm/test/CodeGen/X86/optimize-max-0.ll
+++ b/llvm/test/CodeGen/X86/optimize-max-0.ll
@@ -450,49 +450,51 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 ; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
 ; CHECK-NEXT:    subl $28, %esp
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; CHECK-NEXT:    movl %ebp, %eax
-; CHECK-NEXT:    imull %ecx, %eax
+; CHECK-NEXT:    movl %ebp, %edx
+; CHECK-NEXT:    imull %eax, %edx
 ; CHECK-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
-; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT:    je LBB1_19
 ; CHECK-NEXT:  ## %bb.1: ## %bb10.preheader
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT:    movl %edx, %ecx
+; CHECK-NEXT:    shrl $2, %ecx
+; CHECK-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT:    testl %ebp, %ebp
+; CHECK-NEXT:    movl %eax, %edi
 ; CHECK-NEXT:    je LBB1_12
 ; CHECK-NEXT:  ## %bb.2: ## %bb.nph9
-; CHECK-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    testl %eax, %eax
 ; CHECK-NEXT:    je LBB1_12
 ; CHECK-NEXT:  ## %bb.3: ## %bb.nph9.split
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    incl %eax
 ; CHECK-NEXT:    xorl %ecx, %ecx
-; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB1_6: ## %bb7.preheader
-; CHECK-NEXT:    ## =>This Loop Header: Depth=1
-; CHECK-NEXT:    ## Child Loop BB1_4 Depth 2
+; CHECK-NEXT:    movl %esi, %edx
 ; CHECK-NEXT:    xorl %esi, %esi
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB1_4: ## %bb6
-; CHECK-NEXT:    ## Parent Loop BB1_6 Depth=1
-; CHECK-NEXT:    ## => This Inner Loop Header: Depth=2
+; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    movzbl (%eax,%esi,2), %ebx
 ; CHECK-NEXT:    movb %bl, (%edx,%esi)
 ; CHECK-NEXT:    incl %esi
 ; CHECK-NEXT:    cmpl %edi, %esi
 ; CHECK-NEXT:    jb LBB1_4
 ; CHECK-NEXT:  ## %bb.5: ## %bb9
-; CHECK-NEXT:    ## in Loop: Header=BB1_6 Depth=1
+; CHECK-NEXT:    ## in Loop: Header=BB1_4 Depth=1
+; CHECK-NEXT:    movl %edi, %ebx
 ; CHECK-NEXT:    incl %ecx
 ; CHECK-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    addl %edi, %edx
 ; CHECK-NEXT:    cmpl %ebp, %ecx
-; CHECK-NEXT:    jne LBB1_6
+; CHECK-NEXT:    je LBB1_12
+; CHECK-NEXT:  ## %bb.6: ## %bb7.preheader
+; CHECK-NEXT:    ## in Loop: Header=BB1_4 Depth=1
+; CHECK-NEXT:    xorl %esi, %esi
+; CHECK-NEXT:    jmp LBB1_4
 ; CHECK-NEXT:  LBB1_12: ## %bb18.loopexit
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx ## 4-byte Reload
@@ -501,10 +503,10 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 ; CHECK-NEXT:    cmpl $1, %ebp
 ; CHECK-NEXT:    jbe LBB1_13
 ; CHECK-NEXT:  ## %bb.7: ## %bb.nph5
-; CHECK-NEXT:    cmpl $2, {{[0-9]+}}(%esp)
+; CHECK-NEXT:    cmpl $2, %edi
 ; CHECK-NEXT:    jb LBB1_13
 ; CHECK-NEXT:  ## %bb.8: ## %bb.nph5.split
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    movl %edi, %ebp
 ; CHECK-NEXT:    shrl %ebp
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    shrl %eax
@@ -518,14 +520,14 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
 ; CHECK-NEXT:    addl %edx, %eax
 ; CHECK-NEXT:    xorl %edx, %edx
-; CHECK-NEXT:    xorl %edi, %edi
+; CHECK-NEXT:    xorl %ebx, %ebx
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB1_9: ## %bb13
 ; CHECK-NEXT:    ## =>This Loop Header: Depth=1
 ; CHECK-NEXT:    ## Child Loop BB1_10 Depth 2
-; CHECK-NEXT:    movl %edi, %ebx
+; CHECK-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
 ; CHECK-NEXT:    andl $1, %ebx
-; CHECK-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Spill
+; CHECK-NEXT:    movl %edx, (%esp) ## 4-byte Spill
 ; CHECK-NEXT:    addl %edx, %ebx
 ; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %ebx
 ; CHECK-NEXT:    addl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
@@ -543,26 +545,27 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 ; CHECK-NEXT:    jb LBB1_10
 ; CHECK-NEXT:  ## %bb.11: ## %bb17
 ; CHECK-NEXT:    ## in Loop: Header=BB1_9 Depth=1
-; CHECK-NEXT:    incl %edi
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Reload
+; CHECK-NEXT:    incl %ebx
 ; CHECK-NEXT:    addl %ebp, %ecx
-; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; CHECK-NEXT:    movl (%esp), %edx ## 4-byte Reload
 ; CHECK-NEXT:    addl $2, %edx
 ; CHECK-NEXT:    addl %ebp, %eax
-; CHECK-NEXT:    cmpl {{[-0-9]+}}(%e{{[sb]}}p), %edi ## 4-byte Folded Reload
+; CHECK-NEXT:    cmpl {{[-0-9]+}}(%e{{[sb]}}p), %ebx ## 4-byte Folded Reload
 ; CHECK-NEXT:    jb LBB1_9
 ; CHECK-NEXT:  LBB1_13: ## %bb20
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    cmpl $1, %edx
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    cmpl $1, %esi
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; CHECK-NEXT:    je LBB1_19
 ; CHECK-NEXT:  ## %bb.14: ## %bb20
-; CHECK-NEXT:    cmpl $3, %edx
+; CHECK-NEXT:    cmpl $3, %esi
 ; CHECK-NEXT:    jne LBB1_24
 ; CHECK-NEXT:  ## %bb.15: ## %bb22
-; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
-; CHECK-NEXT:    addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
+; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx ## 4-byte Reload
+; CHECK-NEXT:    addl %edx, {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
 ; CHECK-NEXT:    testl %ebp, %ebp
 ; CHECK-NEXT:    je LBB1_18
 ; CHECK-NEXT:  ## %bb.16: ## %bb.nph
@@ -570,9 +573,11 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 ; CHECK-NEXT:    leal 15(%ebp), %eax
 ; CHECK-NEXT:    andl $-16, %eax
 ; CHECK-NEXT:    imull {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    leal 15(%ecx), %ebx
-; CHECK-NEXT:    andl $-16, %ebx
-; CHECK-NEXT:    addl %eax, %edi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    addl $15, %edx
+; CHECK-NEXT:    andl $-16, %edx
+; CHECK-NEXT:    movl %edx, (%esp) ## 4-byte Spill
+; CHECK-NEXT:    addl %eax, %ecx
 ; CHECK-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax ## 4-byte Reload
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    leal (%edx,%eax), %ebp
@@ -580,14 +585,16 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 ; CHECK-NEXT:  LBB1_17: ## %bb23
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    subl $4, %esp
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; CHECK-NEXT:    pushl %ebx
 ; CHECK-NEXT:    pushl %ecx
-; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %ecx, %edi
 ; CHECK-NEXT:    calll _memcpy
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %edi, %ecx
 ; CHECK-NEXT:    addl $16, %esp
-; CHECK-NEXT:    addl %ecx, %ebp
-; CHECK-NEXT:    addl %ebx, %edi
+; CHECK-NEXT:    addl %ebx, %ebp
+; CHECK-NEXT:    addl (%esp), %ecx ## 4-byte Folded Reload
 ; CHECK-NEXT:    decl %esi
 ; CHECK-NEXT:    jne LBB1_17
 ; CHECK-NEXT:  LBB1_18: ## %bb26
@@ -607,21 +614,24 @@ define void @bar(i8* %r, i32 %s, i32 %w, i32 %x, i8* %j, i32 %d) nounwind {
 ; CHECK-NEXT:    je LBB1_22
 ; CHECK-NEXT:  ## %bb.20: ## %bb.nph11
 ; CHECK-NEXT:    movl %ebp, %esi
-; CHECK-NEXT:    leal 15(%ecx), %ebx
-; CHECK-NEXT:    andl $-16, %ebx
+; CHECK-NEXT:    movl %eax, %edi
+; CHECK-NEXT:    addl $15, %eax
+; CHECK-NEXT:    andl $-16, %eax
+; CHECK-NEXT:    movl %eax, (%esp) ## 4-byte Spill
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  LBB1_21: ## %bb30
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    subl $4, %esp
-; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %edi
+; CHECK-NEXT:    pushl %ecx
 ; CHECK-NEXT:    pushl %ebp
+; CHECK-NEXT:    movl %ecx, %ebx
 ; CHECK-NEXT:    calll _memcpy
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT:    movl %ebx, %ecx
 ; CHECK-NEXT:    addl $16, %esp
-; CHECK-NEXT:    addl %ecx, %ebp
-; CHECK-NEXT:    addl %ebx, %edi
+; CHECK-NEXT:    addl %edi, %ebp
+; CHECK-NEXT:    addl (%esp), %ecx ## 4-byte Folded Reload
 ; CHECK-NEXT:    decl %esi
 ; CHECK-NEXT:    jne LBB1_21
 ; CHECK-NEXT:  LBB1_22: ## %bb33


        


More information about the llvm-commits mailing list