[llvm] bee8dcd - [SCEV] Fix and validate ValueExprMap/ExprValueMap consistency

Nikita Popov via llvm-commits llvm-commits at lists.llvm.org
Fri Nov 26 11:57:55 PST 2021


Author: Nikita Popov
Date: 2021-11-26T20:57:47+01:00
New Revision: bee8dcda1f01831808885ea23f9ac1467ce660f5

URL: https://github.com/llvm/llvm-project/commit/bee8dcda1f01831808885ea23f9ac1467ce660f5
DIFF: https://github.com/llvm/llvm-project/commit/bee8dcda1f01831808885ea23f9ac1467ce660f5.diff

LOG: [SCEV] Fix and validate ValueExprMap/ExprValueMap consistency

Relative to the previous landing attempt, this makes
insertValueToMap() resilient against the value already being
present in the map -- previously I only checked this for the
createSimpleAffineAddRec() case, but the same issue can also
occur for the general createNodeForPHI(). In both cases, the
addrec may be constructed and added to the map in a recursive
query trying to create said addrec. In this case, this happens
due to the invalidation when the BE count is computed, which
ends up clearing out the symbolic name as well.

-----

This adds validation for consistency of ValueExprMap and
ExprValueMap, and fixes identified issues:

* Addrec construction directly wrote to ValueExprMap in a few places,
  without updating ExprValueMap. Add a helper that ensures they stay
  consistent. The adjustment in forgetSymbolicName() explicitly
  drops the old value from the map, so that we don't rely on it
  being overwritten.
* forgetMemoizedResultsImpl() was dropping the SCEV from
  ExprValueMap, but not dropping the corresponding entries from
  ValueExprMap.

Differential Revision: https://reviews.llvm.org/D113349

Added: 
    llvm/test/Transforms/LoopStrengthReduce/X86/nested-ptr-addrec.ll

Modified: 
    llvm/include/llvm/Analysis/ScalarEvolution.h
    llvm/lib/Analysis/ScalarEvolution.cpp
    llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/ScalarEvolution.h b/llvm/include/llvm/Analysis/ScalarEvolution.h
index a2260688e3d6e..4cfbf49bbc862 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolution.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolution.h
@@ -1923,6 +1923,9 @@ class ScalarEvolution {
   /// Erase Value from ValueExprMap and ExprValueMap.
   void eraseValueFromMap(Value *V);
 
+  /// Insert V to S mapping into ValueExprMap and ExprValueMap.
+  void insertValueToMap(Value *V, const SCEV *S);
+
   /// Return false iff given SCEV contains a SCEVUnknown with NULL value-
   /// pointer.
   bool checkValidity(const SCEV *S) const;

diff  --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index 4cdb8336d38b1..a6e4e848f0e9b 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4094,6 +4094,18 @@ void ScalarEvolution::eraseValueFromMap(Value *V) {
   }
 }
 
+void ScalarEvolution::insertValueToMap(Value *V, const SCEV *S) {
+  auto It = ValueExprMap.find_as(V);
+  if (It == ValueExprMap.end()) {
+    ValueExprMap.insert({SCEVCallbackVH(V, this), S});
+    ExprValueMap[S].insert({V, nullptr});
+  } else {
+    // A recursive query may have already computed the SCEV. It should have
+    // arrived at the same value.
+    assert(It->second == S);
+  }
+}
+
 /// Return an existing SCEV if it exists, otherwise analyze the expression and
 /// create a new one.
 const SCEV *ScalarEvolution::getSCEV(Value *V) {
@@ -4455,9 +4467,7 @@ void ScalarEvolution::forgetSymbolicName(Instruction *PN, const SCEV *SymName) {
       // In the second case, createNodeForPHI will perform the necessary
       // updates on its own when it gets to that point. In the third, we do
       // want to forget the SCEVUnknown.
-      if (!isa<PHINode>(I) ||
-          !isa<SCEVUnknown>(Old) ||
-          (I != PN && Old == SymName)) {
+      if (!isa<PHINode>(I) || !isa<SCEVUnknown>(Old) || Old == SymName) {
         eraseValueFromMap(It->first);
         ToForget.push_back(Old);
       }
@@ -5335,8 +5345,7 @@ const SCEV *ScalarEvolution::createSimpleAffineAddRec(PHINode *PN,
 
   const SCEV *StartVal = getSCEV(StartValueV);
   const SCEV *PHISCEV = getAddRecExpr(StartVal, Accum, L, Flags);
-
-  ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+  insertValueToMap(PN, PHISCEV);
 
   // We can add Flags to the post-inc expression only if we
   // know that it is *undefined behavior* for BEValueV to
@@ -5389,7 +5398,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
 
   // Handle PHI node value symbolically.
   const SCEV *SymbolicName = getUnknown(PN);
-  ValueExprMap.insert({SCEVCallbackVH(PN, this), SymbolicName});
+  insertValueToMap(PN, SymbolicName);
 
   // Using this symbolic name for the PHI, analyze the value coming around
   // the back-edge.
@@ -5461,7 +5470,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
         // to be symbolic.  We now need to go back and purge all of the
         // entries for the scalars that use the symbolic expression.
         forgetSymbolicName(PN, SymbolicName);
-        ValueExprMap[SCEVCallbackVH(PN, this)] = PHISCEV;
+        insertValueToMap(PN, PHISCEV);
 
         // We can add Flags to the post-inc expression only if we
         // know that it is *undefined behavior* for BEValueV to
@@ -5493,7 +5502,7 @@ const SCEV *ScalarEvolution::createAddRecFromPHI(PHINode *PN) {
         // to be symbolic.  We now need to go back and purge all of the
         // entries for the scalars that use the symbolic expression.
         forgetSymbolicName(PN, SymbolicName);
-        ValueExprMap[SCEVCallbackVH(PN, this)] = Shifted;
+        insertValueToMap(PN, Shifted);
         return Shifted;
       }
     }
@@ -12996,9 +13005,20 @@ void ScalarEvolution::forgetMemoizedResultsImpl(const SCEV *S) {
   BlockDispositions.erase(S);
   UnsignedRanges.erase(S);
   SignedRanges.erase(S);
-  ExprValueMap.erase(S);
   HasRecMap.erase(S);
   MinTrailingZerosCache.erase(S);
+
+  auto ExprIt = ExprValueMap.find(S);
+  if (ExprIt != ExprValueMap.end()) {
+    for (auto &ValueAndOffset : ExprIt->second) {
+      if (ValueAndOffset.second == nullptr) {
+        auto ValueIt = ValueExprMap.find_as(ValueAndOffset.first);
+        if (ValueIt != ValueExprMap.end())
+          ValueExprMap.erase(ValueIt);
+      }
+    }
+    ExprValueMap.erase(ExprIt);
+  }
 }
 
 void
@@ -13103,13 +13123,40 @@ void ScalarEvolution::verify() const {
     ValidLoops.insert(L);
     Worklist.append(L->begin(), L->end());
   }
-  // Check for SCEV expressions referencing invalid/deleted loops.
   for (auto &KV : ValueExprMap) {
-    auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second);
-    if (!AR)
-      continue;
-    assert(ValidLoops.contains(AR->getLoop()) &&
-           "AddRec references invalid loop");
+    // Check for SCEV expressions referencing invalid/deleted loops.
+    if (auto *AR = dyn_cast<SCEVAddRecExpr>(KV.second)) {
+      assert(ValidLoops.contains(AR->getLoop()) &&
+             "AddRec references invalid loop");
+    }
+
+    // Check that the value is also part of the reverse map.
+    auto It = ExprValueMap.find(KV.second);
+    if (It == ExprValueMap.end() || !It->second.contains({KV.first, nullptr})) {
+      dbgs() << "Value " << *KV.first
+             << " is in ValueExprMap but not in ExprValueMap\n";
+      std::abort();
+    }
+  }
+
+  for (const auto &KV : ExprValueMap) {
+    for (const auto &ValueAndOffset : KV.second) {
+      if (ValueAndOffset.second != nullptr)
+        continue;
+
+      auto It = ValueExprMap.find_as(ValueAndOffset.first);
+      if (It == ValueExprMap.end()) {
+        dbgs() << "Value " << *ValueAndOffset.first
+               << " is in ExprValueMap but not in ValueExprMap\n";
+        std::abort();
+      }
+      if (It->second != KV.first) {
+        dbgs() << "Value " << *ValueAndOffset.first
+               << " mapped to " << *It->second
+               << " rather than " << *KV.first << "\n";
+        std::abort();
+      }
+    }
   }
 
   // Verify intergity of SCEV users.

diff  --git a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
index d5ef4e64d0a85..29c93f44f25c4 100644
--- a/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
+++ b/llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll
@@ -10,8 +10,8 @@ target triple = "powerpc64le-unknown-linux-gnu"
 define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_type_of_x]* %.x, i32* %.l, <2 x double>* %.vy01, <2 x double>* %.vy02, <2 x double>* %.vy03, <2 x double>* %.vy04, <2 x double>* %.vy05, <2 x double>* %.vy06, <2 x double>* %.vy07, <2 x double>* %.vy08, <2 x double>* %.vy09, <2 x double>* %.vy0a, <2 x double>* %.vy0b, <2 x double>* %.vy0c, <2 x double>* %.vy21, <2 x double>* %.vy22, <2 x double>* %.vy23, <2 x double>* %.vy24, <2 x double>* %.vy25, <2 x double>* %.vy26, <2 x double>* %.vy27, <2 x double>* %.vy28, <2 x double>* %.vy29, <2 x double>* %.vy2a, <2 x double>* %.vy2b, <2 x double>* %.vy2c) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    stdu 1, -608(1)
-; CHECK-NEXT:    .cfi_def_cfa_offset 608
+; CHECK-NEXT:    stdu 1, -592(1)
+; CHECK-NEXT:    .cfi_def_cfa_offset 592
 ; CHECK-NEXT:    .cfi_offset r14, -192
 ; CHECK-NEXT:    .cfi_offset r15, -184
 ; CHECK-NEXT:    .cfi_offset r16, -176
@@ -48,322 +48,313 @@ define void @foo(i32* %.m, i32* %.n, [0 x %_elem_type_of_a]* %.a, [0 x %_elem_ty
 ; CHECK-NEXT:    .cfi_offset v29, -240
 ; CHECK-NEXT:    .cfi_offset v30, -224
 ; CHECK-NEXT:    .cfi_offset v31, -208
-; CHECK-NEXT:    lwz 0, 0(4)
-; CHECK-NEXT:    std 14, 416(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 15, 424(1) # 8-byte Folded Spill
-; CHECK-NEXT:    cmpwi 0, 1
-; CHECK-NEXT:    std 16, 432(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 17, 440(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 18, 448(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 19, 456(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 20, 464(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 21, 472(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 22, 480(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 23, 488(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 24, 496(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, 504(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 26, 512(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 27, 520(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 28, 528(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, 536(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 30, 544(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 31, 552(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 26, 560(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 27, 568(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 28, 576(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 29, 584(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 30, 592(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stfd 31, 600(1) # 8-byte Folded Spill
-; CHECK-NEXT:    stxv 52, 224(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 53, 240(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 54, 256(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 55, 272(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 56, 288(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 57, 304(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 58, 320(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 59, 336(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 60, 352(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 61, 368(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 62, 384(1) # 16-byte Folded Spill
-; CHECK-NEXT:    stxv 63, 400(1) # 16-byte Folded Spill
+; CHECK-NEXT:    lwz 4, 0(4)
+; CHECK-NEXT:    std 14, 400(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 408(1) # 8-byte Folded Spill
+; CHECK-NEXT:    cmpwi 4, 1
+; CHECK-NEXT:    std 16, 416(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 17, 424(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 18, 432(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 19, 440(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, 448(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 21, 456(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 22, 464(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, 472(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, 480(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 25, 488(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, 496(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 27, 504(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 28, 512(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 29, 520(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, 528(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 31, 536(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 26, 544(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 27, 552(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 28, 560(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 29, 568(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 30, 576(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stfd 31, 584(1) # 8-byte Folded Spill
+; CHECK-NEXT:    stxv 52, 208(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 53, 224(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 54, 240(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 55, 256(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 56, 272(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 57, 288(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 58, 304(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 59, 320(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 60, 336(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 61, 352(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 62, 368(1) # 16-byte Folded Spill
+; CHECK-NEXT:    stxv 63, 384(1) # 16-byte Folded Spill
 ; CHECK-NEXT:    blt 0, .LBB0_7
 ; CHECK-NEXT:  # %bb.1: # %_loop_1_do_.lr.ph
 ; CHECK-NEXT:    lwz 3, 0(3)
 ; CHECK-NEXT:    cmpwi 3, 1
 ; CHECK-NEXT:    blt 0, .LBB0_7
 ; CHECK-NEXT:  # %bb.2: # %_loop_1_do_.preheader
-; CHECK-NEXT:    mr 23, 5
-; CHECK-NEXT:    ld 5, 704(1)
+; CHECK-NEXT:    mr 21, 5
+; CHECK-NEXT:    ld 5, 848(1)
+; CHECK-NEXT:    lwa 0, 0(7)
 ; CHECK-NEXT:    addi 3, 3, 1
-; CHECK-NEXT:    ld 4, 728(1)
+; CHECK-NEXT:    mr 14, 7
+; CHECK-NEXT:    mr 22, 6
 ; CHECK-NEXT:    mr 11, 10
-; CHECK-NEXT:    mr 10, 6
-; CHECK-NEXT:    std 8, 40(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 9, 48(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lwa 7, 0(7)
-; CHECK-NEXT:    ld 29, 840(1)
+; CHECK-NEXT:    ld 18, 736(1)
+; CHECK-NEXT:    ld 17, 728(1)
+; CHECK-NEXT:    std 8, 32(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 9, 40(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    cmpldi 3, 9
-; CHECK-NEXT:    ld 27, 832(1)
-; CHECK-NEXT:    ld 28, 856(1)
-; CHECK-NEXT:    std 5, 112(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 4, 120(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 1, 0(5)
+; CHECK-NEXT:    ld 19, 744(1)
+; CHECK-NEXT:    ld 20, 752(1)
+; CHECK-NEXT:    std 5, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT:    ld 5, 840(1)
+; CHECK-NEXT:    std 17, 80(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 18, 88(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 36, 0(18)
+; CHECK-NEXT:    std 19, 96(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 20, 104(1) # 8-byte Folded Spill
+; CHECK-NEXT:    lxv 13, 0(19)
+; CHECK-NEXT:    lxv 12, 0(20)
+; CHECK-NEXT:    ld 30, 832(1)
+; CHECK-NEXT:    ld 2, 824(1)
+; CHECK-NEXT:    ld 12, 816(1)
+; CHECK-NEXT:    ld 29, 808(1)
+; CHECK-NEXT:    std 2, 176(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 30, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 29, 160(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 12, 168(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 5, 192(1) # 8-byte Folded Spill
 ; CHECK-NEXT:    li 5, 9
-; CHECK-NEXT:    ld 30, 848(1)
-; CHECK-NEXT:    lxv 0, 0(4)
-; CHECK-NEXT:    sldi 4, 7, 3
-; CHECK-NEXT:    add 4, 4, 23
-; CHECK-NEXT:    sldi 16, 7, 2
-; CHECK-NEXT:    sldi 15, 7, 1
-; CHECK-NEXT:    ld 17, 760(1)
-; CHECK-NEXT:    std 27, 192(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 29, 200(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 6, 0(29)
-; CHECK-NEXT:    ld 26, 824(1)
-; CHECK-NEXT:    ld 25, 816(1)
-; CHECK-NEXT:    ld 24, 808(1)
-; CHECK-NEXT:    std 30, 208(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 28, 216(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 25, 176(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 26, 184(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 24, 168(1) # 8-byte Folded Spill
-; CHECK-NEXT:    iselgt 3, 3, 5
-; CHECK-NEXT:    ld 5, 752(1)
-; CHECK-NEXT:    addi 14, 4, 32
-; CHECK-NEXT:    sldi 4, 7, 4
-; CHECK-NEXT:    add 29, 7, 15
-; CHECK-NEXT:    ld 22, 800(1)
-; CHECK-NEXT:    ld 21, 792(1)
-; CHECK-NEXT:    ld 20, 784(1)
-; CHECK-NEXT:    std 22, 160(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 20, 144(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 21, 152(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 6, 3, -2
-; CHECK-NEXT:    add 3, 7, 16
-; CHECK-NEXT:    add 4, 4, 23
-; CHECK-NEXT:    ld 19, 776(1)
-; CHECK-NEXT:    ld 18, 768(1)
-; CHECK-NEXT:    lxv 4, 0(8)
-; CHECK-NEXT:    lxv 2, 0(11)
-; CHECK-NEXT:    std 18, 128(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 19, 136(1) # 8-byte Folded Spill
-; CHECK-NEXT:    addi 12, 4, 32
-; CHECK-NEXT:    rldicl 2, 6, 61, 3
-; CHECK-NEXT:    sldi 6, 3, 3
-; CHECK-NEXT:    ld 4, 736(1)
-; CHECK-NEXT:    ld 31, 720(1)
-; CHECK-NEXT:    std 11, 56(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 31, 64(1) # 8-byte Folded Spill
-; CHECK-NEXT:    add 11, 23, 6
-; CHECK-NEXT:    ld 6, 744(1)
-; CHECK-NEXT:    ld 8, 712(1)
-; CHECK-NEXT:    std 5, 96(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 17, 104(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 39, 0(5)
-; CHECK-NEXT:    sldi 5, 7, 5
-; CHECK-NEXT:    lxv 5, 0(30)
-; CHECK-NEXT:    lxv 7, 0(28)
-; CHECK-NEXT:    lxv 3, 0(9)
-; CHECK-NEXT:    addi 2, 2, 1
-; CHECK-NEXT:    add 30, 23, 5
-; CHECK-NEXT:    sldi 5, 29, 3
-; CHECK-NEXT:    add 28, 23, 5
-; CHECK-NEXT:    ld 5, 864(1)
+; CHECK-NEXT:    ld 28, 800(1)
+; CHECK-NEXT:    ld 27, 792(1)
+; CHECK-NEXT:    ld 26, 784(1)
+; CHECK-NEXT:    ld 25, 776(1)
+; CHECK-NEXT:    ld 24, 768(1)
+; CHECK-NEXT:    ld 23, 760(1)
+; CHECK-NEXT:    ld 16, 720(1)
+; CHECK-NEXT:    ld 15, 712(1)
+; CHECK-NEXT:    ld 6, 704(1)
+; CHECK-NEXT:    ld 7, 696(1)
+; CHECK-NEXT:    ld 10, 688(1)
 ; CHECK-NEXT:    lxv 43, 0(8)
-; CHECK-NEXT:    lxv 42, 0(31)
-; CHECK-NEXT:    lxv 38, 0(17)
-; CHECK-NEXT:    std 4, 72(1) # 8-byte Folded Spill
-; CHECK-NEXT:    std 6, 80(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 41, 0(4)
-; CHECK-NEXT:    lxv 40, 0(6)
-; CHECK-NEXT:    lxv 33, 0(18)
-; CHECK-NEXT:    lxv 32, 0(19)
-; CHECK-NEXT:    std 5, 88(1) # 8-byte Folded Spill
-; CHECK-NEXT:    lxv 37, 0(20)
-; CHECK-NEXT:    lxv 36, 0(21)
-; CHECK-NEXT:    lxv 13, 0(22)
-; CHECK-NEXT:    lxv 12, 0(24)
-; CHECK-NEXT:    lxv 11, 0(25)
-; CHECK-NEXT:    lxv 9, 0(26)
-; CHECK-NEXT:    lxv 8, 0(27)
-; CHECK-NEXT:    lxv 10, 0(5)
-; CHECK-NEXT:    mulli 27, 7, 48
-; CHECK-NEXT:    mulli 26, 7, 6
-; CHECK-NEXT:    li 25, 1
-; CHECK-NEXT:    li 24, 0
-; CHECK-NEXT:    mr 5, 23
+; CHECK-NEXT:    std 11, 48(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 6, 56(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 27, 144(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 28, 152(1) # 8-byte Folded Spill
+; CHECK-NEXT:    mr 8, 7
+; CHECK-NEXT:    std 25, 128(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 26, 136(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 15, 64(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 16, 72(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 23, 112(1) # 8-byte Folded Spill
+; CHECK-NEXT:    std 24, 120(1) # 8-byte Folded Spill
+; CHECK-NEXT:    iselgt 3, 3, 5
+; CHECK-NEXT:    sldi 5, 0, 3
+; CHECK-NEXT:    add 5, 5, 21
+; CHECK-NEXT:    lxv 42, 0(9)
+; CHECK-NEXT:    lxv 41, 0(11)
+; CHECK-NEXT:    lxv 40, 0(10)
+; CHECK-NEXT:    lxv 39, 0(7)
+; CHECK-NEXT:    mulli 11, 0, 48
+; CHECK-NEXT:    addi 14, 5, 32
+; CHECK-NEXT:    sldi 5, 0, 4
+; CHECK-NEXT:    addi 3, 3, -2
+; CHECK-NEXT:    lxv 38, 0(6)
+; CHECK-NEXT:    lxv 33, 0(15)
+; CHECK-NEXT:    lxv 32, 0(16)
+; CHECK-NEXT:    lxv 37, 0(17)
+; CHECK-NEXT:    add 5, 5, 21
+; CHECK-NEXT:    lxv 11, 0(23)
+; CHECK-NEXT:    lxv 10, 0(24)
+; CHECK-NEXT:    lxv 8, 0(25)
+; CHECK-NEXT:    lxv 6, 0(26)
+; CHECK-NEXT:    rldicl 3, 3, 61, 3
+; CHECK-NEXT:    li 26, 0
+; CHECK-NEXT:    mr 25, 21
+; CHECK-NEXT:    addi 31, 5, 32
+; CHECK-NEXT:    mulli 5, 0, 40
+; CHECK-NEXT:    lxv 5, 0(27)
+; CHECK-NEXT:    lxv 3, 0(28)
+; CHECK-NEXT:    lxv 1, 0(29)
+; CHECK-NEXT:    lxv 0, 0(12)
+; CHECK-NEXT:    mulli 28, 0, 6
+; CHECK-NEXT:    addi 3, 3, 1
+; CHECK-NEXT:    li 27, 1
+; CHECK-NEXT:    add 18, 21, 5
+; CHECK-NEXT:    sldi 5, 0, 5
+; CHECK-NEXT:    lxv 2, 0(2)
+; CHECK-NEXT:    lxv 4, 0(30)
+; CHECK-NEXT:    sldi 2, 0, 1
+; CHECK-NEXT:    add 19, 21, 5
+; CHECK-NEXT:    mulli 5, 0, 24
+; CHECK-NEXT:    add 20, 21, 5
+; CHECK-NEXT:    ld 5, 192(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lxv 9, 0(5)
+; CHECK-NEXT:    ld 5, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lxv 7, 0(5)
 ; CHECK-NEXT:    .p2align 4
 ; CHECK-NEXT:  .LBB0_3: # %_loop_2_do_.lr.ph
 ; CHECK-NEXT:    # =>This Loop Header: Depth=1
 ; CHECK-NEXT:    # Child Loop BB0_4 Depth 2
-; CHECK-NEXT:    maddld 6, 26, 24, 3
-; CHECK-NEXT:    mtctr 2
-; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 22, 23, 6
-; CHECK-NEXT:    maddld 6, 26, 24, 16
-; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 21, 23, 6
-; CHECK-NEXT:    maddld 6, 26, 24, 29
-; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 20, 23, 6
-; CHECK-NEXT:    maddld 6, 26, 24, 15
-; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 19, 23, 6
-; CHECK-NEXT:    maddld 6, 26, 24, 7
-; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 18, 23, 6
-; CHECK-NEXT:    mulld 6, 26, 24
-; CHECK-NEXT:    sldi 6, 6, 3
-; CHECK-NEXT:    add 17, 23, 6
-; CHECK-NEXT:    mr 6, 10
+; CHECK-NEXT:    maddld 5, 28, 26, 2
+; CHECK-NEXT:    mr 6, 22
+; CHECK-NEXT:    mr 29, 20
+; CHECK-NEXT:    mr 30, 19
+; CHECK-NEXT:    mr 12, 18
+; CHECK-NEXT:    mtctr 3
+; CHECK-NEXT:    sldi 5, 5, 3
+; CHECK-NEXT:    add 24, 21, 5
+; CHECK-NEXT:    maddld 5, 28, 26, 0
+; CHECK-NEXT:    sldi 5, 5, 3
+; CHECK-NEXT:    add 23, 21, 5
+; CHECK-NEXT:    mr 5, 25
 ; CHECK-NEXT:    .p2align 5
 ; CHECK-NEXT:  .LBB0_4: # %_loop_2_do_
 ; CHECK-NEXT:    # Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT:    # => This Inner Loop Header: Depth=2
 ; CHECK-NEXT:    lxvp 34, 0(6)
-; CHECK-NEXT:    lxvp 44, 0(17)
-; CHECK-NEXT:    xvmaddadp 4, 45, 35
-; CHECK-NEXT:    lxvp 46, 0(18)
-; CHECK-NEXT:    xvmaddadp 3, 47, 35
-; CHECK-NEXT:    lxvp 48, 0(19)
-; CHECK-NEXT:    lxvp 50, 0(20)
-; CHECK-NEXT:    lxvp 62, 0(21)
-; CHECK-NEXT:    lxvp 60, 0(22)
+; CHECK-NEXT:    lxvp 44, 0(5)
+; CHECK-NEXT:    xvmaddadp 43, 45, 35
+; CHECK-NEXT:    lxvp 46, 0(23)
+; CHECK-NEXT:    xvmaddadp 42, 47, 35
+; CHECK-NEXT:    lxvp 48, 0(24)
+; CHECK-NEXT:    lxvp 50, 0(29)
+; CHECK-NEXT:    lxvp 62, 0(30)
+; CHECK-NEXT:    lxvp 60, 0(12)
 ; CHECK-NEXT:    lxvp 58, 32(6)
-; CHECK-NEXT:    lxvp 56, 32(17)
-; CHECK-NEXT:    lxvp 54, 32(18)
-; CHECK-NEXT:    lxvp 52, 32(19)
-; CHECK-NEXT:    lxvp 30, 32(20)
-; CHECK-NEXT:    lxvp 28, 32(21)
-; CHECK-NEXT:    lxvp 26, 32(22)
-; CHECK-NEXT:    xvmaddadp 2, 49, 35
-; CHECK-NEXT:    xvmaddadp 1, 51, 35
-; CHECK-NEXT:    xvmaddadp 43, 63, 35
-; CHECK-NEXT:    xvmaddadp 42, 61, 35
-; CHECK-NEXT:    xvmaddadp 0, 44, 34
-; CHECK-NEXT:    xvmaddadp 41, 46, 34
-; CHECK-NEXT:    xvmaddadp 40, 48, 34
-; CHECK-NEXT:    xvmaddadp 39, 50, 34
-; CHECK-NEXT:    xvmaddadp 38, 62, 34
-; CHECK-NEXT:    xvmaddadp 33, 60, 34
-; CHECK-NEXT:    xvmaddadp 32, 57, 59
-; CHECK-NEXT:    xvmaddadp 37, 55, 59
-; CHECK-NEXT:    xvmaddadp 36, 53, 59
-; CHECK-NEXT:    xvmaddadp 13, 31, 59
-; CHECK-NEXT:    xvmaddadp 12, 29, 59
-; CHECK-NEXT:    xvmaddadp 11, 27, 59
-; CHECK-NEXT:    xvmaddadp 9, 56, 58
-; CHECK-NEXT:    xvmaddadp 8, 54, 58
-; CHECK-NEXT:    xvmaddadp 6, 52, 58
-; CHECK-NEXT:    xvmaddadp 5, 30, 58
-; CHECK-NEXT:    xvmaddadp 7, 28, 58
-; CHECK-NEXT:    xvmaddadp 10, 26, 58
+; CHECK-NEXT:    lxvp 56, 32(5)
+; CHECK-NEXT:    lxvp 54, 32(23)
+; CHECK-NEXT:    lxvp 52, 32(24)
+; CHECK-NEXT:    lxvp 30, 32(29)
+; CHECK-NEXT:    lxvp 28, 32(30)
+; CHECK-NEXT:    lxvp 26, 32(12)
+; CHECK-NEXT:    xvmaddadp 41, 49, 35
+; CHECK-NEXT:    xvmaddadp 40, 51, 35
+; CHECK-NEXT:    xvmaddadp 39, 63, 35
+; CHECK-NEXT:    xvmaddadp 38, 61, 35
+; CHECK-NEXT:    xvmaddadp 33, 44, 34
+; CHECK-NEXT:    xvmaddadp 32, 46, 34
+; CHECK-NEXT:    xvmaddadp 37, 48, 34
+; CHECK-NEXT:    xvmaddadp 36, 50, 34
+; CHECK-NEXT:    xvmaddadp 13, 62, 34
+; CHECK-NEXT:    xvmaddadp 12, 60, 34
+; CHECK-NEXT:    xvmaddadp 11, 57, 59
+; CHECK-NEXT:    xvmaddadp 10, 55, 59
+; CHECK-NEXT:    xvmaddadp 8, 53, 59
+; CHECK-NEXT:    xvmaddadp 6, 31, 59
+; CHECK-NEXT:    xvmaddadp 5, 29, 59
+; CHECK-NEXT:    xvmaddadp 3, 27, 59
+; CHECK-NEXT:    xvmaddadp 1, 56, 58
+; CHECK-NEXT:    xvmaddadp 0, 54, 58
+; CHECK-NEXT:    xvmaddadp 2, 52, 58
+; CHECK-NEXT:    xvmaddadp 4, 30, 58
+; CHECK-NEXT:    xvmaddadp 9, 28, 58
+; CHECK-NEXT:    xvmaddadp 7, 26, 58
 ; CHECK-NEXT:    addi 6, 6, 64
-; CHECK-NEXT:    addi 17, 17, 64
-; CHECK-NEXT:    addi 18, 18, 64
-; CHECK-NEXT:    addi 19, 19, 64
-; CHECK-NEXT:    addi 20, 20, 64
-; CHECK-NEXT:    addi 21, 21, 64
-; CHECK-NEXT:    addi 22, 22, 64
+; CHECK-NEXT:    addi 5, 5, 64
+; CHECK-NEXT:    addi 23, 23, 64
+; CHECK-NEXT:    addi 24, 24, 64
+; CHECK-NEXT:    addi 29, 29, 64
+; CHECK-NEXT:    addi 30, 30, 64
+; CHECK-NEXT:    addi 12, 12, 64
 ; CHECK-NEXT:    bdnz .LBB0_4
 ; CHECK-NEXT:  # %bb.5: # %_loop_2_endl_
 ; CHECK-NEXT:    #
-; CHECK-NEXT:    addi 25, 25, 6
-; CHECK-NEXT:    add 5, 5, 27
-; CHECK-NEXT:    add 14, 14, 27
-; CHECK-NEXT:    add 11, 11, 27
-; CHECK-NEXT:    add 12, 12, 27
-; CHECK-NEXT:    add 30, 30, 27
-; CHECK-NEXT:    add 28, 28, 27
-; CHECK-NEXT:    addi 24, 24, 1
-; CHECK-NEXT:    cmpld 25, 0
+; CHECK-NEXT:    addi 27, 27, 6
+; CHECK-NEXT:    add 25, 25, 11
+; CHECK-NEXT:    add 14, 14, 11
+; CHECK-NEXT:    add 18, 18, 11
+; CHECK-NEXT:    add 31, 31, 11
+; CHECK-NEXT:    add 19, 19, 11
+; CHECK-NEXT:    add 20, 20, 11
+; CHECK-NEXT:    addi 26, 26, 1
+; CHECK-NEXT:    cmpld 27, 4
 ; CHECK-NEXT:    ble 0, .LBB0_3
 ; CHECK-NEXT:  # %bb.6: # %_loop_1_loopHeader_._return_bb_crit_edge.loopexit
+; CHECK-NEXT:    ld 3, 32(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 43, 0(3)
 ; CHECK-NEXT:    ld 3, 40(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 4, 0(3)
+; CHECK-NEXT:    stxv 42, 0(3)
 ; CHECK-NEXT:    ld 3, 48(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 3, 0(3)
+; CHECK-NEXT:    stxv 41, 0(3)
 ; CHECK-NEXT:    ld 3, 56(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 2, 0(3)
-; CHECK-NEXT:    ld 3, 112(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 1, 0(3)
+; CHECK-NEXT:    stxv 40, 0(10)
+; CHECK-NEXT:    stxv 39, 0(8)
+; CHECK-NEXT:    stxv 38, 0(3)
 ; CHECK-NEXT:    ld 3, 64(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 43, 0(8)
-; CHECK-NEXT:    stxv 42, 0(3)
-; CHECK-NEXT:    ld 3, 120(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 0, 0(3)
+; CHECK-NEXT:    stxv 33, 0(3)
 ; CHECK-NEXT:    ld 3, 72(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 41, 0(3)
+; CHECK-NEXT:    stxv 32, 0(3)
 ; CHECK-NEXT:    ld 3, 80(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 40, 0(3)
+; CHECK-NEXT:    stxv 37, 0(3)
+; CHECK-NEXT:    ld 3, 88(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 36, 0(3)
 ; CHECK-NEXT:    ld 3, 96(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 39, 0(3)
+; CHECK-NEXT:    stxv 13, 0(3)
 ; CHECK-NEXT:    ld 3, 104(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 38, 0(3)
+; CHECK-NEXT:    stxv 12, 0(3)
+; CHECK-NEXT:    ld 3, 112(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 11, 0(3)
+; CHECK-NEXT:    ld 3, 120(1) # 8-byte Folded Reload
+; CHECK-NEXT:    stxv 10, 0(3)
 ; CHECK-NEXT:    ld 3, 128(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 33, 0(3)
+; CHECK-NEXT:    stxv 8, 0(3)
 ; CHECK-NEXT:    ld 3, 136(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 32, 0(3)
+; CHECK-NEXT:    stxv 6, 0(3)
 ; CHECK-NEXT:    ld 3, 144(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 37, 0(3)
+; CHECK-NEXT:    stxv 5, 0(3)
 ; CHECK-NEXT:    ld 3, 152(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 36, 0(3)
+; CHECK-NEXT:    stxv 3, 0(3)
 ; CHECK-NEXT:    ld 3, 160(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 13, 0(3)
+; CHECK-NEXT:    stxv 1, 0(3)
 ; CHECK-NEXT:    ld 3, 168(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 12, 0(3)
+; CHECK-NEXT:    stxv 0, 0(3)
 ; CHECK-NEXT:    ld 3, 176(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 11, 0(3)
+; CHECK-NEXT:    stxv 2, 0(3)
 ; CHECK-NEXT:    ld 3, 184(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 9, 0(3)
+; CHECK-NEXT:    stxv 4, 0(3)
 ; CHECK-NEXT:    ld 3, 192(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 8, 0(3)
+; CHECK-NEXT:    stxv 9, 0(3)
 ; CHECK-NEXT:    ld 3, 200(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 6, 0(3)
-; CHECK-NEXT:    ld 3, 208(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 5, 0(3)
-; CHECK-NEXT:    ld 3, 216(1) # 8-byte Folded Reload
 ; CHECK-NEXT:    stxv 7, 0(3)
-; CHECK-NEXT:    ld 3, 88(1) # 8-byte Folded Reload
-; CHECK-NEXT:    stxv 10, 0(3)
 ; CHECK-NEXT:  .LBB0_7: # %_return_bb
-; CHECK-NEXT:    lxv 63, 400(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 62, 384(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 61, 368(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 60, 352(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 59, 336(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 58, 320(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 57, 304(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 56, 288(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 55, 272(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 54, 256(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 53, 240(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lxv 52, 224(1) # 16-byte Folded Reload
-; CHECK-NEXT:    lfd 31, 600(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 30, 592(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 29, 584(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 28, 576(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 27, 568(1) # 8-byte Folded Reload
-; CHECK-NEXT:    lfd 26, 560(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 31, 552(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 30, 544(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 29, 536(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 28, 528(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 27, 520(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 26, 512(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 25, 504(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 24, 496(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 23, 488(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 22, 480(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 21, 472(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 20, 464(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 19, 456(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 18, 448(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 17, 440(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 16, 432(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 15, 424(1) # 8-byte Folded Reload
-; CHECK-NEXT:    ld 14, 416(1) # 8-byte Folded Reload
-; CHECK-NEXT:    addi 1, 1, 608
+; CHECK-NEXT:    lxv 63, 384(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 62, 368(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 61, 352(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 60, 336(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 59, 320(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 58, 304(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 57, 288(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 56, 272(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 55, 256(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 54, 240(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 53, 224(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lxv 52, 208(1) # 16-byte Folded Reload
+; CHECK-NEXT:    lfd 31, 584(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 30, 576(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 29, 568(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 28, 560(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 27, 552(1) # 8-byte Folded Reload
+; CHECK-NEXT:    lfd 26, 544(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 31, 536(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 30, 528(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 29, 520(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 28, 512(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 27, 504(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 26, 496(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 25, 488(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 24, 480(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 23, 472(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 22, 464(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 21, 456(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 20, 448(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 19, 440(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 18, 432(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 17, 424(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 16, 416(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 15, 408(1) # 8-byte Folded Reload
+; CHECK-NEXT:    ld 14, 400(1) # 8-byte Folded Reload
+; CHECK-NEXT:    addi 1, 1, 592
 ; CHECK-NEXT:    blr
 entry:
   %_val_l_ = load i32, i32* %.l, align 4

diff  --git a/llvm/test/Transforms/LoopStrengthReduce/X86/nested-ptr-addrec.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/nested-ptr-addrec.ll
new file mode 100644
index 0000000000000..fc988f0ac9fa9
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/nested-ptr-addrec.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -S -loop-reduce < %s | FileCheck %s
+
+; Test an assertion failure from D113349, where the SCEV for the outer phi
+; gets computed and registered in the value map while attempting to compute it.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test() {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64* [ [[SCEVGEP:%.*]], [[LOOP_LATCH:%.*]] ], [ inttoptr (i64 -8 to i64*), [[ENTRY:%.*]] ]
+; CHECK-NEXT:    br i1 true, label [[LOOP_EXIT:%.*]], label [[LOOP2_PREHEADER:%.*]]
+; CHECK:       loop.exit:
+; CHECK-NEXT:    ret void
+; CHECK:       loop2.preheader:
+; CHECK-NEXT:    br label [[LOOP2_HEADER:%.*]]
+; CHECK:       loop2.header:
+; CHECK-NEXT:    [[LSR_IV1:%.*]] = phi i64* [ [[SCEVGEP2:%.*]], [[LOOP2_HEADER]] ], [ [[LSR_IV]], [[LOOP2_PREHEADER]] ]
+; CHECK-NEXT:    [[SCEVGEP2]] = getelementptr i64, i64* [[LSR_IV1]], i64 1
+; CHECK-NEXT:    [[SCEVGEP23:%.*]] = bitcast i64* [[SCEVGEP2]] to i8*
+; CHECK-NEXT:    br i1 false, label [[LOOP2_HEADER]], label [[LOOP2_CONT:%.*]]
+; CHECK:       loop2.cont:
+; CHECK-NEXT:    [[V:%.*]] = load i8, i8* [[SCEVGEP23]], align 1
+; CHECK-NEXT:    [[C:%.*]] = icmp ne i8 [[V]], 0
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_EXIT]], label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[SCEVGEP]] = getelementptr i64, i64* [[LSR_IV]], i64 1
+; CHECK-NEXT:    br label [[LOOP_HEADER]]
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %ptr = phi i64* [ %ptr.next, %loop.latch ], [ null, %entry ]
+  br i1 true, label %loop.exit, label %loop2.preheader
+
+loop.exit:
+  ret void
+
+loop2.preheader:
+  br label %loop2.header
+
+loop2.header:
+  %ptr2 = phi i64* [ %ptr, %loop2.preheader ], [ %ptr2.next, %loop2.header ]
+  %ptr2.next = getelementptr inbounds i64, i64* %ptr2, i64 1
+  br i1 false, label %loop2.header, label %loop2.cont
+
+loop2.cont:
+  %ptr2.i8 = bitcast i64* %ptr2 to i8*
+  %v = load i8, i8* %ptr2.i8
+  %c = icmp ne i8 %v, 0
+  br i1 %c, label %loop.exit, label %loop.latch
+
+loop.latch:
+  %ptr.next = getelementptr inbounds i64, i64* %ptr, i64 1
+  br label %loop.header
+}


        


More information about the llvm-commits mailing list