[llvm] [UTC] support debug output from LDist (PR #93208)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 08:48:38 PDT 2024


https://github.com/artagnon created https://github.com/llvm/llvm-project/pull/93208

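Tweak the LoopDistribute debug output to be stable (partition contents are printed in a deterministic order, and pointer addresses are no longer printed), and extend update_analyze_test_checks.py trivially to support this output.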

-- 8< --
Based on #93051.

>From 81a4643518d1749b6cbd1a77b871e206196bdbe5 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Wed, 22 May 2024 16:16:33 +0100
Subject: [PATCH 1/2] LoopInfo: introduce Loop::getLocStr; unify debug output

Introduce Loop::getLocStr, adapted from LoopVectorize's static function
getDebugLocString, in order to have uniform debug output headers across
LoopVectorize, LoopAccessAnalysis, and LoopDistribute. The motivation
for this change is to have UpdateTestChecks recognize the headers and
automatically generate CHECK lines for debug output, with minimal
special-casing.
---
 llvm/include/llvm/Analysis/LoopInfo.h         |  6 ++-
 llvm/lib/Analysis/LoopAccessAnalysis.cpp      |  8 ++--
 llvm/lib/Analysis/LoopInfo.cpp                | 11 +++++
 llvm/lib/Transforms/Scalar/LoopDistribute.cpp |  9 ++--
 .../Transforms/Vectorize/LoopVectorize.cpp    | 27 ++----------
 .../LoopAccessAnalysis/print-order.ll         |  6 ++-
 .../scalable-vector-regression-tests.ll       |  4 +-
 .../Transforms/LoopDistribute/debug-print.ll  | 43 +++++++++++++++++++
 .../ARM/mve-hoist-runtime-checks.ll           |  2 +-
 .../LoopVectorize/runtime-checks-hoist.ll     | 20 ++++-----
 10 files changed, 90 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopDistribute/debug-print.ll

diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 52084630560c5..269a9efeac642 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -385,6 +385,11 @@ class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
   /// Return the source code span of the loop.
   LocRange getLocRange() const;
 
+  /// Return a string containing the location of the loop (file name + line
+  /// number if present, otherwise module name). Meant to be used for debug
+  /// printing within LLVM_DEBUG.
+  std::string getLocStr() const;
+
   StringRef getName() const {
     if (BasicBlock *Header = getHeader())
       if (Header->hasName())
@@ -690,7 +695,6 @@ llvm::MDNode *
 makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
                                llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
                                llvm::ArrayRef<llvm::MDNode *> AddAttrs);
-
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bc8b9b8479e4f..a537805bfd8b0 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2374,9 +2374,9 @@ void MemoryDepChecker::Dependence::print(
 
 bool LoopAccessInfo::canAnalyzeLoop() {
   // We need to have a loop header.
-  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
-                    << TheLoop->getHeader()->getParent()->getName() << ": "
-                    << TheLoop->getHeader()->getName() << '\n');
+  LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
+                    << TheLoop->getHeader()->getParent()->getName() << "' from "
+                    << TheLoop->getLocStr() << "\n");
 
   // We can only analyze innermost loops.
   if (!TheLoop->isInnermost()) {
@@ -2403,6 +2403,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {
     return false;
   }
 
+  LLVM_DEBUG(dbgs() << "LAA: Found a loop: " << TheLoop->getHeader()->getName()
+                    << "\n");
   return true;
 }
 
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 369ab087ffc0f..d7bddb0bae1b6 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -663,6 +663,17 @@ Loop::LocRange Loop::getLocRange() const {
   return LocRange();
 }
 
+std::string Loop::getLocStr() const {
+  std::string Result;
+  raw_string_ostream OS(Result);
+  if (const DebugLoc LoopDbgLoc = getStartLoc())
+    LoopDbgLoc.print(OS);
+  else
+    // Just print the module name.
+    OS << getHeader()->getParent()->getParent()->getModuleIdentifier();
+  return Result;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
 
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 626888c74bad8..9c352b94fe9e4 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -659,9 +659,9 @@ class LoopDistributeForLoop {
   bool processLoop() {
     assert(L->isInnermost() && "Only process inner loops.");
 
-    LLVM_DEBUG(dbgs() << "\nLDist: In \""
-                      << L->getHeader()->getParent()->getName()
-                      << "\" checking " << *L << "\n");
+    LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
+                      << L->getHeader()->getParent()->getName() << "' from "
+                      << L->getLocStr() << "\n");
 
     // Having a single exit block implies there's also one exiting block.
     if (!L->getExitBlock())
@@ -686,6 +686,9 @@ class LoopDistributeForLoop {
     if (!Dependences || Dependences->empty())
       return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
 
+    LLVM_DEBUG(dbgs() << "LDist: Found a loop: " << L->getHeader()->getName()
+                      << "\n");
+
     InstPartitionContainer Partitions(L, LI, DT);
 
     // First, go through each memory operation and assign them to consecutive
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6d64aaa75922b..2726df59f464d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1027,23 +1027,6 @@ static void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
 
 } // end namespace llvm
 
-#ifndef NDEBUG
-/// \return string containing a file name and a line # for the given loop.
-static std::string getDebugLocString(const Loop *L) {
-  std::string Result;
-  if (L) {
-    raw_string_ostream OS(Result);
-    if (const DebugLoc LoopDbgLoc = L->getStartLoc())
-      LoopDbgLoc.print(OS);
-    else
-      // Just print the module name.
-      OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
-    OS.flush();
-  }
-  return Result;
-}
-#endif
-
 namespace llvm {
 
 // Loop vectorization cost-model hints how the scalar epilogue loop should be
@@ -9836,13 +9819,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   assert((EnableVPlanNativePath || L->isInnermost()) &&
          "VPlan-native path is not enabled. Only process inner loops.");
 
-#ifndef NDEBUG
-  const std::string DebugLocStr = getDebugLocString(L);
-#endif /* NDEBUG */
-
   LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '"
                     << L->getHeader()->getParent()->getName() << "' from "
-                    << DebugLocStr << "\n");
+                    << L->getLocStr() << "\n");
 
   LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
 
@@ -10112,7 +10091,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     });
   } else if (VectorizeLoop && !InterleaveLoop) {
     LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
-                      << ") in " << DebugLocStr << '\n');
+                      << ") in " << L->getLocStr() << '\n');
     ORE->emit([&]() {
       return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first,
                                         L->getStartLoc(), L->getHeader())
@@ -10120,7 +10099,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     });
   } else if (VectorizeLoop && InterleaveLoop) {
     LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
-                      << ") in " << DebugLocStr << '\n');
+                      << ") in " << L->getLocStr() << '\n');
     LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
   }
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
index 65f94a7d8fdb4..1e53eda68b1a2 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
@@ -6,7 +6,8 @@
 ;    A[i+1] = A[i] + 1;
 ; }
 
-; CHECK: LAA: Found a loop in negative_step: loop
+; CHECK-LABEL: 'negative_step'
+; CHECK: LAA: Found a loop: loop
 ; CHECK: LAA: Checking memory dependencies
 ; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
 ; CHECK-NEXT: LAA: Distance for   store i32 %add, ptr %gep.A.plus.1, align 4 to   %l = load i32, ptr %gep.A, align 4: -4
@@ -37,7 +38,8 @@ exit:
 ;    A[i-1] = A[i] + 1;
 ; }
 
-; CHECK: LAA: Found a loop in positive_step: loop
+; CHECK-LABEL: 'positive_step'
+; CHECK: LAA: Found a loop: loop
 ; CHECK: LAA: Checking memory dependencies
 ; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
 ; CHECK-NEXT: LAA: Distance for   %l = load i32, ptr %gep.A, align 4 to   store i32 %add, ptr %gep.A.minus.1, align 4: -4
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
index 82a884a637259..8019bc76d2f0f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
@@ -24,7 +24,7 @@ loop.end:
   ret void
 }
 
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize'
 ; CHECK: LAA: Bad stride - Scalable object:
 define void @regression_test_loop_access_scalable_typesize(ptr %input_ptr) {
 entry:
@@ -42,7 +42,7 @@ end:
   ret void
 }
 
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize_nonscalable_object
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize_nonscalable_object'
 ; CHECK: LAA: Bad stride - Scalable object:
 define void @regression_test_loop_access_scalable_typesize_nonscalable_object(ptr %input_ptr) {
 entry:
diff --git a/llvm/test/Transforms/LoopDistribute/debug-print.ll b/llvm/test/Transforms/LoopDistribute/debug-print.ll
new file mode 100644
index 0000000000000..00a97b4b8c747
--- /dev/null
+++ b/llvm/test/Transforms/LoopDistribute/debug-print.ll
@@ -0,0 +1,43 @@
+; RUN: opt -passes=loop-distribute -enable-loop-distribute \
+; RUN:   -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
+
+define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i64 %stride) {
+; CHECK-LABEL: 'f'
+; CHECK:        LDist: Found a loop: for.body
+; CHECK:        Backward dependences:
+; CHECK-NEXT:     Backward:
+; CHECK-NEXT:         %load.a = load i32, ptr %gep.a, align 4 ->
+; CHECK-NEXT:         store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK:        Seeded partitions:
+; CHECK:        Partition 0
+; CHECK:        Partition 1
+; CHECK:        Partition 2
+; CHECK:        Partition 3
+; CHECK:        Distributing loop
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+  %gep.a = getelementptr inbounds i32, ptr %a, i64 %ind
+  %load.a = load i32, ptr %gep.a, align 4
+  %gep.b = getelementptr inbounds i32, ptr %b, i64 %ind
+  %load.b = load i32, ptr %gep.b, align 4
+  %mul.a = mul i32 %load.b, %load.a
+  %add = add nuw nsw i64 %ind, 1
+  %gep.a.plus4 = getelementptr inbounds i32, ptr %a, i64 %add
+  store i32 %mul.a, ptr %gep.a.plus4, align 4
+  %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
+  %loadD = load i32, ptr %gep.d, align 4
+  %mul = mul i64 %ind, %stride
+  %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
+  %load.strided.a = load i32, ptr %gep.strided.a, align 4
+  %mul.c = mul i32 %loadD, %load.strided.a
+  %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
+  store i32 %mul.c, ptr %gep.c, align 4
+  %exitcond = icmp eq i64 %add, 20
+  br i1 %exitcond, label %exit, label %for.body
+
+exit:                                             ; preds = %for.body
+  ret void
+}
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll
index 438321e0fb0cc..9293420ac5030 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll
@@ -18,7 +18,7 @@ target triple = "thumbv8.1m.main-none-unknown-eabi"
 ; NOTE: The strides of the starting address values in the inner loop differ, i.e.
 ; '(i * (n + 1))' vs '(i * n)'.
 
-; DEBUG-LABEL: LAA: Found a loop in diff_checks:
+; DEBUG-LABEL: 'diff_checks'
 ; DEBUG:      LAA: Not creating diff runtime check, since these  cannot be hoisted out of the outer loop
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
index c4f9c404a9265..f0a6f132cff20 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll
@@ -17,7 +17,7 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 ; NOTE: The strides of the starting address values in the inner loop differ, i.e.
 ; '(i * (n + 1))' vs '(i * n)'.
 
-; DEBUG-LABEL: LAA: Found a loop in diff_checks:
+; DEBUG-LABEL: 'diff_checks'
 ; DEBUG:      LAA: Not creating diff runtime check, since these  cannot be hoisted out of the outer loop
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
@@ -149,7 +149,7 @@ outer.exit:
 ; We decide to do full runtime checks here (as opposed to diff checks) due to
 ; the additional load of 'dst[(i * n) + j]' in the loop.
 
-; DEBUG-LABEL: LAA: Found a loop in full_checks:
+; DEBUG-LABEL: 'full_checks'
 ; DEBUG-NOT: LAA: Creating diff runtime check for:
 ; DEBUG: LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
@@ -272,7 +272,7 @@ outer.exit:
 ; is accessed with a higher stride compared src, and therefore the inner loop
 ; runtime checks will vary for each outer loop iteration.
 
-; DEBUG-LABEL: LAA: Found a loop in full_checks_diff_strides:
+; DEBUG-LABEL: 'full_checks_diff_strides'
 ; DEBUG-NOT: LAA: Creating diff runtime check for:
 ; DEBUG: LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
@@ -402,7 +402,7 @@ outer.exit:
 ;   }
 ; }
 
-; DEBUG-LABEL: LAA: Found a loop in diff_checks_src_start_invariant:
+; DEBUG-LABEL: 'diff_checks_src_start_invariant'
 ; DEBUG-NOT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
 
 define void @diff_checks_src_start_invariant(ptr nocapture noundef writeonly %dst, ptr nocapture noundef readonly %src, i32 noundef %m, i32 noundef %n) {
@@ -508,7 +508,7 @@ outer.loop.exit:
 ;   }
 ; }
 
-; DEBUG-LABEL: LAA: Found a loop in full_checks_src_start_invariant:
+; DEBUG-LABEL: 'full_checks_src_start_invariant'
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
 ; DEBUG-NEXT: Start: %dst End: ((4 * (zext i32 %m to i64) * (zext i32 %n to i64)) + %dst)
@@ -629,7 +629,7 @@ outer.loop.exit:
 ; The 'src' access varies with the outermost loop, rather than the parent of the
 ; innermost loop. Hence we don't expand `src`, although in theory we could do.
 
-; DEBUG-LABEL: LAA: Found a loop in triple_nested_loop_mixed_access:
+; DEBUG-LABEL: 'triple_nested_loop_mixed_access'
 ; DEBUG-NOT:  LAA: Creating diff runtime check for:
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
@@ -795,7 +795,7 @@ exit:
 ; }
 ; Outer loop trip count is uncomputable so we shouldn't expand the ranges.
 
-; DEBUG-LABEL: LAA: Found a loop in uncomputable_outer_tc:
+; DEBUG-LABEL: 'uncomputable_outer_tc'
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: Start: {%dst,+,(4 * (zext i32 (1 + %n) to i64))<nuw><nsw>}<%outer.loop> End: {((4 * (zext i32 %n to i64))<nuw><nsw> + %dst),+,(4 * (zext i32 (1 + %n) to i64))<nuw><nsw>}<%outer.loop>
 ; DEBUG-NEXT: LAA: Adding RT check for range:
@@ -945,7 +945,7 @@ while.end:
 ; Inner IV is decreasing, but this isn't a problem and we can still expand the
 ; runtime checks correctly to cover the whole loop.
 
-; DEBUG-LABEL: LAA: Found a loop in decreasing_inner_iv:
+; DEBUG-LABEL: 'decreasing_inner_iv'
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
 ; DEBUG-NEXT: LAA: ... but need to check stride is positive: (4 * (sext i32 %stride1 to i64))<nsw>
@@ -1111,7 +1111,7 @@ exit:
 ; Outer IV is decreasing, but the direction of memory accesses also depends
 ; upon the signedness of stride1.
 
-; DEBUG-LABEL: LAA: Found a loop in decreasing_outer_iv:
+; DEBUG-LABEL: 'decreasing_outer_iv'
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
 ; DEBUG-NEXT: LAA: ... but need to check stride is positive: (-4 * (sext i32 %stride1 to i64))<nsw>
@@ -1271,7 +1271,7 @@ exit:
 ; }
 
 
-; DEBUG-LABEL: LAA: Found a loop in unknown_inner_stride:
+; DEBUG-LABEL: 'unknown_inner_stride'
 ; DEBUG:      LAA: Adding RT check for range:
 ; DEBUG-NEXT: LAA: Expanded RT check for range to include outer loop in order to permit hoisting
 ; DEBUG-NEXT: Start: %dst End: ((4 * (zext i32 %n to i64))<nuw><nsw> + (4 * (zext i32 (1 + %n) to i64) * (-1 + (zext i32 %m to i64))<nsw>) + %dst)

>From 3cf61710277c2704dab09bda3aa9be8542357af1 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <r at artagnon.com>
Date: Thu, 23 May 2024 16:33:20 +0100
Subject: [PATCH 2/2] [UTC] support debug output from LDist

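Tweak the LoopDistribute debug output to be stable (partition contents
are printed in a deterministic order, and pointer addresses are no
longer printed), and extend update_analyze_test_checks.py trivially to
support this output.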
---
 llvm/lib/Transforms/Scalar/LoopDistribute.cpp |  58 ++++-----
 .../Transforms/LoopDistribute/debug-print.ll  |  94 ++++++++++++--
 .../Inputs/loop-distribute.ll                 |  27 ++++
 .../Inputs/loop-distribute.ll.expected        | 118 ++++++++++++++++++
 .../loop-distribute.test                      |   6 +
 llvm/utils/UpdateTestChecks/common.py         |   6 +-
 llvm/utils/update_analyze_test_checks.py      |  10 +-
 7 files changed, 273 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected
 create mode 100644 llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loop-distribute.test

diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 9c352b94fe9e4..abd46dc6c1b63 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -26,7 +26,7 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/EquivalenceClasses.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
@@ -120,7 +120,7 @@ namespace {
 /// Maintains the set of instructions of the loop for a partition before
 /// cloning.  After cloning, it hosts the new loop.
 class InstPartition {
-  using InstructionSet = SmallPtrSet<Instruction *, 8>;
+  using InstructionSet = SetVector<Instruction *>;
 
 public:
   InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
@@ -166,7 +166,7 @@ class InstPartition {
       // Insert instructions from the loop that we depend on.
       for (Value *V : I->operand_values()) {
         auto *I = dyn_cast<Instruction>(V);
-        if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
+        if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
           Worklist.push_back(I);
       }
     }
@@ -231,17 +231,16 @@ class InstPartition {
     }
   }
 
-  void print() const {
-    if (DepCycle)
-      dbgs() << "  (cycle)\n";
+  void print(raw_ostream &OS) const {
+    OS << (DepCycle ? " (cycle)\n" : "\n");
     for (auto *I : Set)
       // Prefix with the block name.
-      dbgs() << "  " << I->getParent()->getName() << ":" << *I << "\n";
+      OS << "  " << I->getParent()->getName() << ":" << *I << "\n";
   }
 
-  void printBlocks() const {
+  void printBlocks(raw_ostream &OS) const {
     for (auto *BB : getDistributedLoop()->getBlocks())
-      dbgs() << *BB;
+      OS << *BB;
   }
 
 private:
@@ -368,11 +367,11 @@ class InstPartitionContainer {
           std::tie(LoadToPart, NewElt) =
               LoadToPartition.insert(std::make_pair(Inst, PartI));
           if (!NewElt) {
-            LLVM_DEBUG(dbgs()
-                       << "Merging partitions due to this load in multiple "
-                       << "partitions: " << PartI << ", " << LoadToPart->second
-                       << "\n"
-                       << *Inst << "\n");
+            LLVM_DEBUG(
+                dbgs()
+                << "LDist: Merging partitions due to this load in multiple "
+                << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
+                << *Inst << "\n");
 
             auto PartJ = I;
             do {
@@ -530,8 +529,8 @@ class InstPartitionContainer {
   void print(raw_ostream &OS) const {
     unsigned Index = 0;
     for (const auto &P : PartitionContainer) {
-      OS << "Partition " << Index++ << " (" << &P << "):\n";
-      P.print();
+      OS << "LDist: Partition " << Index++ << ":";
+      P.print(OS);
     }
   }
 
@@ -545,11 +544,11 @@ class InstPartitionContainer {
   }
 #endif
 
-  void printBlocks() const {
+  void printBlocks(raw_ostream &OS) const {
     unsigned Index = 0;
     for (const auto &P : PartitionContainer) {
-      dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
-      P.printBlocks();
+      OS << "LDist: Partition " << Index++ << ":";
+      P.printBlocks(OS);
     }
   }
 
@@ -628,7 +627,7 @@ class MemoryInstructionDependences {
       const SmallVectorImpl<Dependence> &Dependences) {
     Accesses.append(Instructions.begin(), Instructions.end());
 
-    LLVM_DEBUG(dbgs() << "Backward dependences:\n");
+    LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
     for (const auto &Dep : Dependences)
       if (Dep.isPossiblyBackward()) {
         // Note that the designations source and destination follow the program
@@ -738,7 +737,7 @@ class LoopDistributeForLoop {
     for (auto *Inst : DefsUsedOutside)
       Partitions.addToNewNonCyclicPartition(Inst);
 
-    LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
+    LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
     if (Partitions.getSize() < 2)
       return fail("CantIsolateUnsafeDeps",
                   "cannot isolate unsafe dependencies");
@@ -746,19 +745,19 @@ class LoopDistributeForLoop {
     // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
     // should be able to vectorize these together.
     Partitions.mergeBeforePopulating();
-    LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
+    LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
     if (Partitions.getSize() < 2)
       return fail("CantIsolateUnsafeDeps",
                   "cannot isolate unsafe dependencies");
 
     // Now, populate the partitions with non-memory operations.
     Partitions.populateUsedSet();
-    LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
+    LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
 
     // In order to preserve original lexical order for loads, keep them in the
     // partition that we set up in the MemoryInstructionDependences loop.
     if (Partitions.mergeToAvoidDuplicatedLoads()) {
-      LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
+      LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
                         << Partitions);
       if (Partitions.getSize() < 2)
         return fail("CantIsolateUnsafeDeps",
@@ -782,7 +781,8 @@ class LoopDistributeForLoop {
     if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
       return fail("HeuristicDisabled", "distribution heuristic disabled");
 
-    LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
+    LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
+                      << L->getHeader()->getName() << "\n");
     // We're done forming the partitions set up the reverse mapping from
     // instructions to partitions.
     Partitions.setupPartitionIdOnInstructions();
@@ -810,7 +810,7 @@ class LoopDistributeForLoop {
 
       MDNode *OrigLoopID = L->getLoopID();
 
-      LLVM_DEBUG(dbgs() << "\nPointers:\n");
+      LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
       LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
       LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
       LVer.versionLoop(DefsUsedOutside);
@@ -833,8 +833,8 @@ class LoopDistributeForLoop {
     // Now, we remove the instruction from each loop that don't belong to that
     // partition.
     Partitions.removeUnusedInsts();
-    LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
-    LLVM_DEBUG(Partitions.printBlocks());
+    LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
+    LLVM_DEBUG(Partitions.printBlocks(dbgs()));
 
     if (LDistVerify) {
       LI->verify(*DT);
@@ -856,7 +856,7 @@ class LoopDistributeForLoop {
     LLVMContext &Ctx = F->getContext();
     bool Forced = isForced().value_or(false);
 
-    LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
+    LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
 
     // With Rpass-missed report that distribution failed.
     ORE->emit([&]() {
diff --git a/llvm/test/Transforms/LoopDistribute/debug-print.ll b/llvm/test/Transforms/LoopDistribute/debug-print.ll
index 00a97b4b8c747..3727c78b17c92 100644
--- a/llvm/test/Transforms/LoopDistribute/debug-print.ll
+++ b/llvm/test/Transforms/LoopDistribute/debug-print.ll
@@ -1,19 +1,91 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt -passes=loop-distribute -enable-loop-distribute \
 ; RUN:   -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
 
 define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i64 %stride) {
 ; CHECK-LABEL: 'f'
-; CHECK:        LDist: Found a loop: for.body
-; CHECK:        Backward dependences:
-; CHECK-NEXT:     Backward:
-; CHECK-NEXT:         %load.a = load i32, ptr %gep.a, align 4 ->
-; CHECK-NEXT:         store i32 %mul.a, ptr %gep.a.plus4, align 4
-; CHECK:        Seeded partitions:
-; CHECK:        Partition 0
-; CHECK:        Partition 1
-; CHECK:        Partition 2
-; CHECK:        Partition 3
-; CHECK:        Distributing loop
+; CHECK-NEXT:  LDist: Found a loop: for.body
+; CHECK-NEXT:  LDist: Backward dependences:
+; CHECK-NEXT:    Backward:
+; CHECK-NEXT:        %load.a = load i32, ptr %gep.a, align 4 ->
+; CHECK-NEXT:        store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:  LDist: Seeded partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT:    for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT:  LDist: Partition 2:
+; CHECK-NEXT:    for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT:  LDist: Partition 3:
+; CHECK-NEXT:    for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT:  LDist: Merged partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT:    for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT:    for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT:  LDist: Populated partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT:    for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:    for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT:    for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT:    for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT:    for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT:    for.body: %mul.a = mul i32 %load.b, %load.a
+; CHECK-NEXT:    for.body: %gep.a.plus4 = getelementptr inbounds i32, ptr %a, i64 %add
+; CHECK-NEXT:    for.body: %gep.b = getelementptr inbounds i32, ptr %b, i64 %ind
+; CHECK-NEXT:    for.body: %gep.a = getelementptr inbounds i32, ptr %a, i64 %ind
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT:    for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT:    for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT:    for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT:    for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT:    for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT:    for.body: %mul.c = mul i32 %loadD, %load.strided.a
+; CHECK-NEXT:    for.body: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
+; CHECK-NEXT:    for.body: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
+; CHECK-NEXT:    for.body: %mul = mul i64 %ind, %stride
+; CHECK-NEXT:    for.body: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
+; CHECK-NEXT:  LDist: Distributing loop: for.body
+; CHECK-NEXT:  LDist: Pointers:
+; CHECK-NEXT:  LDist: After removing unused Instrs:
+; CHECK-NEXT:  LDist: Partition 0:
+; CHECK-NEXT:  for.body.ldist1: ; preds = %for.body.ldist1, %for.body.ph.ldist1
+; CHECK-NEXT:    %ind.ldist1 = phi i64 [ 0, %for.body.ph.ldist1 ], [ %add.ldist1, %for.body.ldist1 ]
+; CHECK-NEXT:    %gep.a.ldist1 = getelementptr inbounds i32, ptr %a, i64 %ind.ldist1
+; CHECK-NEXT:    %load.a.ldist1 = load i32, ptr %gep.a.ldist1, align 4
+; CHECK-NEXT:    %gep.b.ldist1 = getelementptr inbounds i32, ptr %b, i64 %ind.ldist1
+; CHECK-NEXT:    %load.b.ldist1 = load i32, ptr %gep.b.ldist1, align 4
+; CHECK-NEXT:    %mul.a.ldist1 = mul i32 %load.b.ldist1, %load.a.ldist1
+; CHECK-NEXT:    %add.ldist1 = add nuw nsw i64 %ind.ldist1, 1
+; CHECK-NEXT:    %gep.a.plus4.ldist1 = getelementptr inbounds i32, ptr %a, i64 %add.ldist1
+; CHECK-NEXT:    store i32 %mul.a.ldist1, ptr %gep.a.plus4.ldist1, align 4
+; CHECK-NEXT:    %exitcond.ldist1 = icmp eq i64 %add.ldist1, 20
+; CHECK-NEXT:    br i1 %exitcond.ldist1, label %for.body.ph, label %for.body.ldist1
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:  for.body: ; preds = %for.body, %for.body.ph
+; CHECK-NEXT:    %ind = phi i64 [ 0, %for.body.ph ], [ %add, %for.body ]
+; CHECK-NEXT:    %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT:    %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
+; CHECK-NEXT:    %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT:    %mul = mul i64 %ind, %stride
+; CHECK-NEXT:    %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
+; CHECK-NEXT:    %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT:    %mul.c = mul i32 %loadD, %load.strided.a
+; CHECK-NEXT:    %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
+; CHECK-NEXT:    store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT:    %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT:    br i1 %exitcond, label %exit.loopexit1, label %for.body
+;
 entry:
   br label %for.body
 
diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll
new file mode 100644
index 0000000000000..48f80533c6379
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll
@@ -0,0 +1,27 @@
+; RUN: opt -passes=loop-distribute -enable-loop-distribute \
+; RUN:   -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
+
+define void @ldist(i1 %c, ptr %A, ptr %B, ptr %C) {
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %if.end, %entry
+  %iv = phi i16 [ 0, %entry ], [ %iv.next, %if.end ]
+  %lv = load i16, ptr %A, align 1
+  store i16 %lv, ptr %A, align 1
+  br i1 %c, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %lv2 = load i16, ptr %A, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  %c.sink = phi ptr [ %B, %if.then ], [ %C, %for.body ]
+  %lv3 = load i16, ptr %c.sink
+  %iv.next = add nuw nsw i16 %iv, 1
+  %tobool.not = icmp eq i16 %iv.next, 1000
+  br i1 %tobool.not, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %if.end
+  ret void
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected
new file mode 100644
index 0000000000000..303543444fa3d
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
+; RUN: opt -passes=loop-distribute -enable-loop-distribute \
+; RUN:   -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
+
+define void @ldist(i1 %c, ptr %A, ptr %B, ptr %C) {
+; CHECK-LABEL: 'ldist'
+; CHECK-NEXT:  LDist: Found a loop: for.body
+; CHECK-NEXT:  LDist: Backward dependences:
+; CHECK-NEXT:    Unknown:
+; CHECK-NEXT:        %lv = load i16, ptr %A, align 1 ->
+; CHECK-NEXT:        store i16 %lv, ptr %A, align 1
+; CHECK-NEXT:    Unknown:
+; CHECK-NEXT:        store i16 %lv, ptr %A, align 1 ->
+; CHECK-NEXT:        %lv2 = load i16, ptr %A, align 1
+; CHECK-NEXT:  LDist: Seeded partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %lv = load i16, ptr %A, align 1
+; CHECK-NEXT:    for.body: store i16 %lv, ptr %A, align 1
+; CHECK-NEXT:    if.then: %lv2 = load i16, ptr %A, align 1
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    if.end: %lv3 = load i16, ptr %c.sink, align 2
+; CHECK-NEXT:  LDist: Partition 2:
+; CHECK-NEXT:    if.end: %lv3 = load i16, ptr %c.sink, align 2
+; CHECK-NEXT:  LDist: Merged partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %lv = load i16, ptr %A, align 1
+; CHECK-NEXT:    for.body: store i16 %lv, ptr %A, align 1
+; CHECK-NEXT:    if.then: %lv2 = load i16, ptr %A, align 1
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    if.end: %lv3 = load i16, ptr %c.sink, align 2
+; CHECK-NEXT:  LDist: Populated partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %lv = load i16, ptr %A, align 1
+; CHECK-NEXT:    for.body: store i16 %lv, ptr %A, align 1
+; CHECK-NEXT:    if.then: %lv2 = load i16, ptr %A, align 1
+; CHECK-NEXT:    for.body: br i1 %c, label %if.then, label %if.end
+; CHECK-NEXT:    if.then: br label %if.end
+; CHECK-NEXT:    if.end: br i1 %tobool.not, label %for.end.loopexit, label %for.body
+; CHECK-NEXT:    if.end: %tobool.not = icmp eq i16 %iv.next, 1000
+; CHECK-NEXT:    if.end: %iv.next = add nuw nsw i16 %iv, 1
+; CHECK-NEXT:    for.body: %iv = phi i16 [ 0, %entry ], [ %iv.next, %if.end ]
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    if.end: %lv3 = load i16, ptr %c.sink, align 2
+; CHECK-NEXT:    for.body: br i1 %c, label %if.then, label %if.end
+; CHECK-NEXT:    if.then: br label %if.end
+; CHECK-NEXT:    if.end: br i1 %tobool.not, label %for.end.loopexit, label %for.body
+; CHECK-NEXT:    if.end: %tobool.not = icmp eq i16 %iv.next, 1000
+; CHECK-NEXT:    if.end: %iv.next = add nuw nsw i16 %iv, 1
+; CHECK-NEXT:    for.body: %iv = phi i16 [ 0, %entry ], [ %iv.next, %if.end ]
+; CHECK-NEXT:    if.end: %c.sink = phi ptr [ %B, %if.then ], [ %C, %for.body ]
+; CHECK-NEXT:  LDist: Distributing loop: for.body
+; CHECK-NEXT:  LDist: Pointers:
+; CHECK-NEXT:  Check 0:
+; CHECK-NEXT:    Comparing group ([[GRP1:0x[0-9a-f]+]]):
+; CHECK-NEXT:    ptr %A
+; CHECK-NEXT:    ptr %A
+; CHECK-NEXT:    Against group ([[GRP2:0x[0-9a-f]+]]):
+; CHECK-NEXT:    ptr %C
+; CHECK-NEXT:  Check 1:
+; CHECK-NEXT:    Comparing group ([[GRP1]]):
+; CHECK-NEXT:    ptr %A
+; CHECK-NEXT:    ptr %A
+; CHECK-NEXT:    Against group ([[GRP3:0x[0-9a-f]+]]):
+; CHECK-NEXT:    ptr %B
+; CHECK-NEXT:  LDist: After removing unused Instrs:
+; CHECK-NEXT:  LDist: Partition 0:
+; CHECK-NEXT:  for.body.ldist1: ; preds = %if.end.ldist1, %for.body.ph.ldist1
+; CHECK-NEXT:    %iv.ldist1 = phi i16 [ 0, %for.body.ph.ldist1 ], [ %iv.next.ldist1, %if.end.ldist1 ]
+; CHECK-NEXT:    %lv.ldist1 = load i16, ptr %A, align 1, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    store i16 %lv.ldist1, ptr %A, align 1, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    br i1 %c, label %if.then.ldist1, label %if.end.ldist1
+; CHECK-EMPTY:
+; CHECK-NEXT:  if.then.ldist1: ; preds = %for.body.ldist1
+; CHECK-NEXT:    %lv2.ldist1 = load i16, ptr %A, align 1, !alias.scope !0, !noalias !3
+; CHECK-NEXT:    br label %if.end.ldist1
+; CHECK-EMPTY:
+; CHECK-NEXT:  if.end.ldist1: ; preds = %if.then.ldist1, %for.body.ldist1
+; CHECK-NEXT:    %iv.next.ldist1 = add nuw nsw i16 %iv.ldist1, 1
+; CHECK-NEXT:    %tobool.not.ldist1 = icmp eq i16 %iv.next.ldist1, 1000
+; CHECK-NEXT:    br i1 %tobool.not.ldist1, label %for.body.ph, label %for.body.ldist1
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:  for.body: ; preds = %if.end, %for.body.ph
+; CHECK-NEXT:    %iv = phi i16 [ 0, %for.body.ph ], [ %iv.next, %if.end ]
+; CHECK-NEXT:    br i1 %c, label %if.then, label %if.end
+; CHECK-EMPTY:
+; CHECK-NEXT:  if.then: ; preds = %for.body
+; CHECK-NEXT:    br label %if.end
+; CHECK-EMPTY:
+; CHECK-NEXT:  if.end: ; preds = %if.then, %for.body
+; CHECK-NEXT:    %c.sink = phi ptr [ %B, %if.then ], [ %C, %for.body ]
+; CHECK-NEXT:    %lv3 = load i16, ptr %c.sink, align 2
+; CHECK-NEXT:    %iv.next = add nuw nsw i16 %iv, 1
+; CHECK-NEXT:    %tobool.not = icmp eq i16 %iv.next, 1000
+; CHECK-NEXT:    br i1 %tobool.not, label %for.end.loopexit.loopexit6, label %for.body
+;
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %if.end, %entry
+  %iv = phi i16 [ 0, %entry ], [ %iv.next, %if.end ]
+  %lv = load i16, ptr %A, align 1
+  store i16 %lv, ptr %A, align 1
+  br i1 %c, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body
+  %lv2 = load i16, ptr %A, align 1
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body
+  %c.sink = phi ptr [ %B, %if.then ], [ %C, %for.body ]
+  %lv3 = load i16, ptr %c.sink
+  %iv.next = add nuw nsw i16 %iv, 1
+  %tobool.not = icmp eq i16 %iv.next, 1000
+  br i1 %tobool.not, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:                                 ; preds = %if.end
+  ret void
+}
diff --git a/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loop-distribute.test b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loop-distribute.test
new file mode 100644
index 0000000000000..65a44fafc7d5a
--- /dev/null
+++ b/llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loop-distribute.test
@@ -0,0 +1,6 @@
+## Basic test checking that update_analyze_test_checks.py works correctly
+# RUN: cp -f %S/Inputs/loop-distribute.ll %t.ll && %update_analyze_test_checks %t.ll
+# RUN: diff -u %t.ll %S/Inputs/loop-distribute.ll.expected
+## Check that running the script again does not change the result:
+# RUN: %update_analyze_test_checks %t.ll
+# RUN: diff -u %t.ll %S/Inputs/loop-distribute.ll.expected
diff --git a/llvm/utils/UpdateTestChecks/common.py b/llvm/utils/UpdateTestChecks/common.py
index 7da16e0f0cb2e..f0ab9d844db8c 100644
--- a/llvm/utils/UpdateTestChecks/common.py
+++ b/llvm/utils/UpdateTestChecks/common.py
@@ -569,7 +569,7 @@ def invoke_tool(exe, cmd_args, ir, preprocess_cmd=None, verbose=False):
     flags=(re.X | re.S),
 )
 
-LV_DEBUG_RE = re.compile(
+LOOP_PASS_DEBUG_RE = re.compile(
     r"^\s*\'(?P<func>[\w.$-]+?)\'[^\n]*" r"\s*\n(?P<body>.*)$", flags=(re.X | re.S)
 )
 
@@ -973,6 +973,7 @@ class NamelessValue:
     name (as in e.g. `@some_global` or `%x`) or just a number (as in e.g. `%12`
     or `!4`).
     """
+
     def __init__(
         self,
         check_prefix,
@@ -1635,8 +1636,9 @@ def generalize_check_lines(
         regexp = ginfo.get_regexp()
 
     multiple_braces_re = re.compile(r"({{+)|(}}+)")
+
     def escape_braces(match_obj):
-        return '{{' + re.escape(match_obj.group(0)) + '}}'
+        return "{{" + re.escape(match_obj.group(0)) + "}}"
 
     if ginfo.is_ir():
         for i, line in enumerate(lines):
diff --git a/llvm/utils/update_analyze_test_checks.py b/llvm/utils/update_analyze_test_checks.py
index 47506626a0a58..d356ebead0d81 100755
--- a/llvm/utils/update_analyze_test_checks.py
+++ b/llvm/utils/update_analyze_test_checks.py
@@ -134,13 +134,15 @@ def main():
                         raw_tool_output,
                         prefixes,
                     )
-            elif re.search(r"LV: Checking a loop in ", raw_tool_outputs) is not None:
-                # Split analysis outputs by "Printing analysis " declarations.
+            elif (
+                re.search(r"(LV|LDist): Checking a loop in ", raw_tool_outputs)
+                is not None
+            ):
                 for raw_tool_output in re.split(
-                    r"LV: Checking a loop in ", raw_tool_outputs
+                    r"(LV|LDist): Checking a loop in ", raw_tool_outputs
                 ):
                     builder.process_run_line(
-                        common.LV_DEBUG_RE,
+                        common.LOOP_PASS_DEBUG_RE,
                         common.scrub_body,
                         raw_tool_output,
                         prefixes,



More information about the llvm-commits mailing list