[llvm] [UTC] support debug output from LDist (PR #93208)

via llvm-commits llvm-commits at lists.llvm.org
Thu May 23 08:49:09 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-testing-tools

Author: Ramkumar Ramachandra (artagnon)

<details>
<summary>Changes</summary>

Tweak the LoopDistribute debug output to be stable, and extend update_analyze_test_checks.py trivially to support this output.

-- 8< --
Based on #93051.

---

Patch is 38.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93208.diff


15 Files Affected:

- (modified) llvm/include/llvm/Analysis/LoopInfo.h (+5-1) 
- (modified) llvm/lib/Analysis/LoopAccessAnalysis.cpp (+5-3) 
- (modified) llvm/lib/Analysis/LoopInfo.cpp (+11) 
- (modified) llvm/lib/Transforms/Scalar/LoopDistribute.cpp (+35-32) 
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+3-24) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/print-order.ll (+4-2) 
- (modified) llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll (+2-2) 
- (added) llvm/test/Transforms/LoopDistribute/debug-print.ll (+115) 
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-hoist-runtime-checks.ll (+1-1) 
- (modified) llvm/test/Transforms/LoopVectorize/runtime-checks-hoist.ll (+10-10) 
- (added) llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll (+27) 
- (added) llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/Inputs/loop-distribute.ll.expected (+118) 
- (added) llvm/test/tools/UpdateTestChecks/update_analyze_test_checks/loop-distribute.test (+6) 
- (modified) llvm/utils/UpdateTestChecks/common.py (+4-2) 
- (modified) llvm/utils/update_analyze_test_checks.py (+6-4) 


``````````diff
diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h
index 52084630560c5..269a9efeac642 100644
--- a/llvm/include/llvm/Analysis/LoopInfo.h
+++ b/llvm/include/llvm/Analysis/LoopInfo.h
@@ -385,6 +385,11 @@ class LLVM_EXTERNAL_VISIBILITY Loop : public LoopBase<BasicBlock, Loop> {
   /// Return the source code span of the loop.
   LocRange getLocRange() const;
 
+  /// Return a string containing the location of the loop (file name + line
+  /// number if present, otherwise module name). Meant to be used for debug
+  /// printing within LLVM_DEBUG.
+  std::string getLocStr() const;
+
   StringRef getName() const {
     if (BasicBlock *Header = getHeader())
       if (Header->hasName())
@@ -690,7 +695,6 @@ llvm::MDNode *
 makePostTransformationMetadata(llvm::LLVMContext &Context, MDNode *OrigLoopID,
                                llvm::ArrayRef<llvm::StringRef> RemovePrefixes,
                                llvm::ArrayRef<llvm::MDNode *> AddAttrs);
-
 } // namespace llvm
 
 #endif
diff --git a/llvm/lib/Analysis/LoopAccessAnalysis.cpp b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
index bc8b9b8479e4f..a537805bfd8b0 100644
--- a/llvm/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/llvm/lib/Analysis/LoopAccessAnalysis.cpp
@@ -2374,9 +2374,9 @@ void MemoryDepChecker::Dependence::print(
 
 bool LoopAccessInfo::canAnalyzeLoop() {
   // We need to have a loop header.
-  LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
-                    << TheLoop->getHeader()->getParent()->getName() << ": "
-                    << TheLoop->getHeader()->getName() << '\n');
+  LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
+                    << TheLoop->getHeader()->getParent()->getName() << "' from "
+                    << TheLoop->getLocStr() << "\n");
 
   // We can only analyze innermost loops.
   if (!TheLoop->isInnermost()) {
@@ -2403,6 +2403,8 @@ bool LoopAccessInfo::canAnalyzeLoop() {
     return false;
   }
 
+  LLVM_DEBUG(dbgs() << "LAA: Found a loop: " << TheLoop->getHeader()->getName()
+                    << "\n");
   return true;
 }
 
diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp
index 369ab087ffc0f..d7bddb0bae1b6 100644
--- a/llvm/lib/Analysis/LoopInfo.cpp
+++ b/llvm/lib/Analysis/LoopInfo.cpp
@@ -663,6 +663,17 @@ Loop::LocRange Loop::getLocRange() const {
   return LocRange();
 }
 
+std::string Loop::getLocStr() const {
+  std::string Result;
+  raw_string_ostream OS(Result);
+  if (const DebugLoc LoopDbgLoc = getStartLoc())
+    LoopDbgLoc.print(OS);
+  else
+    // Just print the module name.
+    OS << getHeader()->getParent()->getParent()->getModuleIdentifier();
+  return Result;
+}
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
 LLVM_DUMP_METHOD void Loop::dump() const { print(dbgs()); }
 
diff --git a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
index 626888c74bad8..abd46dc6c1b63 100644
--- a/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopDistribute.cpp
@@ -26,7 +26,7 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/EquivalenceClasses.h"
 #include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/StringRef.h"
@@ -120,7 +120,7 @@ namespace {
 /// Maintains the set of instructions of the loop for a partition before
 /// cloning.  After cloning, it hosts the new loop.
 class InstPartition {
-  using InstructionSet = SmallPtrSet<Instruction *, 8>;
+  using InstructionSet = SetVector<Instruction *>;
 
 public:
   InstPartition(Instruction *I, Loop *L, bool DepCycle = false)
@@ -166,7 +166,7 @@ class InstPartition {
       // Insert instructions from the loop that we depend on.
       for (Value *V : I->operand_values()) {
         auto *I = dyn_cast<Instruction>(V);
-        if (I && OrigLoop->contains(I->getParent()) && Set.insert(I).second)
+        if (I && OrigLoop->contains(I->getParent()) && Set.insert(I))
           Worklist.push_back(I);
       }
     }
@@ -231,17 +231,16 @@ class InstPartition {
     }
   }
 
-  void print() const {
-    if (DepCycle)
-      dbgs() << "  (cycle)\n";
+  void print(raw_ostream &OS) const {
+    OS << (DepCycle ? " (cycle)\n" : "\n");
     for (auto *I : Set)
       // Prefix with the block name.
-      dbgs() << "  " << I->getParent()->getName() << ":" << *I << "\n";
+      OS << "  " << I->getParent()->getName() << ":" << *I << "\n";
   }
 
-  void printBlocks() const {
+  void printBlocks(raw_ostream &OS) const {
     for (auto *BB : getDistributedLoop()->getBlocks())
-      dbgs() << *BB;
+      OS << *BB;
   }
 
 private:
@@ -368,11 +367,11 @@ class InstPartitionContainer {
           std::tie(LoadToPart, NewElt) =
               LoadToPartition.insert(std::make_pair(Inst, PartI));
           if (!NewElt) {
-            LLVM_DEBUG(dbgs()
-                       << "Merging partitions due to this load in multiple "
-                       << "partitions: " << PartI << ", " << LoadToPart->second
-                       << "\n"
-                       << *Inst << "\n");
+            LLVM_DEBUG(
+                dbgs()
+                << "LDist: Merging partitions due to this load in multiple "
+                << "partitions: " << PartI << ", " << LoadToPart->second << "\n"
+                << *Inst << "\n");
 
             auto PartJ = I;
             do {
@@ -530,8 +529,8 @@ class InstPartitionContainer {
   void print(raw_ostream &OS) const {
     unsigned Index = 0;
     for (const auto &P : PartitionContainer) {
-      OS << "Partition " << Index++ << " (" << &P << "):\n";
-      P.print();
+      OS << "LDist: Partition " << Index++ << ":";
+      P.print(OS);
     }
   }
 
@@ -545,11 +544,11 @@ class InstPartitionContainer {
   }
 #endif
 
-  void printBlocks() const {
+  void printBlocks(raw_ostream &OS) const {
     unsigned Index = 0;
     for (const auto &P : PartitionContainer) {
-      dbgs() << "\nPartition " << Index++ << " (" << &P << "):\n";
-      P.printBlocks();
+      OS << "LDist: Partition " << Index++ << ":";
+      P.printBlocks(OS);
     }
   }
 
@@ -628,7 +627,7 @@ class MemoryInstructionDependences {
       const SmallVectorImpl<Dependence> &Dependences) {
     Accesses.append(Instructions.begin(), Instructions.end());
 
-    LLVM_DEBUG(dbgs() << "Backward dependences:\n");
+    LLVM_DEBUG(dbgs() << "LDist: Backward dependences:\n");
     for (const auto &Dep : Dependences)
       if (Dep.isPossiblyBackward()) {
         // Note that the designations source and destination follow the program
@@ -659,9 +658,9 @@ class LoopDistributeForLoop {
   bool processLoop() {
     assert(L->isInnermost() && "Only process inner loops.");
 
-    LLVM_DEBUG(dbgs() << "\nLDist: In \""
-                      << L->getHeader()->getParent()->getName()
-                      << "\" checking " << *L << "\n");
+    LLVM_DEBUG(dbgs() << "\nLDist: Checking a loop in '"
+                      << L->getHeader()->getParent()->getName() << "' from "
+                      << L->getLocStr() << "\n");
 
     // Having a single exit block implies there's also one exiting block.
     if (!L->getExitBlock())
@@ -686,6 +685,9 @@ class LoopDistributeForLoop {
     if (!Dependences || Dependences->empty())
       return fail("NoUnsafeDeps", "no unsafe dependences to isolate");
 
+    LLVM_DEBUG(dbgs() << "LDist: Found a loop: " << L->getHeader()->getName()
+                      << "\n");
+
     InstPartitionContainer Partitions(L, LI, DT);
 
     // First, go through each memory operation and assign them to consecutive
@@ -735,7 +737,7 @@ class LoopDistributeForLoop {
     for (auto *Inst : DefsUsedOutside)
       Partitions.addToNewNonCyclicPartition(Inst);
 
-    LLVM_DEBUG(dbgs() << "Seeded partitions:\n" << Partitions);
+    LLVM_DEBUG(dbgs() << "LDist: Seeded partitions:\n" << Partitions);
     if (Partitions.getSize() < 2)
       return fail("CantIsolateUnsafeDeps",
                   "cannot isolate unsafe dependencies");
@@ -743,19 +745,19 @@ class LoopDistributeForLoop {
     // Run the merge heuristics: Merge non-cyclic adjacent partitions since we
     // should be able to vectorize these together.
     Partitions.mergeBeforePopulating();
-    LLVM_DEBUG(dbgs() << "\nMerged partitions:\n" << Partitions);
+    LLVM_DEBUG(dbgs() << "LDist: Merged partitions:\n" << Partitions);
     if (Partitions.getSize() < 2)
       return fail("CantIsolateUnsafeDeps",
                   "cannot isolate unsafe dependencies");
 
     // Now, populate the partitions with non-memory operations.
     Partitions.populateUsedSet();
-    LLVM_DEBUG(dbgs() << "\nPopulated partitions:\n" << Partitions);
+    LLVM_DEBUG(dbgs() << "LDist: Populated partitions:\n" << Partitions);
 
     // In order to preserve original lexical order for loads, keep them in the
     // partition that we set up in the MemoryInstructionDependences loop.
     if (Partitions.mergeToAvoidDuplicatedLoads()) {
-      LLVM_DEBUG(dbgs() << "\nPartitions merged to ensure unique loads:\n"
+      LLVM_DEBUG(dbgs() << "LDist: Partitions merged to ensure unique loads:\n"
                         << Partitions);
       if (Partitions.getSize() < 2)
         return fail("CantIsolateUnsafeDeps",
@@ -779,7 +781,8 @@ class LoopDistributeForLoop {
     if (!IsForced.value_or(false) && hasDisableAllTransformsHint(L))
       return fail("HeuristicDisabled", "distribution heuristic disabled");
 
-    LLVM_DEBUG(dbgs() << "\nDistributing loop: " << *L << "\n");
+    LLVM_DEBUG(dbgs() << "LDist: Distributing loop: "
+                      << L->getHeader()->getName() << "\n");
     // We're done forming the partitions set up the reverse mapping from
     // instructions to partitions.
     Partitions.setupPartitionIdOnInstructions();
@@ -807,7 +810,7 @@ class LoopDistributeForLoop {
 
       MDNode *OrigLoopID = L->getLoopID();
 
-      LLVM_DEBUG(dbgs() << "\nPointers:\n");
+      LLVM_DEBUG(dbgs() << "LDist: Pointers:\n");
       LLVM_DEBUG(LAI->getRuntimePointerChecking()->printChecks(dbgs(), Checks));
       LoopVersioning LVer(*LAI, Checks, L, LI, DT, SE);
       LVer.versionLoop(DefsUsedOutside);
@@ -830,8 +833,8 @@ class LoopDistributeForLoop {
     // Now, we remove the instruction from each loop that don't belong to that
     // partition.
     Partitions.removeUnusedInsts();
-    LLVM_DEBUG(dbgs() << "\nAfter removing unused Instrs:\n");
-    LLVM_DEBUG(Partitions.printBlocks());
+    LLVM_DEBUG(dbgs() << "LDist: After removing unused Instrs:\n");
+    LLVM_DEBUG(Partitions.printBlocks(dbgs()));
 
     if (LDistVerify) {
       LI->verify(*DT);
@@ -853,7 +856,7 @@ class LoopDistributeForLoop {
     LLVMContext &Ctx = F->getContext();
     bool Forced = isForced().value_or(false);
 
-    LLVM_DEBUG(dbgs() << "Skipping; " << Message << "\n");
+    LLVM_DEBUG(dbgs() << "LDist: Skipping; " << Message << "\n");
 
     // With Rpass-missed report that distribution failed.
     ORE->emit([&]() {
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 6d64aaa75922b..2726df59f464d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1027,23 +1027,6 @@ static void reportVectorization(OptimizationRemarkEmitter *ORE, Loop *TheLoop,
 
 } // end namespace llvm
 
-#ifndef NDEBUG
-/// \return string containing a file name and a line # for the given loop.
-static std::string getDebugLocString(const Loop *L) {
-  std::string Result;
-  if (L) {
-    raw_string_ostream OS(Result);
-    if (const DebugLoc LoopDbgLoc = L->getStartLoc())
-      LoopDbgLoc.print(OS);
-    else
-      // Just print the module name.
-      OS << L->getHeader()->getParent()->getParent()->getModuleIdentifier();
-    OS.flush();
-  }
-  return Result;
-}
-#endif
-
 namespace llvm {
 
 // Loop vectorization cost-model hints how the scalar epilogue loop should be
@@ -9836,13 +9819,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
   assert((EnableVPlanNativePath || L->isInnermost()) &&
          "VPlan-native path is not enabled. Only process inner loops.");
 
-#ifndef NDEBUG
-  const std::string DebugLocStr = getDebugLocString(L);
-#endif /* NDEBUG */
-
   LLVM_DEBUG(dbgs() << "\nLV: Checking a loop in '"
                     << L->getHeader()->getParent()->getName() << "' from "
-                    << DebugLocStr << "\n");
+                    << L->getLocStr() << "\n");
 
   LoopVectorizeHints Hints(L, InterleaveOnlyWhenForced, *ORE, TTI);
 
@@ -10112,7 +10091,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     });
   } else if (VectorizeLoop && !InterleaveLoop) {
     LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
-                      << ") in " << DebugLocStr << '\n');
+                      << ") in " << L->getLocStr() << '\n');
     ORE->emit([&]() {
       return OptimizationRemarkAnalysis(LV_NAME, IntDiagMsg.first,
                                         L->getStartLoc(), L->getHeader())
@@ -10120,7 +10099,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
     });
   } else if (VectorizeLoop && InterleaveLoop) {
     LLVM_DEBUG(dbgs() << "LV: Found a vectorizable loop (" << VF.Width
-                      << ") in " << DebugLocStr << '\n');
+                      << ") in " << L->getLocStr() << '\n');
     LLVM_DEBUG(dbgs() << "LV: Interleave Count is " << IC << '\n');
   }
 
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
index 65f94a7d8fdb4..1e53eda68b1a2 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/print-order.ll
@@ -6,7 +6,8 @@
 ;    A[i+1] = A[i] + 1;
 ; }
 
-; CHECK: LAA: Found a loop in negative_step: loop
+; CHECK-LABEL: 'negative_step'
+; CHECK: LAA: Found a loop: loop
 ; CHECK: LAA: Checking memory dependencies
 ; CHECK-NEXT: LAA: Src Scev: {(4092 + %A),+,-4}<nw><%loop>Sink Scev: {(4088 + %A)<nuw>,+,-4}<nw><%loop>(Induction step: -1)
 ; CHECK-NEXT: LAA: Distance for   store i32 %add, ptr %gep.A.plus.1, align 4 to   %l = load i32, ptr %gep.A, align 4: -4
@@ -37,7 +38,8 @@ exit:
 ;    A[i-1] = A[i] + 1;
 ; }
 
-; CHECK: LAA: Found a loop in positive_step: loop
+; CHECK-LABEL: 'positive_step'
+; CHECK: LAA: Found a loop: loop
 ; CHECK: LAA: Checking memory dependencies
 ; CHECK-NEXT: LAA: Src Scev: {(4 + %A)<nuw>,+,4}<nuw><%loop>Sink Scev: {%A,+,4}<nw><%loop>(Induction step: 1)
 ; CHECK-NEXT: LAA: Distance for   %l = load i32, ptr %gep.A, align 4 to   store i32 %add, ptr %gep.A.minus.1, align 4: -4
diff --git a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
index 82a884a637259..8019bc76d2f0f 100644
--- a/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
+++ b/llvm/test/Analysis/LoopAccessAnalysis/scalable-vector-regression-tests.ll
@@ -24,7 +24,7 @@ loop.end:
   ret void
 }
 
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize'
 ; CHECK: LAA: Bad stride - Scalable object:
 define void @regression_test_loop_access_scalable_typesize(ptr %input_ptr) {
 entry:
@@ -42,7 +42,7 @@ end:
   ret void
 }
 
-; CHECK-LABEL: LAA: Found a loop in regression_test_loop_access_scalable_typesize_nonscalable_object
+; CHECK-LABEL: 'regression_test_loop_access_scalable_typesize_nonscalable_object'
 ; CHECK: LAA: Bad stride - Scalable object:
 define void @regression_test_loop_access_scalable_typesize_nonscalable_object(ptr %input_ptr) {
 entry:
diff --git a/llvm/test/Transforms/LoopDistribute/debug-print.ll b/llvm/test/Transforms/LoopDistribute/debug-print.ll
new file mode 100644
index 0000000000000..3727c78b17c92
--- /dev/null
+++ b/llvm/test/Transforms/LoopDistribute/debug-print.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-distribute -enable-loop-distribute \
+; RUN:   -debug-only=loop-distribute -disable-output 2>&1 %s | FileCheck %s
+
+define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, i64 %stride) {
+; CHECK-LABEL: 'f'
+; CHECK-NEXT:  LDist: Found a loop: for.body
+; CHECK-NEXT:  LDist: Backward dependences:
+; CHECK-NEXT:    Backward:
+; CHECK-NEXT:        %load.a = load i32, ptr %gep.a, align 4 ->
+; CHECK-NEXT:        store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:  LDist: Seeded partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT:    for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT:  LDist: Partition 2:
+; CHECK-NEXT:    for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT:  LDist: Partition 3:
+; CHECK-NEXT:    for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT:  LDist: Merged partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT:    for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT:    for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT:  LDist: Populated partitions:
+; CHECK-NEXT:  LDist: Partition 0: (cycle)
+; CHECK-NEXT:    for.body: %load.a = load i32, ptr %gep.a, align 4
+; CHECK-NEXT:    for.body: %load.b = load i32, ptr %gep.b, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.a, ptr %gep.a.plus4, align 4
+; CHECK-NEXT:    for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT:    for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT:    for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT:    for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT:    for.body: %mul.a = mul i32 %load.b, %load.a
+; CHECK-NEXT:    for.body: %gep.a.plus4 = getelementptr inbounds i32, ptr %a, i64 %add
+; CHECK-NEXT:    for.body: %gep.b = getelementptr inbounds i32, ptr %b, i64 %ind
+; CHECK-NEXT:    for.body: %gep.a = getelementptr inbounds i32, ptr %a, i64 %ind
+; CHECK-NEXT:  LDist: Partition 1:
+; CHECK-NEXT:    for.body: %loadD = load i32, ptr %gep.d, align 4
+; CHECK-NEXT:    for.body: %load.strided.a = load i32, ptr %gep.strided.a, align 4
+; CHECK-NEXT:    for.body: store i32 %mul.c, ptr %gep.c, align 4
+; CHECK-NEXT:    for.body: br i1 %exitcond, label %exit, label %for.body
+; CHECK-NEXT:    for.body: %exitcond = icmp eq i64 %add, 20
+; CHECK-NEXT:    for.body: %add = add nuw nsw i64 %ind, 1
+; CHECK-NEXT:    for.body: %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+; CHECK-NEXT:    for.body: %mul.c = mul i32 %loadD, %load.strided.a
+; CHECK-NEXT:    for.body: %gep.c = getelementptr inbounds i32, ptr %c, i64 %ind
+; CHECK-NEXT:    for.body: %gep.strided.a = getelementptr inbounds i32, ptr %a, i64 %mul
+; CHECK-NEXT:    for.body: %mul = mul i64 %ind, %stride
+; CHECK-NEXT:    for.body: %gep.d = getelementptr inbounds i32, ptr %d, i64 %ind
+; CHECK-NEXT:  LDist: Distributing loop: for.body
+; CHECK-NEXT:  LDist: Pointers:
+; CHECK-NEXT:  LDist: After removing unused Instrs:
+; CHECK-NEXT:  LDist: Partition 0:
+; CHECK-NEXT:  for.body.ldist1: ; preds = %for.body.ldist1, %for.body.ph.ldist1
+; CHECK-NEXT:    %ind.ldist1 = phi i64 [ 0, %for.body.ph.ldist1 ], [ %add.ldist1, %for.body.ldist1 ]
+; CHECK-NEXT:    %gep.a.ldist1 = getelementptr inbounds i32, ptr %a, i64 %ind.ldist1
+; CHECK-NEXT:    %load.a.ldist1 = load i32, ptr %gep.a.ldist1, align 4
+; CHECK-NEXT:    %gep.b.ldist1 = getelementptr inbounds i32, ptr %b, i64 %ind.ldist1
+; CHECK-NEXT:    %load.b.ldist1 = load i32, ptr %gep.b.ldist1, align 4
+; CHECK-NEXT:    %mul.a.ldist1 = mul i32 %load.b.ldist1, %load.a.ldist1
+; CHECK-NEXT:    %add.ldist1 = add nuw nsw i64 %ind.ldist1, 1
+; CHECK-NEXT:    %gep.a.plus4.ldist1 = getelementptr inbounds i32, ptr...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/93208


More information about the llvm-commits mailing list