[llvm] r367327 - [LoopFusion] Extend use of OptimizationRemarkEmitter

Kit Barton via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 30 08:58:44 PDT 2019


Author: kbarton
Date: Tue Jul 30 08:58:43 2019
New Revision: 367327

URL: http://llvm.org/viewvc/llvm-project?rev=367327&view=rev
Log:
[LoopFusion] Extend use of OptimizationRemarkEmitter

Summary:
This patch extends the use of the OptimizationRemarkEmitter to provide
information about loops that are not fused, and loops that are not eligible for
fusion. In particular, it uses the OptimizationRemarkAnalysis to identify loops
that are not eligible for fusion and the OptimizationRemarkMissed to identify
loops that cannot be fused.

It also reuses the statistics to provide the messages used in the
OptimizationRemarks. This provides common message strings between the
optimization remarks and the statistics.

I would like feedback on this approach, in general. If people are OK with this,
I will flesh out additional remarks in subsequent commits.

Subscribers: hiraditya, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D63844

Added:
    llvm/trunk/test/Transforms/LoopFusion/diagnostics_analysis.ll
    llvm/trunk/test/Transforms/LoopFusion/diagnostics_missed.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopFuse.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopFuse.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopFuse.cpp?rev=367327&r1=367326&r2=367327&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopFuse.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopFuse.cpp Tue Jul 30 08:58:43 2019
@@ -66,7 +66,7 @@ using namespace llvm;
 
 #define DEBUG_TYPE "loop-fusion"
 
-STATISTIC(FuseCounter, "Count number of loop fusions performed");
+STATISTIC(FuseCounter, "Loops fused");
 STATISTIC(NumFusionCandidates, "Number of candidates for loop fusion");
 STATISTIC(InvalidPreheader, "Loop has invalid preheader");
 STATISTIC(InvalidHeader, "Loop has invalid header");
@@ -79,12 +79,12 @@ STATISTIC(MayThrowException, "Loop may t
 STATISTIC(ContainsVolatileAccess, "Loop contains a volatile access");
 STATISTIC(NotSimplifiedForm, "Loop is not in simplified form");
 STATISTIC(InvalidDependencies, "Dependencies prevent fusion");
-STATISTIC(InvalidTripCount,
-          "Loop does not have invariant backedge taken count");
+STATISTIC(UnknownTripCount, "Loop has unknown trip count");
 STATISTIC(UncomputableTripCount, "SCEV cannot compute trip count of loop");
-STATISTIC(NonEqualTripCount, "Candidate trip counts are not the same");
-STATISTIC(NonAdjacent, "Candidates are not adjacent");
-STATISTIC(NonEmptyPreheader, "Candidate has a non-empty preheader");
+STATISTIC(NonEqualTripCount, "Loop trip counts are not the same");
+STATISTIC(NonAdjacent, "Loops are not adjacent");
+STATISTIC(NonEmptyPreheader, "Loop has a non-empty preheader");
+STATISTIC(FusionNotBeneficial, "Fusion is not beneficial");
 
 enum FusionDependenceAnalysisChoice {
   FUSION_DEPENDENCE_ANALYSIS_SCEV,
@@ -151,11 +151,14 @@ struct FusionCandidate {
   const DominatorTree *DT;
   const PostDominatorTree *PDT;
 
+  OptimizationRemarkEmitter &ORE;
+
   FusionCandidate(Loop *L, const DominatorTree *DT,
-                  const PostDominatorTree *PDT)
+                  const PostDominatorTree *PDT, OptimizationRemarkEmitter &ORE)
       : Preheader(L->getLoopPreheader()), Header(L->getHeader()),
         ExitingBlock(L->getExitingBlock()), ExitBlock(L->getExitBlock()),
-        Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT) {
+        Latch(L->getLoopLatch()), L(L), Valid(true), DT(DT), PDT(PDT),
+        ORE(ORE) {
 
     // Walk over all blocks in the loop and check for conditions that may
     // prevent fusion. For each block, walk over all instructions and collect
@@ -163,28 +166,28 @@ struct FusionCandidate {
     // found, invalidate this object and return.
     for (BasicBlock *BB : L->blocks()) {
       if (BB->hasAddressTaken()) {
-        AddressTakenBB++;
         invalidate();
+        reportInvalidCandidate(AddressTakenBB);
         return;
       }
 
       for (Instruction &I : *BB) {
         if (I.mayThrow()) {
-          MayThrowException++;
           invalidate();
+          reportInvalidCandidate(MayThrowException);
           return;
         }
         if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
           if (SI->isVolatile()) {
-            ContainsVolatileAccess++;
             invalidate();
+            reportInvalidCandidate(ContainsVolatileAccess);
             return;
           }
         }
         if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
           if (LI->isVolatile()) {
-            ContainsVolatileAccess++;
             invalidate();
+            reportInvalidCandidate(ContainsVolatileAccess);
             return;
           }
         }
@@ -227,6 +230,44 @@ struct FusionCandidate {
   }
 #endif
 
+  /// Determine if a fusion candidate (representing a loop) is eligible for
+  /// fusion. Note that this only checks whether a single loop can be fused - it
+  /// does not check whether it is *legal* to fuse two loops together.
+  bool isEligibleForFusion(ScalarEvolution &SE) const {
+    if (!isValid()) {
+      LLVM_DEBUG(dbgs() << "FC has invalid CFG requirements!\n");
+      if (!Preheader)
+        ++InvalidPreheader;
+      if (!Header)
+        ++InvalidHeader;
+      if (!ExitingBlock)
+        ++InvalidExitingBlock;
+      if (!ExitBlock)
+        ++InvalidExitBlock;
+      if (!Latch)
+        ++InvalidLatch;
+      if (L->isInvalid())
+        ++InvalidLoop;
+
+      return false;
+    }
+
+    // Require ScalarEvolution to be able to determine a trip count.
+    if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
+      LLVM_DEBUG(dbgs() << "Loop " << L->getName()
+                        << " trip count not computable!\n");
+      return reportInvalidCandidate(UnknownTripCount);
+    }
+
+    if (!L->isLoopSimplifyForm()) {
+      LLVM_DEBUG(dbgs() << "Loop " << L->getName()
+                        << " is not in simplified form!\n");
+      return reportInvalidCandidate(NotSimplifiedForm);
+    }
+
+    return true;
+  }
+
 private:
   // This is only used internally for now, to clear the MemWrites and MemReads
   // list and setting Valid to false. I can't envision other uses of this right
@@ -239,6 +280,17 @@ private:
     MemReads.clear();
     Valid = false;
   }
+
+  bool reportInvalidCandidate(llvm::Statistic &Stat) const {
+    using namespace ore;
+    assert(L && Preheader && "Fusion candidate not initialized properly!");
+    ++Stat;
+    ORE.emit(OptimizationRemarkAnalysis(DEBUG_TYPE, Stat.getName(),
+                                        L->getStartLoc(), Preheader)
+             << "[" << Preheader->getParent()->getName() << "]: "
+             << "Loop is not a candidate for fusion: " << Stat.getDesc());
+    return false;
+  }
 };
 
 inline llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
@@ -391,16 +443,6 @@ static void printLoopVector(const LoopVe
 }
 #endif
 
-static void reportLoopFusion(const FusionCandidate &FC0,
-                             const FusionCandidate &FC1,
-                             OptimizationRemarkEmitter &ORE) {
-  using namespace ore;
-  ORE.emit(
-      OptimizationRemark(DEBUG_TYPE, "LoopFusion", FC0.Preheader->getParent())
-      << "Fused " << NV("Cand1", StringRef(FC0.Preheader->getName()))
-      << " with " << NV("Cand2", StringRef(FC1.Preheader->getName())));
-}
-
 struct LoopFuser {
 private:
   // Sets of control flow equivalent fusion candidates for a given nest level.
@@ -506,53 +548,13 @@ private:
     return false;
   }
 
-  /// Determine if a fusion candidate (representing a loop) is eligible for
-  /// fusion. Note that this only checks whether a single loop can be fused - it
-  /// does not check whether it is *legal* to fuse two loops together.
-  bool eligibleForFusion(const FusionCandidate &FC) const {
-    if (!FC.isValid()) {
-      LLVM_DEBUG(dbgs() << "FC " << FC << " has invalid CFG requirements!\n");
-      if (!FC.Preheader)
-        InvalidPreheader++;
-      if (!FC.Header)
-        InvalidHeader++;
-      if (!FC.ExitingBlock)
-        InvalidExitingBlock++;
-      if (!FC.ExitBlock)
-        InvalidExitBlock++;
-      if (!FC.Latch)
-        InvalidLatch++;
-      if (FC.L->isInvalid())
-        InvalidLoop++;
-
-      return false;
-    }
-
-    // Require ScalarEvolution to be able to determine a trip count.
-    if (!SE.hasLoopInvariantBackedgeTakenCount(FC.L)) {
-      LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
-                        << " trip count not computable!\n");
-      InvalidTripCount++;
-      return false;
-    }
-
-    if (!FC.L->isLoopSimplifyForm()) {
-      LLVM_DEBUG(dbgs() << "Loop " << FC.L->getName()
-                        << " is not in simplified form!\n");
-      NotSimplifiedForm++;
-      return false;
-    }
-
-    return true;
-  }
-
   /// Iterate over all loops in the given loop set and identify the loops that
   /// are eligible for fusion. Place all eligible fusion candidates into Control
   /// Flow Equivalent sets, sorted by dominance.
   void collectFusionCandidates(const LoopVector &LV) {
     for (Loop *L : LV) {
-      FusionCandidate CurrCand(L, &DT, &PDT);
-      if (!eligibleForFusion(CurrCand))
+      FusionCandidate CurrCand(L, &DT, &PDT, ORE);
+      if (!CurrCand.isEligibleForFusion(SE))
         continue;
 
       // Go through each list in FusionCandidates and determine if L is control
@@ -664,14 +666,15 @@ private:
           if (!identicalTripCounts(*FC0, *FC1)) {
             LLVM_DEBUG(dbgs() << "Fusion candidates do not have identical trip "
                                  "counts. Not fusing.\n");
-            NonEqualTripCount++;
+            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+                                                       NonEqualTripCount);
             continue;
           }
 
           if (!isAdjacent(*FC0, *FC1)) {
             LLVM_DEBUG(dbgs()
                        << "Fusion candidates are not adjacent. Not fusing.\n");
-            NonAdjacent++;
+            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1, NonAdjacent);
             continue;
           }
 
@@ -683,12 +686,15 @@ private:
           if (!isEmptyPreheader(*FC1)) {
             LLVM_DEBUG(dbgs() << "Fusion candidate does not have empty "
                                  "preheader. Not fusing.\n");
-            NonEmptyPreheader++;
+            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+                                                       NonEmptyPreheader);
             continue;
           }
 
           if (!dependencesAllowFusion(*FC0, *FC1)) {
             LLVM_DEBUG(dbgs() << "Memory dependencies do not allow fusion!\n");
+            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+                                                       InvalidDependencies);
             continue;
           }
 
@@ -696,9 +702,11 @@ private:
           LLVM_DEBUG(dbgs()
                      << "\tFusion appears to be "
                      << (BeneficialToFuse ? "" : "un") << "profitable!\n");
-          if (!BeneficialToFuse)
+          if (!BeneficialToFuse) {
+            reportLoopFusion<OptimizationRemarkMissed>(*FC0, *FC1,
+                                                       FusionNotBeneficial);
             continue;
-
+          }
           // All analysis has completed and has determined that fusion is legal
           // and profitable. At this point, start transforming the code and
           // perform fusion.
@@ -710,15 +718,14 @@ private:
           // Note this needs to be done *before* performFusion because
           // performFusion will change the original loops, making it not
           // possible to identify them after fusion is complete.
-          reportLoopFusion(*FC0, *FC1, ORE);
+          reportLoopFusion<OptimizationRemark>(*FC0, *FC1, FuseCounter);
 
-          FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT);
+          FusionCandidate FusedCand(performFusion(*FC0, *FC1), &DT, &PDT, ORE);
           FusedCand.verify();
-          assert(eligibleForFusion(FusedCand) &&
+          assert(FusedCand.isEligibleForFusion(SE) &&
                  "Fused candidate should be eligible for fusion!");
 
           // Notify the loop-depth-tree that these loops are not valid objects
-          // anymore.
           LDT.removeLoop(FC1->L);
 
           CandidateSet.erase(FC0);
@@ -1137,6 +1144,33 @@ private:
 
     return FC0.L;
   }
+
+  /// Report details on loop fusion opportunities.
+  ///
+  /// This template function can be used to report both successful and missed
+  /// loop fusion opportunities, based on the RemarkKind. The RemarkKind should
+  /// be one of:
+  ///   - OptimizationRemarkMissed to report when loop fusion is unsuccessful
+  ///     given two valid fusion candidates.
+  ///   - OptimizationRemark to report successful fusion of two fusion
+  ///     candidates.
+  /// The remarks will be printed using the form:
+  ///    <path/filename>:<line number>:<column number>: [<function name>]:
+  ///       <Cand1 Preheader> and <Cand2 Preheader>: <Stat Description>
+  template <typename RemarkKind>
+  void reportLoopFusion(const FusionCandidate &FC0, const FusionCandidate &FC1,
+                        llvm::Statistic &Stat) {
+    assert(FC0.Preheader && FC1.Preheader &&
+           "Expecting valid fusion candidates");
+    using namespace ore;
+    ++Stat;
+    ORE.emit(RemarkKind(DEBUG_TYPE, Stat.getName(), FC0.L->getStartLoc(),
+                        FC0.Preheader)
+             << "[" << FC0.Preheader->getParent()->getName()
+             << "]: " << NV("Cand1", StringRef(FC0.Preheader->getName()))
+             << " and " << NV("Cand2", StringRef(FC1.Preheader->getName()))
+             << ": " << Stat.getDesc());
+  }
 };
 
 struct LoopFuseLegacy : public FunctionPass {

Added: llvm/trunk/test/Transforms/LoopFusion/diagnostics_analysis.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopFusion/diagnostics_analysis.ll?rev=367327&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopFusion/diagnostics_analysis.ll (added)
+++ llvm/trunk/test/Transforms/LoopFusion/diagnostics_analysis.ll Tue Jul 30 08:58:43 2019
@@ -0,0 +1,130 @@
+; RUN: opt -S -loop-fusion -pass-remarks-analysis=loop-fusion -disable-output < %s 2>&1 | FileCheck %s
+
+@B = common global [1024 x i32] zeroinitializer, align 16
+
+; CHECK: remark: diagnostics_analysis.c:6:3: [test]: Loop is not a candidate for fusion: Loop contains a volatile access
+; CHECK: remark: diagnostics_analysis.c:10:3: [test]: Loop is not a candidate for fusion: Loop has unknown trip count
+define void @test(i32* %A, i32 %n) !dbg !15 {
+entry:
+  %A.addr = alloca i32*, align 8
+  %n.addr = alloca i32, align 4
+  %i = alloca i32, align 4
+  %i1 = alloca i32, align 4
+  store i32* %A, i32** %A.addr, align 8
+  store i32 %n, i32* %n.addr, align 4
+  %0 = bitcast i32* %i to i8*
+  store i32 0, i32* %i, align 4
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %1 = load i32, i32* %i, align 4
+  %2 = load i32, i32* %n.addr, align 4
+  %cmp = icmp slt i32 %1, %2
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  %3 = bitcast i32* %i to i8*, !dbg !42
+  br label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %4 = load i32, i32* %i, align 4
+  %sub = sub nsw i32 %4, 3
+  %5 = load i32, i32* %i, align 4
+  %add = add nsw i32 %5, 3
+  %mul = mul nsw i32 %sub, %add
+  %6 = load i32, i32* %i, align 4
+  %rem = srem i32 %mul, %6
+  %7 = load i32*, i32** %A.addr, align 8
+  %8 = load i32, i32* %i, align 4
+  %idxprom = sext i32 %8 to i64
+  %arrayidx = getelementptr inbounds i32, i32* %7, i64 %idxprom
+  store volatile i32 %rem, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %9 = load i32, i32* %i, align 4, !dbg !49
+  %inc = add nsw i32 %9, 1, !dbg !49
+  store i32 %inc, i32* %i, align 4, !dbg !49
+  br label %for.cond, !dbg !42, !llvm.loop !50
+
+for.end:                                          ; preds = %for.cond.cleanup
+  %10 = bitcast i32* %i1 to i8*
+  store i32 0, i32* %i1, align 4
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc12, %for.end
+  %11 = load i32, i32* %i1, align 4
+  %12 = load i32, i32* %n.addr, align 4
+  %cmp3 = icmp slt i32 %11, %12
+  br i1 %cmp3, label %for.body5, label %for.cond.cleanup4
+
+for.cond.cleanup4:                                ; preds = %for.cond2
+  %13 = bitcast i32* %i1 to i8*
+  br label %for.end14
+
+for.body5:                                        ; preds = %for.cond2
+  %14 = load i32, i32* %i1, align 4
+  %sub6 = sub nsw i32 %14, 3
+  %15 = load i32, i32* %i1, align 4
+  %add7 = add nsw i32 %15, 3
+  %mul8 = mul nsw i32 %sub6, %add7
+  %16 = load i32, i32* %i1, align 4
+  %rem9 = srem i32 %mul8, %16
+  %17 = load i32, i32* %i1, align 4
+  %idxprom10 = sext i32 %17 to i64
+  %arrayidx11 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %idxprom10
+  store i32 %rem9, i32* %arrayidx11, align 4
+  br label %for.inc12
+
+for.inc12:                                        ; preds = %for.body5
+  %18 = load i32, i32* %i1, align 4
+  %inc13 = add nsw i32 %18, 1
+  store i32 %inc13, i32* %i1, align 4
+  br label %for.cond2, !dbg !59, !llvm.loop !67
+
+for.end14:                                        ; preds = %for.cond.cleanup4
+  ret void
+}
+
+!llvm.module.flags = !{!10, !11, !13}
+!llvm.ident = !{!14}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !3, line: 1, type: !6, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU)
+!3 = !DIFile(filename: "diagnostics_analysis.c", directory: "/tmp")
+!4 = !{}
+!5 = !{!0}
+!6 = !DICompositeType(tag: DW_TAG_array_type, baseType: !7, size: 32768, elements: !8)
+!7 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!8 = !{!9}
+!9 = !DISubrange(count: 1024)
+!10 = !{i32 2, !"Dwarf Version", i32 4}
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{i32 7, !"PIC Level", i32 2}
+!14 = !{!"clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git c019c32c5a2b0ed4487a738337d35fd3f630ac0a)"}
+!15 = distinct !DISubprogram(name: "test", scope: !3, file: !3, line: 5, type: !16, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !20)
+!16 = !DISubroutineType(types: !17)
+!17 = !{null, !18, !7}
+!18 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !19, size: 64)
+!19 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !7)
+!20 = !{!21, !22, !23, !25}
+!21 = !DILocalVariable(name: "A", arg: 1, scope: !15, file: !3, line: 5, type: !18)
+!22 = !DILocalVariable(name: "n", arg: 2, scope: !15, file: !3, line: 5, type: !7)
+!23 = !DILocalVariable(name: "i", scope: !24, file: !3, line: 6, type: !7)
+!24 = distinct !DILexicalBlock(scope: !15, file: !3, line: 6, column: 3)
+!25 = !DILocalVariable(name: "i", scope: !26, file: !3, line: 10, type: !7)
+!26 = distinct !DILexicalBlock(scope: !15, file: !3, line: 10, column: 3)
+!38 = distinct !DILexicalBlock(scope: !24, file: !3, line: 6, column: 3)
+!41 = !DILocation(line: 6, column: 3, scope: !24)
+!42 = !DILocation(line: 6, column: 3, scope: !38)
+!44 = distinct !DILexicalBlock(scope: !38, file: !3, line: 6, column: 31)
+!49 = !DILocation(line: 6, column: 27, scope: !38)
+!50 = distinct !{!50, !41, !51}
+!51 = !DILocation(line: 8, column: 3, scope: !24)
+!55 = distinct !DILexicalBlock(scope: !26, file: !3, line: 10, column: 3)
+!58 = !DILocation(line: 10, column: 3, scope: !26)
+!59 = !DILocation(line: 10, column: 3, scope: !55)
+!67 = distinct !{!67, !58, !68}
+!68 = !DILocation(line: 12, column: 3, scope: !26)
+!69 = !DILocation(line: 13, column: 1, scope: !15)

Added: llvm/trunk/test/Transforms/LoopFusion/diagnostics_missed.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopFusion/diagnostics_missed.ll?rev=367327&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopFusion/diagnostics_missed.ll (added)
+++ llvm/trunk/test/Transforms/LoopFusion/diagnostics_missed.ll Tue Jul 30 08:58:43 2019
@@ -0,0 +1,301 @@
+; RUN: opt -S -loop-fusion -pass-remarks-missed=loop-fusion -disable-output < %s 2>&1 | FileCheck %s
+;
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@B = common global [1024 x i32] zeroinitializer, align 16, !dbg !0
+
+; CHECK: remark: diagnostics_missed.c:18:3: [non_adjacent]: entry and for.end: Loops are not adjacent
+define void @non_adjacent(i32* noalias %A) !dbg !67 {
+entry:
+    br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
+    %exitcond1 = icmp ne i64 %i.0, 100
+  br i1 %exitcond1, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %sub = add nsw i64 %i.0, -3
+  %add = add nuw nsw i64 %i.0, 3
+  %mul = mul nsw i64 %sub, %add
+  %rem = srem i64 %mul, %i.0
+  %conv = trunc i64 %rem to i32
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0
+  store i32 %conv, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nuw nsw i64 %i.0, 1, !dbg !86
+  br label %for.cond, !dbg !87, !llvm.loop !88
+
+for.end:                                          ; preds = %for.cond.cleanup
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc13, %for.end
+  %i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
+  %exitcond = icmp ne i64 %i1.0, 100
+  br i1 %exitcond, label %for.body6, label %for.cond.cleanup5
+
+for.cond.cleanup5:                                ; preds = %for.cond2
+  br label %for.end15
+
+for.body6:                                        ; preds = %for.cond2
+  %sub7 = add nsw i64 %i1.0, -3
+  %add8 = add nuw nsw i64 %i1.0, 3
+  %mul9 = mul nsw i64 %sub7, %add8
+  %rem10 = srem i64 %mul9, %i1.0
+  %conv11 = trunc i64 %rem10 to i32
+  %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0
+  store i32 %conv11, i32* %arrayidx12, align 4
+  br label %for.inc13
+
+for.inc13:                                        ; preds = %for.body6
+  %inc14 = add nuw nsw i64 %i1.0, 1, !dbg !100
+  br label %for.cond2, !dbg !101, !llvm.loop !102
+
+for.end15:                                        ; preds = %for.cond.cleanup5
+  ret void
+}
+
+
+; CHECK: remark: diagnostics_missed.c:28:3: [different_bounds]: entry and for.end: Loop trip counts are not the same
+define void @different_bounds(i32* noalias %A) !dbg !105 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond1 = icmp ne i64 %i.0, 100
+  br i1 %exitcond1, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %sub = add nsw i64 %i.0, -3
+  %add = add nuw nsw i64 %i.0, 3
+  %mul = mul nsw i64 %sub, %add
+  %rem = srem i64 %mul, %i.0
+  %conv = trunc i64 %rem to i32
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %i.0
+  store i32 %conv, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nuw nsw i64 %i.0, 1, !dbg !123
+  br label %for.cond, !dbg !124, !llvm.loop !125
+
+for.end:                                          ; preds = %for.cond.cleanup
+  br label %for.cond2
+
+for.cond2:                                        ; preds = %for.inc13, %for.end
+  %i1.0 = phi i64 [ 0, %for.end ], [ %inc14, %for.inc13 ]
+  %exitcond = icmp ne i64 %i1.0, 200
+  br i1 %exitcond, label %for.body6, label %for.cond.cleanup5
+
+for.cond.cleanup5:                                ; preds = %for.cond2
+  br label %for.end15
+
+for.body6:                                        ; preds = %for.cond2
+  %sub7 = add nsw i64 %i1.0, -3
+  %add8 = add nuw nsw i64 %i1.0, 3
+  %mul9 = mul nsw i64 %sub7, %add8
+  %rem10 = srem i64 %mul9, %i1.0
+  %conv11 = trunc i64 %rem10 to i32
+  %arrayidx12 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %i1.0
+  store i32 %conv11, i32* %arrayidx12, align 4
+  br label %for.inc13
+
+for.inc13:                                        ; preds = %for.body6
+  %inc14 = add nuw nsw i64 %i1.0, 1
+  br label %for.cond2, !dbg !138, !llvm.loop !139
+
+for.end15:                                        ; preds = %for.cond.cleanup5
+  ret void
+}
+
+; CHECK: remark: diagnostics_missed.c:38:3: [negative_dependence]: entry and for.end: Loop has a non-empty preheader
+define void @negative_dependence(i32* noalias %A) !dbg !142 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ]
+  %exitcond3 = icmp ne i64 %indvars.iv1, 100
+  br i1 %exitcond3, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
+  %tmp = trunc i64 %indvars.iv1 to i32
+  store i32 %tmp, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond, !dbg !160, !llvm.loop !161
+
+for.end:                                          ; preds = %for.cond
+  call void @llvm.dbg.value(metadata i32 0, metadata !147, metadata !DIExpression()), !dbg !163
+  br label %for.cond2, !dbg !164
+
+for.cond2:                                        ; preds = %for.inc10, %for.end
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.end ]
+  %exitcond = icmp ne i64 %indvars.iv, 100
+  br i1 %exitcond, label %for.body5, label %for.end12
+
+for.body5:                                        ; preds = %for.cond2
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+  %tmp4 = load i32, i32* %arrayidx7, align 4
+  %mul = shl nsw i32 %tmp4, 1
+  %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
+  store i32 %mul, i32* %arrayidx9, align 4
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.body5
+  br label %for.cond2
+
+for.end12:                                        ; preds = %for.cond2
+  ret void, !dbg !178
+}
+
+; CHECK: remark: diagnostics_missed.c:51:3: [sumTest]: entry and for.cond2.preheader: Dependencies prevent fusion
+define i32 @sumTest(i32* noalias %A) !dbg !179 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %indvars.iv1 = phi i64 [ %indvars.iv.next2, %for.inc ], [ 0, %entry ]
+  %sum.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
+  %exitcond3 = icmp ne i64 %indvars.iv1, 100
+  br i1 %exitcond3, label %for.body, label %for.cond2
+
+for.body:                                         ; preds = %for.cond
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv1
+  %tmp = load i32, i32* %arrayidx, align 4
+  %add = add nsw i32 %sum.0, %tmp
+  %indvars.iv.next2 = add nuw nsw i64 %indvars.iv1, 1
+  br label %for.cond, !dbg !199, !llvm.loop !200
+
+for.cond2:                                        ; preds = %for.inc10, %for.cond
+  %sum.0.lcssa = phi i32 [ %sum.0, %for.cond ], [ %sum.0.lcssa, %for.inc10 ]
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc10 ], [ 0, %for.cond ]
+  %exitcond = icmp ne i64 %indvars.iv, 100
+  br i1 %exitcond, label %for.body5, label %for.end12
+
+for.body5:                                        ; preds = %for.cond2
+  %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+  %tmp4 = load i32, i32* %arrayidx7, align 4
+  %div = sdiv i32 %tmp4, %sum.0.lcssa
+  %arrayidx9 = getelementptr inbounds [1024 x i32], [1024 x i32]* @B, i64 0, i64 %indvars.iv
+  store i32 %div, i32* %arrayidx9, align 4
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.body5
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  br label %for.cond2
+
+for.end12:                                        ; preds = %for.cond2
+  ret i32 %sum.0.lcssa, !dbg !215
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata)
+
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!11, !12, !13, !14}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "B", scope: !2, file: !6, line: 46, type: !7, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 9.0.0 (git@github.ibm.com:compiler/llvm-project.git 23c4baaa9f5b33d2d52eda981d376c6b0a7a3180)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: GNU)
+!3 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp")
+!4 = !{}
+!5 = !{!0}
+!6 = !DIFile(filename: "diagnostics_missed.c", directory: "/tmp")
+!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 32768, elements: !9)
+!8 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!9 = !{!10}
+!10 = !DISubrange(count: 1024)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{i32 1, !"wchar_size", i32 4}
+!14 = !{i32 7, !"PIC Level", i32 2}
+!17 = !DISubroutineType(types: !18)
+!18 = !{null, !19}
+!19 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !20)
+!20 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !8, size: 64)
+!67 = distinct !DISubprogram(name: "non_adjacent", scope: !6, file: !6, line: 17, type: !17, scopeLine: 17, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !68)
+!68 = !{!69, !70, !73}
+!69 = !DILocalVariable(name: "A", arg: 1, scope: !67, file: !6, line: 17, type: !19)
+!70 = !DILocalVariable(name: "i", scope: !71, file: !6, line: 18, type: !72)
+!71 = distinct !DILexicalBlock(scope: !67, file: !6, line: 18, column: 3)
+!72 = !DIBasicType(name: "long int", size: 64, encoding: DW_ATE_signed)
+!73 = !DILocalVariable(name: "i", scope: !74, file: !6, line: 22, type: !72)
+!74 = distinct !DILexicalBlock(scope: !67, file: !6, line: 22, column: 3)
+!79 = distinct !DILexicalBlock(scope: !71, file: !6, line: 18, column: 3)
+!80 = !DILocation(line: 18, column: 3, scope: !71)
+!86 = !DILocation(line: 18, column: 30, scope: !79)
+!87 = !DILocation(line: 18, column: 3, scope: !79)
+!88 = distinct !{!88, !80, !89}
+!89 = !DILocation(line: 20, column: 3, scope: !71)
+!93 = distinct !DILexicalBlock(scope: !74, file: !6, line: 22, column: 3)
+!94 = !DILocation(line: 22, column: 3, scope: !74)
+!100 = !DILocation(line: 22, column: 30, scope: !93)
+!101 = !DILocation(line: 22, column: 3, scope: !93)
+!102 = distinct !{!102, !94, !103}
+!103 = !DILocation(line: 24, column: 3, scope: !74)
+!105 = distinct !DISubprogram(name: "different_bounds", scope: !6, file: !6, line: 27, type: !17, scopeLine: 27, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !106)
+!106 = !{!107, !108, !110}
+!107 = !DILocalVariable(name: "A", arg: 1, scope: !105, file: !6, line: 27, type: !19)
+!108 = !DILocalVariable(name: "i", scope: !109, file: !6, line: 28, type: !72)
+!109 = distinct !DILexicalBlock(scope: !105, file: !6, line: 28, column: 3)
+!110 = !DILocalVariable(name: "i", scope: !111, file: !6, line: 32, type: !72)
+!111 = distinct !DILexicalBlock(scope: !105, file: !6, line: 32, column: 3)
+!116 = distinct !DILexicalBlock(scope: !109, file: !6, line: 28, column: 3)
+!117 = !DILocation(line: 28, column: 3, scope: !109)
+!123 = !DILocation(line: 28, column: 30, scope: !116)
+!124 = !DILocation(line: 28, column: 3, scope: !116)
+!125 = distinct !{!125, !117, !126}
+!126 = !DILocation(line: 30, column: 3, scope: !109)
+!130 = distinct !DILexicalBlock(scope: !111, file: !6, line: 32, column: 3)
+!131 = !DILocation(line: 32, column: 3, scope: !111)
+!138 = !DILocation(line: 32, column: 3, scope: !130)
+!139 = distinct !{!139, !131, !140}
+!140 = !DILocation(line: 34, column: 3, scope: !111)
+!142 = distinct !DISubprogram(name: "negative_dependence", scope: !6, file: !6, line: 37, type: !17, scopeLine: 37, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !143)
+!143 = !{!144, !145, !147}
+!144 = !DILocalVariable(name: "A", arg: 1, scope: !142, file: !6, line: 37, type: !19)
+!145 = !DILocalVariable(name: "i", scope: !146, file: !6, line: 38, type: !8)
+!146 = distinct !DILexicalBlock(scope: !142, file: !6, line: 38, column: 3)
+!147 = !DILocalVariable(name: "i", scope: !148, file: !6, line: 42, type: !8)
+!148 = distinct !DILexicalBlock(scope: !142, file: !6, line: 42, column: 3)
+!153 = distinct !DILexicalBlock(scope: !146, file: !6, line: 38, column: 3)
+!154 = !DILocation(line: 38, column: 3, scope: !146)
+!160 = !DILocation(line: 38, column: 3, scope: !153)
+!161 = distinct !{!161, !154, !162}
+!162 = !DILocation(line: 40, column: 3, scope: !146)
+!163 = !DILocation(line: 0, scope: !148)
+!164 = !DILocation(line: 42, column: 8, scope: !148)
+!178 = !DILocation(line: 45, column: 1, scope: !142)
+!179 = distinct !DISubprogram(name: "sumTest", scope: !6, file: !6, line: 48, type: !180, scopeLine: 48, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !182)
+!180 = !DISubroutineType(types: !181)
+!181 = !{!8, !19}
+!182 = !{!183, !184, !185, !187}
+!183 = !DILocalVariable(name: "A", arg: 1, scope: !179, file: !6, line: 48, type: !19)
+!184 = !DILocalVariable(name: "sum", scope: !179, file: !6, line: 49, type: !8)
+!185 = !DILocalVariable(name: "i", scope: !186, file: !6, line: 51, type: !8)
+!186 = distinct !DILexicalBlock(scope: !179, file: !6, line: 51, column: 3)
+!187 = !DILocalVariable(name: "i", scope: !188, file: !6, line: 54, type: !8)
+!188 = distinct !DILexicalBlock(scope: !179, file: !6, line: 54, column: 3)
+!193 = distinct !DILexicalBlock(scope: !186, file: !6, line: 51, column: 3)
+!194 = !DILocation(line: 51, column: 3, scope: !186)
+!199 = !DILocation(line: 51, column: 3, scope: !193)
+!200 = distinct !{!200, !194, !201}
+!201 = !DILocation(line: 52, column: 15, scope: !186)
+!215 = !DILocation(line: 57, column: 3, scope: !179)




More information about the llvm-commits mailing list