[llvm] 70d4a20 - [UnJ] Update LI for inner nested loops

David Green via llvm-commits llvm-commits at lists.llvm.org
Wed May 27 06:37:26 PDT 2020


Author: David Green
Date: 2020-05-27T14:36:38+01:00
New Revision: 70d4a202995315c77d7daec9f332a6ceda84efc9

URL: https://github.com/llvm/llvm-project/commit/70d4a202995315c77d7daec9f332a6ceda84efc9
DIFF: https://github.com/llvm/llvm-project/commit/70d4a202995315c77d7daec9f332a6ceda84efc9.diff

LOG: [UnJ] Update LI for inner nested loops

This makes sure to correctly register the loop info of the children
of unrolled and jammed loops. It re-uses some code from the unroller for
registering subloops.

Differential Revision: https://reviews.llvm.org/D80619

Added: 
    llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll

Modified: 
    llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
index e8aac1233292..8ac6b0894d1c 100644
--- a/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUnrollAndJam.cpp
@@ -366,29 +366,29 @@ llvm::UnrollAndJamLoop(Loop *L, unsigned Count, unsigned TripCount,
     SmallVector<BasicBlock *, 8> NewBlocks;
     // Maps Blocks[It] -> Blocks[It-1]
     DenseMap<Value *, Value *> PrevItValueMap;
+    SmallDenseMap<const Loop *, Loop *, 4> NewLoops;
+    NewLoops[L] = L;
+    NewLoops[SubLoop] = SubLoop;
 
     for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) {
       ValueToValueMapTy VMap;
       BasicBlock *New = CloneBasicBlock(*BB, VMap, "." + Twine(It));
       Header->getParent()->getBasicBlockList().push_back(New);
 
-      if (ForeBlocks.count(*BB)) {
-        L->addBasicBlockToLoop(New, *LI);
+      // Tell LI about New.
+      addClonedBlockToLoopInfo(*BB, New, LI, NewLoops);
 
+      if (ForeBlocks.count(*BB)) {
         if (*BB == ForeBlocksFirst[0])
           ForeBlocksFirst.push_back(New);
         if (*BB == ForeBlocksLast[0])
           ForeBlocksLast.push_back(New);
       } else if (SubLoopBlocks.count(*BB)) {
-        SubLoop->addBasicBlockToLoop(New, *LI);
-
         if (*BB == SubLoopBlocksFirst[0])
           SubLoopBlocksFirst.push_back(New);
         if (*BB == SubLoopBlocksLast[0])
           SubLoopBlocksLast.push_back(New);
       } else if (AftBlocks.count(*BB)) {
-        L->addBasicBlockToLoop(New, *LI);
-
         if (*BB == AftBlocksFirst[0])
           AftBlocksFirst.push_back(New);
         if (*BB == AftBlocksLast[0])

diff  --git a/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll b/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll
new file mode 100644
index 000000000000..d84260650c67
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnrollAndJam/innerloop.ll
@@ -0,0 +1,97 @@
+; RUN: opt -loop-unroll-and-jam -allow-unroll-and-jam -verify-loop-info < %s -S | FileCheck %s
+; RUN: opt -passes='unroll-and-jam,verify<loops>' -allow-unroll-and-jam < %s -S | FileCheck %s
+
+; Check that the newly created loops do not fail to be added to LI
+; This test deliberately disables UnJ on the middle loop, performing it instead on the
+; outer of 3 nested loops. The (new) inner loops need to be added to LI.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @test() {
+; CHECK-LABEL: test
+; CHECK:       for.cond17.preheader:
+; CHECK:    br label %for.cond20.preheader
+; CHECK:       for.cond20.preheader:
+; CHECK:    br label %for.cond23.preheader
+; CHECK:       for.cond23.preheader:
+; CHECK:    br label %for.body25
+; CHECK:       for.body25:
+; CHECK:    br i1 [[CMP24:%.*]], label %for.body25, label %for.inc45
+; CHECK:       for.inc45:
+; CHECK:    br label %for.body25.1
+; CHECK:       for.inc48:
+; CHECK:    br i1 [[CMP18_3:%.*]], label %for.cond20.preheader, label %for.end50
+; CHECK:       for.end50:
+; CHECK:    ret i32 0
+; CHECK:       for.body25.1:
+; CHECK:    br i1 [[CMP24_1:%.*]], label %for.body25.1, label %for.inc45.1
+; CHECK:       for.inc45.1:
+; CHECK:    br label %for.body25.2
+; CHECK:       for.body25.2:
+; CHECK:    br i1 [[CMP24_2:%.*]], label %for.body25.2, label %for.inc45.2
+; CHECK:       for.inc45.2:
+; CHECK:    br label %for.body25.3
+; CHECK:       for.body25.3:
+; CHECK:    br i1 [[CMP24_3:%.*]], label %for.body25.3, label %for.inc45.3
+; CHECK:       for.inc45.3:
+; CHECK:    br i1 [[CMP21_3:%.*]], label %for.cond23.preheader, label %for.inc48
+;
+entry:
+  %A = alloca [8 x [8 x i32]], align 16
+  %B = alloca [8 x [8 x i32]], align 16
+  %C = alloca [8 x [8 x i32]], align 16
+  br label %for.cond17.preheader
+
+for.cond17.preheader:                             ; preds = %entry
+  br label %for.cond20.preheader
+
+for.cond20.preheader:                             ; preds = %for.cond17.preheader, %for.inc48
+  %i.13 = phi i32 [ 0, %for.cond17.preheader ], [ %inc49, %for.inc48 ]
+  br label %for.cond23.preheader
+
+for.cond23.preheader:                             ; preds = %for.cond20.preheader, %for.inc45
+  %j.12 = phi i32 [ 0, %for.cond20.preheader ], [ %inc46, %for.inc45 ]
+  br label %for.body25
+
+for.body25:                                       ; preds = %for.cond23.preheader, %for.body25
+  %k.01 = phi i32 [ 0, %for.cond23.preheader ], [ %inc43, %for.body25 ]
+  %idxprom26 = zext i32 %i.13 to i64
+  %idxprom28 = zext i32 %j.12 to i64
+  %arrayidx29 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %C, i64 0, i64 %idxprom26, i64 %idxprom28
+  %0 = load i32, i32* %arrayidx29, align 4
+  %idxprom30 = zext i32 %i.13 to i64
+  %idxprom32 = zext i32 %k.01 to i64
+  %arrayidx33 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %A, i64 0, i64 %idxprom30, i64 %idxprom32
+  %1 = load i32, i32* %arrayidx33, align 4
+  %idxprom34 = zext i32 %k.01 to i64
+  %idxprom36 = zext i32 %j.12 to i64
+  %arrayidx37 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %B, i64 0, i64 %idxprom34, i64 %idxprom36
+  %2 = load i32, i32* %arrayidx37, align 4
+  %mul = mul nsw i32 %1, %2
+  %add = add nsw i32 %0, %mul
+  %idxprom38 = zext i32 %i.13 to i64
+  %idxprom40 = zext i32 %j.12 to i64
+  %arrayidx41 = getelementptr inbounds [8 x [8 x i32]], [8 x [8 x i32]]* %C, i64 0, i64 %idxprom38, i64 %idxprom40
+  store i32 %add, i32* %arrayidx41, align 4
+  %inc43 = add nuw nsw i32 %k.01, 1
+  %cmp24 = icmp ult i32 %k.01, 7
+  br i1 %cmp24, label %for.body25, label %for.inc45
+
+for.inc45:                                        ; preds = %for.body25
+  %inc46 = add nuw nsw i32 %j.12, 1
+  %cmp21 = icmp ult i32 %j.12, 7
+  br i1 %cmp21, label %for.cond23.preheader, label %for.inc48, !llvm.loop !7
+
+for.inc48:                                        ; preds = %for.inc45
+  %inc49 = add nuw nsw i32 %i.13, 1
+  %cmp18 = icmp ult i32 %i.13, 7
+  br i1 %cmp18, label %for.cond20.preheader, label %for.end50, !llvm.loop !5
+
+for.end50:                                        ; preds = %for.inc48
+  ret i32 0
+}
+
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.unroll_and_jam.count", i32 4}
+!7 = distinct !{!7, !8}
+!8 = !{!"llvm.loop.unroll_and_jam.disable"}


        


More information about the llvm-commits mailing list