[llvm] 23c93c2 - Discount invariant instructions in full unrolling

Fri May 14 11:07:25 PDT 2021

Author: Philip Reames
Date: 2021-05-14T11:07:19-07:00
New Revision: 23c93c255549f440444b10d73d439bedc0d6822d

URL: https://github.com/llvm/llvm-project/commit/23c93c255549f440444b10d73d439bedc0d6822d
DIFF: https://github.com/llvm/llvm-project/commit/23c93c255549f440444b10d73d439bedc0d6822d.diff

LOG: Discount invariant instructions in full unrolling

This patch updates the cost model for full unrolling to discount the cost of a loop-invariant expression on all but one iteration. The reasoning here is that such an expression (as determined by SCEV) will be CSEd or DSEd once the loop is unrolled. Note that SCEV's reasoning will find things which could be invariant, not simply those outside the loop.

Differential Revision: https://reviews.llvm.org/D102506

Added: 
    llvm/test/Transforms/LoopUnroll/full-unroll-invariant.ll

Modified: 
    llvm/lib/Analysis/LoopUnrollAnalyzer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
index b04cc46bd272..66deb353044e 100644

--- a/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
+++ b/llvm/lib/Analysis/LoopUnrollAnalyzer.cpp
@@ -35,6 +35,11 @@ bool UnrolledInstAnalyzer::simplifyInstWithSCEV(Instruction *I) {
     return true;
   }
 
+  // If we have a loop invariant computation, we only need to compute it once.
+  // Given that, all but the first occurrence are free.
+  if (!IterationNumber->isZero() && SE.isLoopInvariant(S, L))
+    return true;
+
   auto *AR = dyn_cast<SCEVAddRecExpr>(S);
   if (!AR || AR->getLoop() != L)
     return false;

diff  --git a/llvm/test/Transforms/LoopUnroll/full-unroll-invariant.ll b/llvm/test/Transforms/LoopUnroll/full-unroll-invariant.ll
new file mode 100644
index 000000000000..ce5649416f33
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/full-unroll-invariant.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=1 | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(loop-unroll-full)' -unroll-threshold=1 | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Body fully unrolls into a single instruction after unroll and simplify
+define i32 @test(i8 %a) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[ZEXT_9:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    ret i32 [[ZEXT_9]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %zext = zext i8 %a to i32
+  %inc = add nuw nsw i64 %phi, 1
+  %cmp = icmp ult i64 %inc, 10
+  br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+  ret i32 %zext
+}
+
+; Generalized version of previous to show benefit of using SCEV's ability
+; to prove invariance, not Loop::isLoopInvariant.
+define i32 @test2(i8 %a) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[ZEXT_9:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[AND_9:%.*]] = and i32 [[ZEXT_9]], 31
+; CHECK-NEXT:    [[SHL_9:%.*]] = shl i32 [[AND_9]], 15
+; CHECK-NEXT:    ret i32 [[SHL_9]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %zext = zext i8 %a to i32
+  %and = and i32 %zext, 31
+  %shl = shl i32 %and, 15
+  %inc = add nuw nsw i64 %phi, 1
+  %cmp = icmp ult i64 %inc, 10
+  br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+  ret i32 %shl
+}
+
+; Show that this works for instructions which might fault as well
+define i32 @test3(i8 %a) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    [[ZEXT_9:%.*]] = zext i8 [[A:%.*]] to i32
+; CHECK-NEXT:    [[DIV_9:%.*]] = udiv i32 [[ZEXT_9]], 31
+; CHECK-NEXT:    ret i32 [[DIV_9]]
+;
+entry:
+  br label %for.body
+
+for.body:
+  %phi = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+  %zext = zext i8 %a to i32
+  %div = udiv i32 %zext, 31
+  %inc = add nuw nsw i64 %phi, 1
+  %cmp = icmp ult i64 %inc, 10
+  br i1 %cmp, label %for.body, label %for.exit
+
+for.exit:
+  ret i32 %div
+}