[llvm] r278421 - Extend trip count instead of truncating IV in LFTR, when legal

Ehsan Amiri via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 11 14:31:40 PDT 2016


Author: amehsan
Date: Thu Aug 11 16:31:40 2016
New Revision: 278421

URL: http://llvm.org/viewvc/llvm-project?rev=278421&view=rev
Log:
Extend trip count instead of truncating IV in LFTR, when legal

When legal, extending the trip count in the loop control logic generates better code than truncating the IV. This is because:

(1) extending trip count is a loop invariant operation (see genLoopLimit where we prove trip count is loop invariant).
(2) Scalar Evolution seems to have problems understanding trunc when computing the loop trip count, so removing the truncates allows Scalar Evolution to perform better analysis. (In particular, this fixes PR 28363, which is the motivation for this change.)

I am not going to perform any performance test. Any degradation caused by this should be an indication of a bug elsewhere.

To prove legality, we rely on SCEV to prove zext(trunc(IV)) == IV (or similarly for sext). If this holds, we can prove the equivalence of trunc(IV) == ExitCnt (1) and IV == zext(ExitCnt): simply take zext of both sides of (1) and apply the proven equivalence.

This commit contains changes in a newly added testcase which was not included in the previous commit (which was reverted later on).

https://reviews.llvm.org/D23075


Added:
    llvm/trunk/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
    llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll
    llvm/trunk/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
    llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll

Modified: llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp?rev=278421&r1=278420&r2=278421&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/IndVarSimplify.cpp Thu Aug 11 16:31:40 2016
@@ -1989,7 +1989,34 @@ linearFunctionTestReplace(Loop *L,
 
       DEBUG(dbgs() << "  Widen RHS:\t" << *ExitCnt << "\n");
     } else {
+      // We try to extend trip count first. If that doesn't work we truncate IV.
+      // Zext(trunc(IV)) == IV implies equivalence of the following two:
+      // Trunc(IV) == ExitCnt and IV == zext(ExitCnt). Similarly for sext. If
+      // one of the two holds, extend the trip count, otherwise we truncate IV.
+      bool Extended = false;
+      const SCEV *IV = SE->getSCEV(CmpIndVar);
+      const SCEV *ZExtTrunc =
+           SE->getZeroExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+                                                     ExitCnt->getType()),
+                                 CmpIndVar->getType());
 
+      if (ZExtTrunc == IV) {
+        Extended = true;
+        ExitCnt = Builder.CreateZExt(ExitCnt, IndVar->getType(),
+                                     "wide.trip.count");
+      } else {
+        const SCEV *SExtTrunc =
+          SE->getSignExtendExpr(SE->getTruncateExpr(SE->getSCEV(CmpIndVar),
+                                                    ExitCnt->getType()),
+                                CmpIndVar->getType());
+        if (SExtTrunc == IV) {
+          Extended = true;
+          ExitCnt = Builder.CreateSExt(ExitCnt, IndVar->getType(),
+                                       "wide.trip.count");
+        }
+      }
+
+      if (!Extended)
         CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(),
                                         "lftr.wideiv");
     }

Modified: llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll?rev=278421&r1=278420&r2=278421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll (original)
+++ llvm/trunk/test/Transforms/IndVarSimplify/elim-extend.ll Thu Aug 11 16:31:40 2016
@@ -41,6 +41,8 @@ entry:
   br i1 %precond, label %loop, label %return
 ; CHECK: loop:
 ; CHECK-NOT: sext
+; CHECK: wide.trip.count = sext
+; CHECK-NOT: sext
 ; CHECK: exit:
 loop:
   %iv = phi i32 [ %postiv, %loop ], [ %init, %entry ]

Modified: llvm/trunk/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll?rev=278421&r1=278420&r2=278421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll (original)
+++ llvm/trunk/test/Transforms/IndVarSimplify/iv-widen-elim-ext.ll Thu Aug 11 16:31:40 2016
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt < %s -indvars -S | FileCheck %s --implicit-check-not sext --implicit-check-not zext
 
 target datalayout = "p:64:64:64-n32:64"
 
@@ -7,8 +7,8 @@ target datalayout = "p:64:64:64-n32:64"
 ; the IV is considered signed or unsigned.
 define void @foo(i32* %A, i32* %B, i32* %C, i32 %N) {
 ; CHECK-LABEL: @foo(
-; CHECK-NOT: zext
-; CHECK-NOT: sext
+; CHECK: wide.trip.count = zext
+; CHECK: ret void
 entry:
   %cmp1 = icmp slt i32 0, %N
   br i1 %cmp1, label %for.body.lr.ph, label %for.end
@@ -45,8 +45,8 @@ for.end:
 
 define void @foo1(i32* %A, i32* %B, i32* %C, i32 %N) {
 ; CHECK-LABEL: @foo1(
-; CHECK-NOT: zext
-; CHECK-NOT: sext
+; CHECK: wide.trip.count = zext
+; CHECK: ret void
 entry:
   %cmp1 = icmp slt i32 0, %N
   br i1 %cmp1, label %for.body.lr.ph, label %for.end

Added: llvm/trunk/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll?rev=278421&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/lftr-wide-trip-count.ll Thu Aug 11 16:31:40 2016
@@ -0,0 +1,158 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; Provide legal integer types.
+target datalayout = "n8:16:32:64"
+
+
+define void @test1(float* %autoc,
+                   float* %data,
+                   float %d, i32 %data_len, i32 %sample) nounwind {
+entry:
+  %sub = sub i32 %data_len, %sample
+  %cmp4 = icmp eq i32 %data_len, %sample
+  br i1 %cmp4, label %for.end, label %for.body
+
+for.body:                                         ; preds = %entry, %for.body
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 68719476736, %entry ]
+  %temp = trunc i64 %indvars.iv to i32
+  %add = add i32 %temp, %sample
+  %idxprom = zext i32 %add to i64
+  %arrayidx = getelementptr inbounds float, float* %data, i64 %idxprom
+  %temp1 = load float, float* %arrayidx, align 4
+  %mul = fmul float %temp1, %d
+  %arrayidx2 = getelementptr inbounds float, float* %autoc, i64 %indvars.iv
+  %temp2 = load float, float* %arrayidx2, align 4
+  %add3 = fadd float %temp2, %mul
+  store float %add3, float* %arrayidx2, align 4
+  %indvars.iv.next = add i64 %indvars.iv, 1
+  %temp3 = trunc i64 %indvars.iv.next to i32
+  %cmp = icmp ult i32 %temp3, %sub
+  br i1 %cmp, label %for.body, label %for.end
+
+for.end:                                          ; preds = %for.body, %entry
+  ret void
+
+; CHECK-LABEL: @test1(
+
+; With the given initial value for IV, it is not legal to widen
+; trip count to IV size
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
+; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
+}
+
+define float @test2(float* %a,
+                    float* %b,
+                    i32 zeroext %m) local_unnamed_addr #0 {
+entry:
+  %cmp5 = icmp ugt i32 %m, 500
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+  %i.06 = phi i32 [ %inc, %for.body ], [ 500, %for.body.preheader ]
+  %idxprom = zext i32 %i.06 to i64
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+  %temp = load float, float* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %idxprom
+  %temp1 = load float, float* %arrayidx2, align 4
+  %mul = fmul float %temp, %temp1
+  %add = fadd float %sum.07, %mul
+  %inc = add i32 %i.06, 1
+  %cmp = icmp ult i32 %inc, %m
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.end.loopexit ]
+  ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test2(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = zext
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+; CHECK: br i1 %exitcond
+}
+
+define float @test3(float* %b,
+                    i32 signext %m) local_unnamed_addr #0 {
+entry:
+  %cmp5 = icmp sgt i32 %m, -10
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+  %i.06 = phi i32 [ %inc, %for.body ], [ -10, %for.body.preheader ]
+  %add = add nsw i32 %i.06, 20
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+  %temp = load float, float* %arrayidx, align 4
+  %conv = sitofp i32 %i.06 to float
+  %mul = fmul float %conv, %temp
+  %add1 = fadd float %sum.07, %mul
+  %inc = add nsw i32 %i.06, 1
+  %cmp = icmp slt i32 %inc, %m
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1, %for.end.loopexit ]
+  ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test3(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = sext
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+; CHECK: br i1 %exitcond
+}
+
+define float @test4(float* %b,
+                    i32 signext %m) local_unnamed_addr #0 {
+entry:
+  %cmp5 = icmp sgt i32 %m, 10
+  br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader:                               ; preds = %entry
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.preheader, %for.body
+  %sum.07 = phi float [ %add1, %for.body ], [ 0.000000e+00, %for.body.preheader ]
+  %i.06 = phi i32 [ %inc, %for.body ], [ 10, %for.body.preheader ]
+  %add = add nsw i32 %i.06, 20
+  %idxprom = sext i32 %add to i64
+  %arrayidx = getelementptr inbounds float, float* %b, i64 %idxprom
+  %temp = load float, float* %arrayidx, align 4
+  %conv = sitofp i32 %i.06 to float
+  %mul = fmul float %conv, %temp
+  %add1 = fadd float %sum.07, %mul
+  %inc = add nsw i32 %i.06, 1
+  %cmp = icmp slt i32 %inc, %m
+  br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit:                                 ; preds = %for.body
+  %add1.lcssa = phi float [ %add1, %for.body ]
+  br label %for.end
+
+for.end:                                          ; preds = %for.end.loopexit, %entry
+  %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add1.lcssa, %for.end.loopexit ]
+  ret float %sum.0.lcssa
+
+; CHECK-LABEL: @test4(
+; Trip count should be widened and LFTR should canonicalize the condition
+; CHECK: %wide.trip.count = zext
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
+; CHECK: br i1 %exitcond
+}
+
+

Modified: llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll?rev=278421&r1=278420&r2=278421&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll (original)
+++ llvm/trunk/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll Thu Aug 11 16:31:40 2016
@@ -33,8 +33,9 @@ for.end:
 ; CHECK-LABEL: @test1(
 
 ; check that we turn the IV test into an eq.
-; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %su
+; CHECK: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK: %wide.trip.count = zext i32 %sub to i64
+; CHECK: %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
 ; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
 }
 




More information about the llvm-commits mailing list