[llvm] 367df18 - [LoopFlatten] Bail if we can't perform flattening after IV widening

Wed Sep 29 11:54:44 PDT 2021

Author: Sjoerd Meijer
Date: 2021-09-29T19:53:34+01:00
New Revision: 367df18050303b27bc6937e44e4d636332333ae7

URL: https://github.com/llvm/llvm-project/commit/367df18050303b27bc6937e44e4d636332333ae7
DIFF: https://github.com/llvm/llvm-project/commit/367df18050303b27bc6937e44e4d636332333ae7.diff

LOG: [LoopFlatten] Bail if we can't perform flattening after IV widening

It can happen that after widening of the IV, flattening may not be possible,
e.g. when it is deemed unprofitable. We were not properly checking this, which
resulted in flattening being applied when it shouldn't, also leading to
incorrect results (miscompilation).

This should fix PR51980 (https://bugs.llvm.org/show_bug.cgi?id=51980)

Differential Revision: https://reviews.llvm.org/D110712

Added: 
    llvm/test/Transforms/LoopFlatten/widen-iv3.ll

Modified: 
    llvm/lib/Transforms/Scalar/LoopFlatten.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
index 11520f77aba02..9440de21b81b5 100644

--- a/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -748,12 +748,30 @@ static bool FlattenLoopPair(FlattenInfo &FI, DominatorTree *DT, LoopInfo *LI,
     return false;
 
   // Check if we can widen the induction variables to avoid overflow checks.
-  if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
+  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
+
+  // It can happen that after widening of the IV, flattening may not be
+  // possible/happening, e.g. when it is deemed unprofitable. So bail here if
+  // that is the case.
+  // TODO: IV widening without performing the actual flattening transformation
+  // is not ideal. While this codegen change should not matter much, it is an
+  // unnecessary change which is better to avoid. It's unlikely this happens
+  // often, because if it's unprofitibale after widening, it should be
+  // unprofitabe before widening as checked in the first round of checks. But
+  // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
+  // relaxed. Because this is making a code change (the IV widening, but not
+  // the flattening), we return true here.
+  if (FI.Widened && !CanFlatten)
+    return true;
+
+  // If we have widened and can perform the transformation, do that here.
+  if (CanFlatten)
     return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
 
-  // Check if the new iteration variable might overflow. In this case, we
-  // need to version the loop, and select the original version at runtime if
-  // the iteration space is too large.
+  // Otherwise, if we haven't widened the IV, check if the new iteration
+  // variable might overflow. In this case, we need to version the loop, and
+  // select the original version at runtime if the iteration space is too
+  // large.
   // TODO: We currently don't version the loop.
   OverflowResult OR = checkOverflow(FI, DT, AC);
   if (OR == OverflowResult::AlwaysOverflowsHigh ||

diff  --git a/llvm/test/Transforms/LoopFlatten/widen-iv3.ll b/llvm/test/Transforms/LoopFlatten/widen-iv3.ll
new file mode 100644
index 0000000000000..78aa7db9a5e1d
--- /dev/null
+++ b/llvm/test/Transforms/LoopFlatten/widen-iv3.ll
@@ -0,0 +1,76 @@
+
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -loop-flatten \
+; RUN:     -verify-loop-info -verify-dom-info -verify-scev -verify | \
+; RUN:     FileCheck %s --check-prefix=CHECK
+
+target datalayout = "n32"
+
+ at v = global [64 x i16] [i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31, i16 32, i16 33, i16 34, i16 35, i16 36, i16 37, i16 38, i16 39, i16 40, i16 41, i16 42, i16 43, i16 44, i16 45, i16 46, i16 47, i16 48, i16 49, i16 50, i16 51, i16 52, i16 53, i16 54, i16 55, i16 56, i16 57, i16 58, i16 59, i16 60, i16 61, i16 62, i16 63], align 1
+
+define i16 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
+; CHECK:       for.cond1.preheader:
+; CHECK-NEXT:    [[INDVAR2:%.*]] = phi i32 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT:    [[I_013:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[INC7:%.*]], [[FOR_COND_CLEANUP3]] ]
+; CHECK-NEXT:    [[SUM_012:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[ADD5_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
+; CHECK-NEXT:    [[TMP0:%.*]] = mul nsw i32 [[INDVAR2]], 16
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i16 [[I_013]], 16
+; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[MUL]] to i32
+; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
+; CHECK:       for.cond.cleanup:
+; CHECK-NEXT:    [[ADD5_LCSSA_LCSSA:%.*]] = phi i16 [ [[ADD5_LCSSA]], [[FOR_COND_CLEANUP3]] ]
+; CHECK-NEXT:    ret i16 [[ADD5_LCSSA_LCSSA]]
+; CHECK:       for.cond.cleanup3:
+; CHECK-NEXT:    [[ADD5_LCSSA]] = phi i16 [ [[ADD5:%.*]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[INDVAR_NEXT3]] = add i32 [[INDVAR2]], 1
+; CHECK-NEXT:    [[INC7]] = add nuw nsw i16 [[I_013]], 1
+; CHECK-NEXT:    [[EXITCOND14_NOT:%.*]] = icmp eq i32 [[INDVAR_NEXT3]], 4
+; CHECK-NEXT:    br i1 [[EXITCOND14_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
+; CHECK:       for.body4:
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY4]] ], [ 0, [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[J_011:%.*]] = phi i16 [ 0, [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_110:%.*]] = phi i16 [ [[SUM_012]], [[FOR_COND1_PREHEADER]] ], [ [[ADD5]], [[FOR_BODY4]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[INDVAR]], [[TMP0]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[J_011]], [[MUL]]
+; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
+; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i16], [64 x i16]* @v, i16 0, i16 [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX]], align 1
+; CHECK-NEXT:    [[ADD5]] = add nsw i16 [[TMP4]], [[SUM_110]]
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
+; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i16 [[J_011]], 1
+; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INDVAR_NEXT]], 16
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]]
+;
+entry:
+  br label %for.cond1.preheader
+
+for.cond1.preheader:                              ; preds = %entry, %for.cond.cleanup3
+  %i.013 = phi i16 [ 0, %entry ], [ %inc7, %for.cond.cleanup3 ]
+  %sum.012 = phi i16 [ 0, %entry ], [ %add5.lcssa, %for.cond.cleanup3 ]
+  %mul = mul nsw i16 %i.013, 16
+  br label %for.body4
+
+for.cond.cleanup:                                 ; preds = %for.cond.cleanup3
+  %add5.lcssa.lcssa = phi i16 [ %add5.lcssa, %for.cond.cleanup3 ]
+  ret i16 %add5.lcssa.lcssa
+
+for.cond.cleanup3:                                ; preds = %for.body4
+  %add5.lcssa = phi i16 [ %add5, %for.body4 ]
+  %inc7 = add nuw nsw i16 %i.013, 1
+  %exitcond14.not = icmp eq i16 %inc7, 4
+  br i1 %exitcond14.not, label %for.cond.cleanup, label %for.cond1.preheader
+
+for.body4:                                        ; preds = %for.cond1.preheader, %for.body4
+  %j.011 = phi i16 [ 0, %for.cond1.preheader ], [ %inc, %for.body4 ]
+  %sum.110 = phi i16 [ %sum.012, %for.cond1.preheader ], [ %add5, %for.body4 ]
+  %add = add nuw nsw i16 %j.011, %mul
+  %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @v, i16 0, i16 %add
+  %0 = load i16, i16* %arrayidx, align 1
+  %add5 = add nsw i16 %0, %sum.110
+  %inc = add nuw nsw i16 %j.011, 1
+  %exitcond.not = icmp eq i16 %inc, 16
+  br i1 %exitcond.not, label %for.cond.cleanup3, label %for.body4
+}