[PATCH] D110712: [LoopFlatten] Bail if we can't perform flattening after IV widening

Wed Sep 29 06:30:16 PDT 2021

SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: dmgreen, momchil.velikov, lenary.
Herald added a subscriber: hiraditya.
SjoerdMeijer requested review of this revision.
Herald added a project: LLVM.

It can happen that after widening of the IV, flattening may not be possible, e.g. when it is deemed unprofitable. We were not properly checking this, which resulted in flattening being applied when it shouldn't.

This should fix PR51980 (https://bugs.llvm.org/show_bug.cgi?id=51980)


https://reviews.llvm.org/D110712

Files:
  llvm/lib/Transforms/Scalar/LoopFlatten.cpp
  llvm/test/Transforms/LoopFlatten/widen-iv3.ll


Index: llvm/test/Transforms/LoopFlatten/widen-iv3.ll
===================================================================

--- llvm/test/Transforms/LoopFlatten/widen-iv3.ll
+++ llvm/test/Transforms/LoopFlatten/widen-iv3.ll
@@ -10,7 +10,6 @@
 define i16 @foo() {
 ; CHECK-LABEL: @foo(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[FLATTEN_TRIPCOUNT:%.*]] = mul i32 16, 4
 ; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
 ; CHECK:       for.cond1.preheader:
 ; CHECK-NEXT:    [[INDVAR2:%.*]] = phi i32 [ [[INDVAR_NEXT3:%.*]], [[FOR_COND_CLEANUP3:%.*]] ], [ 0, [[ENTRY:%.*]] ]
@@ -19,7 +18,6 @@
 ; CHECK-NEXT:    [[TMP0:%.*]] = mul nsw i32 [[INDVAR2]], 16
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i16 [[I_013]], 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[MUL]] to i32
-; CHECK-NEXT:    [[FLATTEN_TRUNCIV:%.*]] = trunc i32 [[INDVAR2]] to i16
 ; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
 ; CHECK:       for.cond.cleanup:
 ; CHECK-NEXT:    [[ADD5_LCSSA_LCSSA:%.*]] = phi i16 [ [[ADD5_LCSSA]], [[FOR_COND_CLEANUP3]] ]
@@ -28,22 +26,22 @@
 ; CHECK-NEXT:    [[ADD5_LCSSA]] = phi i16 [ [[ADD5:%.*]], [[FOR_BODY4]] ]
 ; CHECK-NEXT:    [[INDVAR_NEXT3]] = add i32 [[INDVAR2]], 1
 ; CHECK-NEXT:    [[INC7]] = add nuw nsw i16 [[I_013]], 1
-; CHECK-NEXT:    [[EXITCOND14_NOT:%.*]] = icmp eq i32 [[INDVAR_NEXT3]], [[FLATTEN_TRIPCOUNT]]
+; CHECK-NEXT:    [[EXITCOND14_NOT:%.*]] = icmp eq i32 [[INDVAR_NEXT3]], 4
 ; CHECK-NEXT:    br i1 [[EXITCOND14_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
 ; CHECK:       for.body4:
-; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ 0, [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[INDVAR:%.*]] = phi i32 [ [[INDVAR_NEXT:%.*]], [[FOR_BODY4]] ], [ 0, [[FOR_COND1_PREHEADER]] ]
 ; CHECK-NEXT:    [[J_011:%.*]] = phi i16 [ 0, [[FOR_COND1_PREHEADER]] ]
-; CHECK-NEXT:    [[SUM_110:%.*]] = phi i16 [ [[SUM_012]], [[FOR_COND1_PREHEADER]] ]
+; CHECK-NEXT:    [[SUM_110:%.*]] = phi i16 [ [[SUM_012]], [[FOR_COND1_PREHEADER]] ], [ [[ADD5]], [[FOR_BODY4]] ]
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[INDVAR]], [[TMP0]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nuw nsw i16 [[J_011]], [[MUL]]
 ; CHECK-NEXT:    [[TMP3:%.*]] = trunc i32 [[TMP2]] to i16
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [64 x i16], [64 x i16]* @v, i16 0, i16 [[TMP3]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i16, i16* [[ARRAYIDX]], align 1
 ; CHECK-NEXT:    [[ADD5]] = add nsw i16 [[TMP4]], [[SUM_110]]
-; CHECK-NEXT:    [[INDVAR_NEXT:%.*]] = add i32 [[INDVAR]], 1
+; CHECK-NEXT:    [[INDVAR_NEXT]] = add i32 [[INDVAR]], 1
 ; CHECK-NEXT:    [[INC:%.*]] = add nuw nsw i16 [[J_011]], 1
 ; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INDVAR_NEXT]], 16
-; CHECK-NEXT:    br label [[FOR_COND_CLEANUP3]]
+; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP3]], label [[FOR_BODY4]]
 ;
 entry:
   br label %for.cond1.preheader
Index: llvm/lib/Transforms/Scalar/LoopFlatten.cpp
===================================================================
--- llvm/lib/Transforms/Scalar/LoopFlatten.cpp
+++ llvm/lib/Transforms/Scalar/LoopFlatten.cpp
@@ -739,12 +739,29 @@
     return false;
 
   // Check if we can widen the induction variables to avoid overflow checks.
-  if (CanWidenIV(FI, DT, LI, SE, AC, TTI))
+  bool CanFlatten = CanWidenIV(FI, DT, LI, SE, AC, TTI);
+
+  // It can happen that after widening of the IV, flattening may not be
+  // possible/happening, e.g. when it is deemed unprofitable. So bail here if
+  // that is the case.
+  // TODO: IV widening without performing the actual flattening transformation
+  // is not ideal. While this codegen change should not matter much, it is an
+  // unnecessary change which is better to avoid. It's unlikely his happens
+  // often, because if it's unprofitibale after widening, it should be
+  // unprofitabe before widening as checked in the first round of checks. But
+  // 'RepeatedInstructionThreshold' is set to only 2, which can probably be
+  // relaxed.
+  if (FI.Widened && !CanFlatten)
+    return false;
+
+  // If we have widened and can perform the transformation, do that here.
+  if (CanFlatten)
     return DoFlattenLoopPair(FI, DT, LI, SE, AC, TTI);
 
-  // Check if the new iteration variable might overflow. In this case, we
-  // need to version the loop, and select the original version at runtime if
-  // the iteration space is too large.
+  // Otherwise, if we haven't widened the IV, check if the new iteration
+  // variable might overflow. In this case, we need to version the loop, and
+  // select the original version at runtime if the iteration space is too
+  // large.
   // TODO: We currently don't version the loop.
   OverflowResult OR = checkOverflow(FI, DT, AC);
   if (OR == OverflowResult::AlwaysOverflowsHigh ||


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D110712.375860.patch
Type: text/x-patch
Size: 4750 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210929/6e88c5d2/attachment.bin>