[llvm] r344223 - [IndVars] Drop "exact" flag from lshr and udiv when substituting their args
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 11 00:22:27 PDT 2018
Author: mkazantsev
Date: Thu Oct 11 00:22:26 2018
New Revision: 344223
URL: http://llvm.org/viewvc/llvm-project?rev=344223&view=rev
Log:
[IndVars] Drop "exact" flag from lshr and udiv when substituting their args
There is a transform that may replace `lshr (x+1), 1` with `lshr x, 1` if it can
prove that the result will be the same. However, the original instruction might
have the `exact` flag set, and that flag must now be dropped unless we can prove
that it still holds. An incorrectly set `exact` flag may then produce poison.
Differential Revision: https://reviews.llvm.org/D53061
Reviewed By: sanjoy
Added:
llvm/trunk/test/Transforms/IndVarSimplify/drop-exact.ll
Modified:
llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
Modified: llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp?rev=344223&r1=344222&r2=344223&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp Thu Oct 11 00:22:26 2018
@@ -108,6 +108,7 @@ Value *SimplifyIndvar::foldIVUser(Instru
Value *IVSrc = nullptr;
const unsigned OperIdx = 0;
const SCEV *FoldedExpr = nullptr;
+ bool MustDropExactFlag = false;
switch (UseInst->getOpcode()) {
default:
return nullptr;
@@ -140,6 +141,11 @@ Value *SimplifyIndvar::foldIVUser(Instru
APInt::getOneBitSet(BitWidth, D->getZExtValue()));
}
FoldedExpr = SE->getUDivExpr(SE->getSCEV(IVSrc), SE->getSCEV(D));
+ // We might have 'exact' flag set at this point which will no longer be
+ // correct after we make the replacement.
+ if (UseInst->isExact() &&
+ SE->getSCEV(IVSrc) != SE->getMulExpr(FoldedExpr, SE->getSCEV(D)))
+ MustDropExactFlag = true;
}
// We have something that might fold it's operand. Compare SCEVs.
if (!SE->isSCEVable(UseInst->getType()))
@@ -155,6 +161,9 @@ Value *SimplifyIndvar::foldIVUser(Instru
UseInst->setOperand(OperIdx, IVSrc);
assert(SE->getSCEV(UseInst) == FoldedExpr && "bad SCEV with folded oper");
+ if (MustDropExactFlag)
+ UseInst->dropPoisonGeneratingFlags();
+
++NumElimOperand;
Changed = true;
if (IVOperand->use_empty())
Added: llvm/trunk/test/Transforms/IndVarSimplify/drop-exact.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/drop-exact.ll?rev=344223&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/drop-exact.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/drop-exact.ll Thu Oct 11 00:22:26 2018
@@ -0,0 +1,99 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -indvars -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
+
+; The transform gets rid of `add nsw i32 %tmp17, -1`; make sure that we drop the
+; "exact" flag on the lshr when doing so.
+define void @drop_exact(i32* %p, i64* %p1) {
+; CHECK-LABEL: @drop_exact(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB12:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: ret void
+; CHECK: bb12:
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ -47436, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB12]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP42:%.*]], [[BB12]] ]
+; CHECK-NEXT: [[TMP15]] = add nsw i32 [[TMP13]], -1
+; CHECK-NEXT: [[TMP16:%.*]] = shl i32 [[TMP15]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i32 42831, [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP17]], 1
+; CHECK-NEXT: [[TMP20:%.*]] = urem i32 [[TMP19]], 250
+; CHECK-NEXT: [[TMP22:%.*]] = lshr i32 [[TMP17]], 1
+; CHECK-NEXT: store i32 [[TMP22]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT: store i64 [[TMP26]], i64* [[P1:%.*]], align 4
+; CHECK-NEXT: [[TMP42]] = add nuw nsw i32 [[TMP14]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP42]], 719
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB7:%.*]], label [[BB12]]
+;
+bb:
+ br label %bb12
+
+bb7: ; preds = %bb12
+ ret void
+
+bb12: ; preds = %bb12, %bb
+ %tmp13 = phi i32 [ -47436, %bb ], [ %tmp15, %bb12 ]
+ %tmp14 = phi i32 [ 0, %bb ], [ %tmp42, %bb12 ]
+ %tmp15 = add i32 %tmp13, -1
+ %tmp16 = shl i32 %tmp15, 1
+ %tmp17 = sub i32 42831, %tmp16
+ %tmp19 = lshr i32 %tmp17, 1
+ %tmp20 = urem i32 %tmp19, 250
+ %tmp21 = add nsw i32 %tmp17, -1
+ %tmp22 = lshr exact i32 %tmp21, 1
+ store i32 %tmp22, i32* %p, align 4
+ %tmp26 = zext i32 %tmp20 to i64
+ store i64 %tmp26, i64* %p1, align 4
+ %tmp42 = add nuw nsw i32 %tmp14, 1
+ %tmp43 = icmp ugt i32 %tmp14, 717
+ br i1 %tmp43, label %bb7, label %bb12
+}
+
+; Here `add nsw i32 %tmp17, 0` is thrown away, but the "exact" flag must be kept.
+define void @dont_drop_exact(i32* %p, i64* %p1) {
+; CHECK-LABEL: @dont_drop_exact(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br label [[BB12:%.*]]
+; CHECK: bb7:
+; CHECK-NEXT: ret void
+; CHECK: bb12:
+; CHECK-NEXT: [[TMP13:%.*]] = phi i32 [ -47436, [[BB:%.*]] ], [ [[TMP15:%.*]], [[BB12]] ]
+; CHECK-NEXT: [[TMP14:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP42:%.*]], [[BB12]] ]
+; CHECK-NEXT: [[TMP15]] = add nsw i32 [[TMP13]], -1
+; CHECK-NEXT: [[TMP16:%.*]] = shl i32 [[TMP15]], 1
+; CHECK-NEXT: [[TMP17:%.*]] = sub nsw i32 42831, [[TMP16]]
+; CHECK-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP17]], 1
+; CHECK-NEXT: [[TMP20:%.*]] = urem i32 [[TMP19]], 250
+; CHECK-NEXT: [[TMP22:%.*]] = lshr exact i32 [[TMP17]], 1
+; CHECK-NEXT: store i32 [[TMP22]], i32* [[P:%.*]], align 4
+; CHECK-NEXT: [[TMP26:%.*]] = zext i32 [[TMP20]] to i64
+; CHECK-NEXT: store i64 [[TMP26]], i64* [[P1:%.*]], align 4
+; CHECK-NEXT: [[TMP42]] = add nuw nsw i32 [[TMP14]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[TMP42]], 719
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[BB7:%.*]], label [[BB12]]
+;
+bb:
+ br label %bb12
+
+bb7: ; preds = %bb12
+ ret void
+
+bb12: ; preds = %bb12, %bb
+ %tmp13 = phi i32 [ -47436, %bb ], [ %tmp15, %bb12 ]
+ %tmp14 = phi i32 [ 0, %bb ], [ %tmp42, %bb12 ]
+ %tmp15 = add i32 %tmp13, -1
+ %tmp16 = shl i32 %tmp15, 1
+ %tmp17 = sub i32 42831, %tmp16
+ %tmp19 = lshr i32 %tmp17, 1
+ %tmp20 = urem i32 %tmp19, 250
+ %tmp21 = add nsw i32 %tmp17, 0
+ %tmp22 = lshr exact i32 %tmp21, 1
+ store i32 %tmp22, i32* %p, align 4
+ %tmp26 = zext i32 %tmp20 to i64
+ store i64 %tmp26, i64* %p1, align 4
+ %tmp42 = add nuw nsw i32 %tmp14, 1
+ %tmp43 = icmp ugt i32 %tmp14, 717
+ br i1 %tmp43, label %bb7, label %bb12
+}
More information about the llvm-commits
mailing list