[llvm] r265913 - [IndVars] Eliminate op.with.overflow when possible
Sanjoy Das via llvm-commits
llvm-commits at lists.llvm.org
Sun Apr 10 15:50:32 PDT 2016
Author: sanjoy
Date: Sun Apr 10 17:50:31 2016
New Revision: 265913
URL: http://llvm.org/viewvc/llvm-project?rev=265913&view=rev
Log:
[IndVars] Eliminate op.with.overflow when possible
Summary:
If we can prove that an op.with.overflow intrinsic does not overflow, we
can get rid of the intrinsic, and replace it with non-wrapping
arithmetic.
Reviewers: atrick, regehr
Subscribers: sanjoy, mcrosier, llvm-commits
Differential Revision: http://reviews.llvm.org/D18685
Added:
llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
Modified:
llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll
Modified: llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp?rev=265913&r1=265912&r2=265913&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp Sun Apr 10 17:50:31 2016
@@ -71,6 +71,7 @@ namespace {
bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
+ bool eliminateOverflowIntrinsic(CallInst *CI);
bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
@@ -318,6 +319,108 @@ void SimplifyIndvar::eliminateIVRemainde
DeadInsts.emplace_back(Rem);
}
+bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) { // If CI calls a signed/unsigned add/sub .with.overflow intrinsic that is shown below not to overflow, rewrite its extractvalue users to use a plain nsw/nuw add/sub and a constant-false overflow bit; returns true only when that rewrite happened.
+ auto *F = CI->getCalledFunction();
+ if (!F)
+ return false;
+ // Pointer-to-member types, so the SCEV arithmetic builder and the SCEV extension builder can be selected per intrinsic in the switch below.
+ typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
+ const SCEV *, const SCEV *, SCEV::NoWrapFlags);
+ typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
+ const SCEV *, Type *);
+
+ OperationFunctionTy Operation;
+ ExtensionFunctionTy Extension;
+
+ Instruction::BinaryOps RawOp; // opcode used for the replacement BinaryOperator created further down
+
+ // We always have exactly one of nsw or nuw. If NoSignedOverflow is false, we
+ // have nuw.
+ bool NoSignedOverflow;
+ // Only the four add/sub overflow intrinsics are handled; every other intrinsic ID takes the default case and is rejected.
+ switch (F->getIntrinsicID()) {
+ default:
+ return false;
+
+ case Intrinsic::sadd_with_overflow:
+ Operation = &ScalarEvolution::getAddExpr;
+ Extension = &ScalarEvolution::getSignExtendExpr;
+ RawOp = Instruction::Add;
+ NoSignedOverflow = true;
+ break;
+
+ case Intrinsic::uadd_with_overflow:
+ Operation = &ScalarEvolution::getAddExpr;
+ Extension = &ScalarEvolution::getZeroExtendExpr;
+ RawOp = Instruction::Add;
+ NoSignedOverflow = false;
+ break;
+
+ case Intrinsic::ssub_with_overflow:
+ Operation = &ScalarEvolution::getMinusSCEV;
+ Extension = &ScalarEvolution::getSignExtendExpr;
+ RawOp = Instruction::Sub;
+ NoSignedOverflow = true;
+ break;
+
+ case Intrinsic::usub_with_overflow:
+ Operation = &ScalarEvolution::getMinusSCEV;
+ Extension = &ScalarEvolution::getZeroExtendExpr;
+ RawOp = Instruction::Sub;
+ NoSignedOverflow = false;
+ break;
+ }
+
+ const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
+ const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+ // The comparison is carried out in an integer type twice as wide as the operand type. NOTE(review): the cast presumes the operand type is always an IntegerType here - confirm.
+ auto *NarrowTy = cast<IntegerType>(LHS->getType());
+ auto *WideTy =
+ IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
+ // A = ext(LHS op RHS), B = ext(LHS) op ext(RHS). The code treats A and B being the same SCEV object as the proof that the narrow operation does not overflow.
+ const SCEV *A =
+ (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy);
+ const SCEV *B =
+ (SE->*Operation)((SE->*Extension)(LHS, WideTy),
+ (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap);
+
+ if (A != B)
+ return false;
+
+ // Proved no overflow, nuke the overflow check and, if possible, the overflow
+ // intrinsic as well.
+ // Build the replacement add/sub on CI's own two operands; CI is handed to Create as its final (insertion position) argument.
+ BinaryOperator *NewResult = BinaryOperator::Create(
+ RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+ // Signed intrinsics yield an nsw instruction, unsigned ones an nuw instruction.
+ if (NoSignedOverflow)
+ NewResult->setHasNoSignedWrap(true);
+ else
+ NewResult->setHasNoUnsignedWrap(true);
+
+ SmallVector<ExtractValueInst *, 4> ToDelete;
+ // Redirect each extractvalue user of CI: index 1 (the overflow bit) becomes constant false, index 0 (the arithmetic result) becomes NewResult. Users that are not extractvalue instructions are left alone.
+ for (auto *U : CI->users()) {
+ if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+ if (EVI->getIndices()[0] == 1)
+ EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+ else {
+ assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
+ EVI->replaceAllUsesWith(NewResult);
+ }
+ ToDelete.push_back(EVI);
+ }
+ }
+ // The collected extractvalues are erased only after the walk over CI's users has finished.
+ for (auto *EVI : ToDelete)
+ EVI->eraseFromParent();
+ // The intrinsic call itself is removed only when nothing else still uses it.
+ if (CI->use_empty())
+ CI->eraseFromParent();
+ // NOTE(review): true is returned even if CI survives because of other users, and this function neither queues anything on DeadInsts nor records the change itself - confirm the caller accounts for that.
+ return true;
+}
+
/// Eliminate an operation that consumes a simple IV and has no observable
/// side-effect given the range of IV values. IVOperand is guaranteed SCEVable,
/// but UseInst may not be.
@@ -335,6 +438,10 @@ bool SimplifyIndvar::eliminateIVUser(Ins
}
}
+ if (auto *CI = dyn_cast<CallInst>(UseInst))
+ if (eliminateOverflowIntrinsic(CI))
+ return true;
+
if (eliminateIdentitySCEV(UseInst, IVOperand))
return true;
Added: llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll?rev=265913&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll Sun Apr 10 17:50:31 2016
@@ -0,0 +1,137 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f_sadd(i8* %a) {
+; CHECK-LABEL: @f_sadd(
+entry:
+ br label %for.body
+; Test shape: %i.04 starts at 0 and is advanced by llvm.sadd.with.overflow(%i.04, 1); the loop continues while the new value is slt 16. The FileCheck directives in for.body expect the intrinsic call to be gone and the trap branch condition to be the constant false.
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.sadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Reached only when the overflow bit is set.
+trap: ; preds = %for.body
+ tail call void @llvm.trap() #2, !nosanitize !{}
+ unreachable, !nosanitize !{}
+; The arithmetic result (field 0) feeds both the loop exit test and the phi in for.body.
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp slt i32 %2, 16
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_uadd(i8* %a) {
+; CHECK-LABEL: @f_uadd(
+entry:
+ br label %for.body
+; Test shape: %i.04 starts at 0 and is advanced by llvm.uadd.with.overflow(%i.04, 1); the loop continues while the new value is slt 16 (a signed compare). The FileCheck directives in for.body expect the intrinsic call to be gone and the trap branch condition to be the constant false.
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.uadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Reached only when the overflow bit is set.
+trap: ; preds = %for.body
+ tail call void @llvm.trap(), !nosanitize !{}
+ unreachable, !nosanitize !{}
+; The arithmetic result (field 0) feeds both the loop exit test and the phi in for.body.
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp slt i32 %2, 16
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_ssub(i8* nocapture %a) {
+; CHECK-LABEL: @f_ssub(
+entry:
+ br label %for.body
+; Test shape: %i.04 starts at 15 and is decremented by llvm.ssub.with.overflow(%i.04, 1); the loop continues while the new value is sgt -1. The FileCheck directives in for.body expect the intrinsic call to be gone and the trap branch condition to be the constant false.
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.ssub.with.overflow.i32
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Reached only when the overflow bit is set.
+trap: ; preds = %for.body
+ tail call void @llvm.trap(), !nosanitize !{}
+ unreachable, !nosanitize !{}
+; The arithmetic result (field 0) feeds both the loop exit test and the phi in for.body.
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp sgt i32 %2, -1
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_usub(i8* nocapture %a) {
+; CHECK-LABEL: @f_usub(
+entry:
+ br label %for.body
+; Test shape: %i.04 starts at 15 and is decremented by llvm.usub.with.overflow(%i.04, 1); the loop continues while the new value is sgt -1. Unlike the other three tests in this file, the FileCheck directives here expect the overflow bit to still be extracted and branched on.
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+
+; It is theoretically possible to prove this, but SCEV cannot
+; represent non-unsigned-wrapping subtraction operations.
+
+; CHECK: for.body:
+; CHECK: [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1
+; CHECK-NEXT: br i1 [[COND]], label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Reached only when the overflow bit is set.
+trap: ; preds = %for.body
+ tail call void @llvm.trap(), !nosanitize !{}
+ unreachable, !nosanitize !{}
+; The arithmetic result (field 0) feeds both the loop exit test and the phi in for.body.
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp sgt i32 %2, -1
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+declare void @llvm.trap() #2
Modified: llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll?rev=265913&r1=265912&r2=265913&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll (original)
+++ llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll Sun Apr 10 17:50:31 2016
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx"
; CHECK-LABEL: loop2:
; CHECK-NOT: extractvalue
; CHECK: add nuw
-; CHECK: @llvm.sadd.with.overflow
+; CHECK-NOT: @llvm.sadd.with.overflow
; CHECK-LABEL: loop3:
; CHECK-NOT: extractvalue
; CHECK: ret
More information about the llvm-commits
mailing list