[llvm] r265913 - [IndVars] Eliminate op.with.overflow when possible

Sun Apr 10 15:50:32 PDT 2016

Author: sanjoy
Date: Sun Apr 10 17:50:31 2016
New Revision: 265913

URL: http://llvm.org/viewvc/llvm-project?rev=265913&view=rev
Log:
[IndVars] Eliminate op.with.overflow when possible

Summary:
If we can prove that an op.with.overflow intrinsic does not overflow, we
can get rid of the intrinsic, and replace it with non-wrapping
arithmetic.

Reviewers: atrick, regehr

Subscribers: sanjoy, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D18685

Added:
    llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
Modified:
    llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
    llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp?rev=265913&r1=265912&r2=265913&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyIndVar.cpp Sun Apr 10 17:50:31 2016
@@ -71,6 +71,7 @@ namespace {
 
     bool eliminateIdentitySCEV(Instruction *UseInst, Instruction *IVOperand);
 
+    bool eliminateOverflowIntrinsic(CallInst *CI);
     bool eliminateIVUser(Instruction *UseInst, Instruction *IVOperand);
     void eliminateIVComparison(ICmpInst *ICmp, Value *IVOperand);
     void eliminateIVRemainder(BinaryOperator *Rem, Value *IVOperand,
@@ -318,6 +319,108 @@ void SimplifyIndvar::eliminateIVRemainde
   DeadInsts.emplace_back(Rem);
 }
 
+bool SimplifyIndvar::eliminateOverflowIntrinsic(CallInst *CI) {
+  auto *F = CI->getCalledFunction();
+  if (!F)
+    return false;
+  // Goal: rewrite a provably non-overflowing add/sub.with.overflow as add/sub.
+  typedef const SCEV *(ScalarEvolution::*OperationFunctionTy)(
+      const SCEV *, const SCEV *, SCEV::NoWrapFlags);
+  typedef const SCEV *(ScalarEvolution::*ExtensionFunctionTy)(
+      const SCEV *, Type *);
+  // Operation: getAddExpr/getMinusSCEV.  Extension: sign- or zero-extension.
+  OperationFunctionTy Operation;
+  ExtensionFunctionTy Extension;
+  // Opcode (Add or Sub) used for the replacement binary operator.
+  Instruction::BinaryOps RawOp;
+
+  // We always have exactly one of nsw or nuw.  If NoSignedOverflow is false, we
+  // have nuw.
+  bool NoSignedOverflow;
+
+  switch (F->getIntrinsicID()) {
+  default: // Only the four add/sub overflow intrinsics below are handled.
+    return false;
+
+  case Intrinsic::sadd_with_overflow:
+    Operation = &ScalarEvolution::getAddExpr;
+    Extension = &ScalarEvolution::getSignExtendExpr;
+    RawOp = Instruction::Add;
+    NoSignedOverflow = true;
+    break;
+
+  case Intrinsic::uadd_with_overflow:
+    Operation = &ScalarEvolution::getAddExpr;
+    Extension = &ScalarEvolution::getZeroExtendExpr;
+    RawOp = Instruction::Add;
+    NoSignedOverflow = false;
+    break;
+
+  case Intrinsic::ssub_with_overflow:
+    Operation = &ScalarEvolution::getMinusSCEV;
+    Extension = &ScalarEvolution::getSignExtendExpr;
+    RawOp = Instruction::Sub;
+    NoSignedOverflow = true;
+    break;
+
+  case Intrinsic::usub_with_overflow:
+    Operation = &ScalarEvolution::getMinusSCEV;
+    Extension = &ScalarEvolution::getZeroExtendExpr;
+    RawOp = Instruction::Sub;
+    NoSignedOverflow = false;
+    break;
+  }
+  // SCEV forms of the two intrinsic operands.
+  const SCEV *LHS = SE->getSCEV(CI->getArgOperand(0));
+  const SCEV *RHS = SE->getSCEV(CI->getArgOperand(1));
+  // NOTE(review): cast<> assumes an integer (non-vector) operand type - confirm.
+  auto *NarrowTy = cast<IntegerType>(LHS->getType());
+  auto *WideTy =
+    IntegerType::get(NarrowTy->getContext(), NarrowTy->getBitWidth() * 2);
+  // A = ext(LHS op RHS), B = ext(LHS) op ext(RHS), both in the 2x-wide type.
+  const SCEV *A =
+      (SE->*Extension)((SE->*Operation)(LHS, RHS, SCEV::FlagAnyWrap), WideTy);
+  const SCEV *B =
+      (SE->*Operation)((SE->*Extension)(LHS, WideTy),
+                       (SE->*Extension)(RHS, WideTy), SCEV::FlagAnyWrap);
+  // SCEVs are compared by pointer; A == B is taken as proof of no overflow.
+  if (A != B)
+    return false;
+
+  // Proved no overflow, nuke the overflow check and, if possible, the overflow
+  // intrinsic as well.
+  // The replacement add/sub is created with CI as its insertion point.
+  BinaryOperator *NewResult = BinaryOperator::Create(
+      RawOp, CI->getArgOperand(0), CI->getArgOperand(1), "", CI);
+  // Mark the new instruction nsw or nuw, matching the signedness proven above.
+  if (NoSignedOverflow)
+    NewResult->setHasNoSignedWrap(true);
+  else
+    NewResult->setHasNoUnsignedWrap(true);
+  // extractvalue users are queued and erased only after the walk over users().
+  SmallVector<ExtractValueInst *, 4> ToDelete;
+  // Field 1 (overflow flag) folds to false, field 0 (value) to NewResult.
+  for (auto *U : CI->users()) {
+    if (auto *EVI = dyn_cast<ExtractValueInst>(U)) {
+      if (EVI->getIndices()[0] == 1)
+        EVI->replaceAllUsesWith(ConstantInt::getFalse(CI->getContext()));
+      else {
+        assert(EVI->getIndices()[0] == 0 && "Only two possibilities!");
+        EVI->replaceAllUsesWith(NewResult);
+      }
+      ToDelete.push_back(EVI);
+    }
+  }
+
+  for (auto *EVI : ToDelete)
+    EVI->eraseFromParent();
+  // CI itself is removed only when no (non-extractvalue) user still needs it.
+  if (CI->use_empty())
+    CI->eraseFromParent();
+
+  return true;
+}
+
 /// Eliminate an operation that consumes a simple IV and has no observable
 /// side-effect given the range of IV values.  IVOperand is guaranteed SCEVable,
 /// but UseInst may not be.
@@ -335,6 +438,10 @@ bool SimplifyIndvar::eliminateIVUser(Ins
     }
   }
 
+  if (auto *CI = dyn_cast<CallInst>(UseInst))
+    if (eliminateOverflowIntrinsic(CI))
+      return true;
+
   if (eliminateIdentitySCEV(UseInst, IVOperand))
     return true;
 

Added: llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll?rev=265913&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/IndVarSimplify/overflow-intrinsics.ll Sun Apr 10 17:50:31 2016
@@ -0,0 +1,137 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f_sadd(i8* %a) {
+; CHECK-LABEL: @f_sadd(
+entry:
+  br label %for.body
+; Exit block, reached from %cont when the incremented counter is not slt 16.
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+; Loop body: a[i] = 0, then i + 1 is computed by sadd.with.overflow (i starts at 0).
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.sadd.with.overflow
+; CHECK:  br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Overflow path: traps. The expected output branches here on a constant false.
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap() #2, !nosanitize !{}
+  unreachable, !nosanitize !{}
+; Latch: %2 is the incremented counter; the loop repeats while %2 slt 16.
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp slt i32 %2, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_uadd(i8* %a) {
+; CHECK-LABEL: @f_uadd(
+entry:
+  br label %for.body
+; Exit block, reached from %cont when the incremented counter is not slt 16.
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+; Loop body: a[i] = 0, then i + 1 is computed by uadd.with.overflow (i starts at 0).
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.uadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Overflow path: traps. The expected output branches here on a constant false.
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+; Latch: %2 is the incremented counter; the loop repeats while %2 slt 16.
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp slt i32 %2, 16
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_ssub(i8* nocapture %a) {
+; CHECK-LABEL: @f_ssub(
+entry:
+  br label %for.body
+; Exit block, reached from %cont when the decremented counter is not sgt -1.
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+; Loop body: a[i] = 0, then i - 1 is computed by ssub.with.overflow (i starts at 15).
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.ssub.with.overflow.i32
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Overflow path: traps. The expected output branches here on a constant false.
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+; Latch: %2 is the decremented counter; the loop repeats while %2 sgt -1.
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp sgt i32 %2, -1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_usub(i8* nocapture %a) {
+; CHECK-LABEL: @f_usub(
+entry:
+  br label %for.body
+; Exit block, reached from %cont when the decremented counter is not sgt -1.
+for.cond.cleanup:                                 ; preds = %cont
+  ret void
+; Loop body: a[i] = 0, then i - 1 is computed by usub.with.overflow (i starts at 15).
+for.body:                                         ; preds = %entry, %cont
+  %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+  %idxprom = sext i32 %i.04 to i64
+  %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+  store i8 0, i8* %arrayidx, align 1
+  %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)
+  %1 = extractvalue { i32, i1 } %0, 1
+
+; It is theoretically possible to prove this, but SCEV cannot
+; represent non-unsigned-wrapping subtraction operations.
+
+; CHECK: for.body:
+; CHECK:  [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1
+; CHECK-NEXT:  br i1 [[COND]], label %trap, label %cont, !nosanitize !0
+  br i1 %1, label %trap, label %cont, !nosanitize !{}
+; Overflow path: traps. Here the expected output still branches on the extracted flag.
+trap:                                             ; preds = %for.body
+  tail call void @llvm.trap(), !nosanitize !{}
+  unreachable, !nosanitize !{}
+; Latch: %2 is the decremented counter; the loop repeats while %2 sgt -1.
+cont:                                             ; preds = %for.body
+  %2 = extractvalue { i32, i1 } %0, 0
+  %cmp = icmp sgt i32 %2, -1
+  br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+declare void @llvm.trap() #2

Modified: llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll?rev=265913&r1=265912&r2=265913&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll (original)
+++ llvm/trunk/test/Transforms/IndVarSimplify/overflowcheck.ll Sun Apr 10 17:50:31 2016
@@ -10,7 +10,7 @@ target triple = "x86_64-apple-macosx"
 ; CHECK-LABEL: loop2:
 ; CHECK-NOT: extractvalue
 ; CHECK: add nuw
-; CHECK: @llvm.sadd.with.overflow
+; CHECK-NOT: @llvm.sadd.with.overflow
 ; CHECK-LABEL: loop3:
 ; CHECK-NOT: extractvalue
 ; CHECK: ret