[llvm-commits] [llvm] r127418 - in /llvm/trunk: lib/Transforms/InstCombine/InstCombineCalls.cpp test/Transforms/InstCombine/intrinsics.ll

Thu Mar 10 10:40:14 PST 2011

Author: d0k
Date: Thu Mar 10 12:40:14 2011
New Revision: 127418

URL: http://llvm.org/viewvc/llvm-project?rev=127418&view=rev
Log:
InstCombine: Turn umul_with_overflow into mul nuw if we can prove that it cannot overflow.

This happens a lot in clang-compiled C++ code because it adds overflow checks to operator new[]:
  unsigned *foo(unsigned n) { return new unsigned[n]; }
We can optimize away the overflow check on 64 bit targets because (uint64_t)n*4 cannot overflow.

Modified:
    llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/trunk/test/Transforms/InstCombine/intrinsics.ll

Modified: llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp?rev=127418&r1=127417&r2=127418&view=diff
==============================================================================

--- llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp (original)
+++ llvm/trunk/lib/Transforms/InstCombine/InstCombineCalls.cpp Thu Mar 10 12:40:14 2011
@@ -475,7 +475,35 @@
       }
     }
     break;
-  case Intrinsic::umul_with_overflow:
+  case Intrinsic::umul_with_overflow: {
+    Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1);
+    unsigned BitWidth = cast<IntegerType>(LHS->getType())->getBitWidth();
+    APInt Mask = APInt::getAllOnesValue(BitWidth);
+
+    APInt LHSKnownZero(BitWidth, 0);
+    APInt LHSKnownOne(BitWidth, 0);
+    ComputeMaskedBits(LHS, Mask, LHSKnownZero, LHSKnownOne);
+    APInt RHSKnownZero(BitWidth, 0);
+    APInt RHSKnownOne(BitWidth, 0);
+    ComputeMaskedBits(RHS, Mask, RHSKnownZero, RHSKnownOne);
+
+    // Get the largest possible values for each operand, extended to be large
+    // enough so that every possible product of two BitWidth-sized ints fits.
+    APInt LHSMax = (~LHSKnownZero).zext(BitWidth*2);
+    APInt RHSMax = (~RHSKnownZero).zext(BitWidth*2);
+
+    // If multiplying the maximum values does not overflow then we can turn
+    // this into a plain NUW mul.
+    if ((LHSMax * RHSMax).getActiveBits() <= BitWidth) {
+      Value *Mul = Builder->CreateNUWMul(LHS, RHS, "umul_with_overflow");
+      Constant *V[] = {
+        UndefValue::get(LHS->getType()),
+        Builder->getFalse()
+      };
+      Constant *Struct = ConstantStruct::get(II->getContext(), V, 2, false);
+      return InsertValueInst::Create(Struct, Mul, 0);
+    }
+  } // FALL THROUGH
   case Intrinsic::smul_with_overflow:
     // Canonicalize constants into the RHS.
     if (isa<Constant>(II->getArgOperand(0)) &&

Modified: llvm/trunk/test/Transforms/InstCombine/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/intrinsics.ll?rev=127418&r1=127417&r2=127418&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/intrinsics.ll Thu Mar 10 12:40:14 2011
@@ -112,6 +112,33 @@
 ; CHECK-NEXT: ret i8 %A
 }
 
+%ov.result.32 = type { i32, i1 }
+declare %ov.result.32 @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+define i32 @umultest3(i32 %n) nounwind {
+  %shr = lshr i32 %n, 2
+  %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 3)
+  %ov = extractvalue %ov.result.32 %mul, 1
+  %res = extractvalue %ov.result.32 %mul, 0
+  %ret = select i1 %ov, i32 -1, i32 %res
+  ret i32 %ret
+; CHECK: @umultest3
+; CHECK-NEXT: shr
+; CHECK-NEXT: mul nuw
+; CHECK-NEXT: ret
+}
+
+define i32 @umultest4(i32 %n) nounwind {
+  %shr = lshr i32 %n, 1
+  %mul = call %ov.result.32 @llvm.umul.with.overflow.i32(i32 %shr, i32 4)
+  %ov = extractvalue %ov.result.32 %mul, 1
+  %res = extractvalue %ov.result.32 %mul, 0
+  %ret = select i1 %ov, i32 -1, i32 %res
+  ret i32 %ret
+; CHECK: @umultest4
+; CHECK: umul.with.overflow
+}
+
 define void @powi(double %V, double *%P) {
 entry:
   %A = tail call double @llvm.powi.f64(double %V, i32 -1) nounwind