r320902 - [CodeGen] Specialize mixed-sign mul-with-overflow (fix PR34920)

Fri Dec 15 17:28:25 PST 2017

Author: vedantk
Date: Fri Dec 15 17:28:25 2017
New Revision: 320902

URL: http://llvm.org/viewvc/llvm-project?rev=320902&view=rev
Log:
[CodeGen] Specialize mixed-sign mul-with-overflow (fix PR34920)

This patch introduces a specialized way to lower overflow-checked
multiplications with mixed-sign operands. This fixes link failures and
ICEs on code like this:

  void mul(int64_t a, uint64_t b) {
    int64_t res;
    __builtin_mul_overflow(a, b, &res);
  }

The generic checked-binop irgen would use a 65-bit multiplication
intrinsic here, which requires runtime support for _muloti4 (128-bit
multiplication), and therefore fails to link on i386. To get an ICE
on x86_64, change the example to use __int128_t / __uint128_t.

Adding runtime and backend support for 65-bit or 129-bit checked
multiplication on all of our supported targets is infeasible.

This patch solves the problem by using simpler, specialized irgen for
the mixed-sign case.

llvm.org/PR34920, rdar://34963321

Testing: Apart from check-clang, I compared the output from this fairly
comprehensive test driver using unpatched & patched clangs:
https://gist.github.com/vedantk/3eb9c88f82e5c32f2e590555b4af5081

Differential Revision: https://reviews.llvm.org/D41149

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/builtins-overflow.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=320902&r1=320901&r2=320902&view=diff
==============================================================================

--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Dec 15 17:28:25 2017
@@ -839,6 +839,93 @@ RValue CodeGenFunction::emitBuiltinOSLog
   return RValue::get(BufAddr.getPointer());
 }
 
+/// Determine if a binop is a checked mixed-sign multiply we can specialize.
+static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
+                                       WidthAndSignedness Op1Info,
+                                       WidthAndSignedness Op2Info,
+                                       WidthAndSignedness ResultInfo) {
+  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
+         Op1Info.Width == Op2Info.Width && Op1Info.Width >= ResultInfo.Width &&
+         Op1Info.Signed != Op2Info.Signed;
+}
+
+/// Emit a checked mixed-sign multiply. This is a cheaper specialization of
+/// the generic checked-binop irgen.
+static RValue
+EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
+                             WidthAndSignedness Op1Info, const clang::Expr *Op2,
+                             WidthAndSignedness Op2Info,
+                             const clang::Expr *ResultArg, QualType ResultQTy,
+                             WidthAndSignedness ResultInfo) {
+  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
+                                    Op2Info, ResultInfo) &&
+         "Not a mixed-sign multipliction we can specialize");
+
+  // Emit the signed and unsigned operands.
+  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
+  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
+  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
+  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
+
+  llvm::Type *OpTy = Signed->getType();
+  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
+  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
+  llvm::Type *ResTy = ResultPtr.getElementType();
+
+  // Take the absolute value of the signed operand.
+  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
+  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
+  llvm::Value *AbsSigned =
+      CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
+
+  // Perform a checked unsigned multiplication.
+  llvm::Value *UnsignedOverflow;
+  llvm::Value *UnsignedResult =
+      EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
+                            Unsigned, UnsignedOverflow);
+
+  llvm::Value *Overflow, *Result;
+  if (ResultInfo.Signed) {
+    // Signed overflow occurs if the result is greater than INT_MAX or lesser
+    // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
+    auto IntMax = llvm::APInt::getSignedMaxValue(ResultInfo.Width)
+                      .zextOrSelf(Op1Info.Width);
+    llvm::Value *MaxResult =
+        CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
+                              CGF.Builder.CreateZExt(IsNegative, OpTy));
+    llvm::Value *SignedOverflow =
+        CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
+    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
+
+    // Prepare the signed result (possibly by negating it).
+    llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
+    llvm::Value *SignedResult =
+        CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
+    Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
+  } else {
+    // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
+    llvm::Value *Underflow = CGF.Builder.CreateAnd(
+        IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
+    Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
+    if (ResultInfo.Width < Op1Info.Width) {
+      auto IntMax =
+          llvm::APInt::getMaxValue(ResultInfo.Width).zext(Op1Info.Width);
+      llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
+          UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
+      Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
+    }
+
+    Result = CGF.Builder.CreateTrunc(UnsignedResult, ResTy);
+  }
+  assert(Overflow && Result && "Missing overflow or result");
+
+  bool isVolatile =
+      ResultArg->getType()->getPointeeType().isVolatileQualified();
+  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
+                          isVolatile);
+  return RValue::get(Overflow);
+}
+
 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
                                         unsigned BuiltinID, const CallExpr *E,
                                         ReturnValueSlot ReturnValue) {
@@ -2323,6 +2410,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(
         getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
     WidthAndSignedness ResultInfo =
         getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
+
+    // Handle mixed-sign multiplication as a special case, because adding
+    // runtime or backend support for our generic irgen would be too expensive.
+    if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
+      return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
+                                          RightInfo, ResultArg, ResultQTy,
+                                          ResultInfo);
+
     WidthAndSignedness EncompassingInfo =
         EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
 

Modified: cfe/trunk/test/CodeGen/builtins-overflow.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/builtins-overflow.c?rev=320902&r1=320901&r2=320902&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/builtins-overflow.c (original)
+++ cfe/trunk/test/CodeGen/builtins-overflow.c Fri Dec 15 17:28:25 2017
@@ -338,3 +338,122 @@ long long test_smulll_overflow(long long
     return LongLongErrorCode;
   return result;
 }
+
+int test_mixed_sign_mull_overflow(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mull_overflow
+// CHECK:       [[IsNeg:%.*]] = icmp slt i32 [[Op1:%.*]], 0
+// CHECK-NEXT:  [[Signed:%.*]] = sub i32 0, [[Op1]]
+// CHECK-NEXT:  [[AbsSigned:%.*]] = select i1 [[IsNeg]], i32 [[Signed]], i32 [[Op1]]
+// CHECK-NEXT:  call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[AbsSigned]], i32 %{{.*}})
+// CHECK-NEXT:  [[UnsignedOFlow:%.*]] = extractvalue { i32, i1 } %{{.*}}, 1
+// CHECK-NEXT:  [[UnsignedResult:%.*]] = extractvalue { i32, i1 } %{{.*}}, 0
+// CHECK-NEXT:  [[IsNegZext:%.*]] = zext i1 [[IsNeg]] to i32
+// CHECK-NEXT:  [[MaxResult:%.*]] = add i32 2147483647, [[IsNegZext]]
+// CHECK-NEXT:  [[SignedOFlow:%.*]] = icmp ugt i32 [[UnsignedResult]], [[MaxResult]]
+// CHECK-NEXT:  [[OFlow:%.*]] = or i1 [[UnsignedOFlow]], [[SignedOFlow]]
+// CHECK-NEXT:  [[NegativeResult:%.*]] = sub i32 0, [[UnsignedResult]]
+// CHECK-NEXT:  [[Result:%.*]] = select i1 [[IsNeg]], i32 [[NegativeResult]], i32 [[UnsignedResult]]
+// CHECK-NEXT:  store i32 [[Result]], i32* %{{.*}}, align 4
+// CHECK:       br i1 [[OFlow]]
+
+  int result;
+  if (__builtin_mul_overflow(x, y, &result))
+    return LongErrorCode;
+  return result;
+}
+
+int test_mixed_sign_mull_overflow_unsigned(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mull_overflow_unsigned
+// CHECK:       [[IsNeg:%.*]] = icmp slt i32 [[Op1:%.*]], 0
+// CHECK-NEXT:  [[Signed:%.*]] = sub i32 0, [[Op1]]
+// CHECK-NEXT:  [[AbsSigned:%.*]] = select i1 [[IsNeg]], i32 [[Signed]], i32 [[Op1]]
+// CHECK-NEXT:  call { i32, i1 } @llvm.umul.with.overflow.i32(i32 [[AbsSigned]], i32 %{{.*}})
+// CHECK-NEXT:  [[UnsignedOFlow:%.*]] = extractvalue { i32, i1 } %{{.*}}, 1
+// CHECK-NEXT:  [[UnsignedResult:%.*]] = extractvalue { i32, i1 } %{{.*}}, 0
+// CHECK-NEXT:  [[NotNull:%.*]] = icmp ne i32 [[UnsignedResult]], 0
+// CHECK-NEXT:  [[Underflow:%.*]] = and i1 [[IsNeg]], [[NotNull]]
+// CHECK-NEXT:  [[OFlow:%.*]] = or i1 [[UnsignedOFlow]], [[Underflow]]
+// CHECK-NEXT:  store i32 [[UnsignedResult]], i32* %{{.*}}, align 4
+// CHECK:       br i1 [[OFlow]]
+
+  unsigned result;
+  if (__builtin_mul_overflow(x, y, &result))
+    return LongErrorCode;
+  return result;
+}
+
+int test_mixed_sign_mull_overflow_swapped(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mull_overflow_swapped
+// CHECK:  call { i32, i1 } @llvm.umul.with.overflow.i32
+// CHECK:  add i32 2147483647
+  int result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow
+// CHECK:  call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:  add i64 92233720368547
+  long long result;
+  if (__builtin_mul_overflow(x, y, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow_swapped(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow_swapped
+// CHECK:  call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:  add i64 92233720368547
+  long long result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow_trunc_signed(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow_trunc_signed
+// CHECK:  call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:  add i64 2147483647
+// CHECK:  trunc
+// CHECK:  store
+  int result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mulll_overflow_trunc_unsigned(long long x, unsigned long long y) {
+// CHECK: @test_mixed_sign_mulll_overflow_trunc_unsigned
+// CHECK:       call { i64, i1 } @llvm.umul.with.overflow.i64
+// CHECK:       [[NON_ZERO:%.*]] = icmp ne i64 [[UNSIGNED_RESULT:%.*]], 0
+// CHECK-NEXT:  [[UNDERFLOW:%.*]] = and i1 {{.*}}, [[NON_ZERO]]
+// CHECK-NEXT:  [[OVERFLOW_PRE_TRUNC:%.*]] = or i1 {{.*}}, [[UNDERFLOW]]
+// CHECK-NEXT:  [[TRUNC_OVERFLOW:%.*]] = icmp ugt i64 [[UNSIGNED_RESULT]], 4294967295
+// CHECK-NEXT:  [[OVERFLOW:%.*]] = or i1 [[OVERFLOW_PRE_TRUNC]], [[TRUNC_OVERFLOW]]
+// CHECK-NEXT:  trunc i64 [[UNSIGNED_RESULT]] to i32
+// CHECK-NEXT:  store
+  unsigned result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mul_overflow_extend_signed(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mul_overflow_extend_signed
+// CHECK:  call { i64, i1 } @llvm.smul.with.overflow.i64
+  long long result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}
+
+long long test_mixed_sign_mul_overflow_extend_unsigned(int x, unsigned y) {
+// CHECK: @test_mixed_sign_mul_overflow_extend_unsigned
+// CHECK:  call { i65, i1 } @llvm.smul.with.overflow.i65
+  unsigned long long result;
+  if (__builtin_mul_overflow(y, x, &result))
+    return LongLongErrorCode;
+  return result;
+}