[llvm-commits] [llvm] r133318 - in /llvm/trunk: lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp test/CodeGen/X86/muloti.ll

Fri Jun 17 17:09:58 PDT 2011

Author: echristo
Date: Fri Jun 17 19:09:57 2011
New Revision: 133318

URL: http://llvm.org/viewvc/llvm-project?rev=133318&view=rev
Log:
Fix UMULO support for integers twice the register width so that the
full range is handled without requiring a new mulo<mode> libcall
that we'd have to create.

Finishes the rest of rdar://9090077 and rdar://9210061

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
    llvm/trunk/test/CodeGen/X86/muloti.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp?rev=133318&r1=133317&r2=133318&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp Fri Jun 17 19:09:57 2011
@@ -2160,6 +2160,27 @@
   const Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext());
   DebugLoc dl = N->getDebugLoc();
 
+  // Expand UMULO as MUL + UDIV; a divide should beat a function call.
+  if (N->getOpcode() == ISD::UMULO) {
+    SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+    SDValue MUL = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+    SplitInteger(MUL, Lo, Hi);
+
+    // Overflow iff RHS != 0 and (LHS * RHS) / RHS != LHS. Divide by 1 when
+    // RHS is zero so the UDIV is always defined; a zero RHS never overflows.
+    SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+                                  RHS, DAG.getConstant(0, VT), ISD::SETEQ);
+    SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero,
+                                  DAG.getConstant(1, VT), RHS);
+    SDValue DIV = DAG.getNode(ISD::UDIV, dl, LHS.getValueType(), MUL, NotZero);
+    SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS,
+                                    ISD::SETNE);
+    Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero,
+                           DAG.getConstant(0, N->getValueType(1)), Overflow);
+    ReplaceValueWith(SDValue(N, 1), Overflow);
+    return;
+  }
+
   // Replace this with a libcall that will check overflow.
   RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
   if (VT == MVT::i32)

Modified: llvm/trunk/test/CodeGen/X86/muloti.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/muloti.ll?rev=133318&r1=133317&r2=133318&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/muloti.ll (original)
+++ llvm/trunk/test/CodeGen/X86/muloti.ll Fri Jun 17 19:09:57 2011
@@ -2,9 +2,8 @@
 %0 = type { i64, i64 }
 %1 = type { i128, i1 }
 
-@.str = private unnamed_addr constant [11 x i8] c"%llx %llx\0A\00", align 1
-
 define %0 @x(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+; CHECK: x
 entry:
   %tmp16 = zext i64 %a.coerce0 to i128
   %tmp11 = zext i64 %a.coerce1 to i128
@@ -33,6 +32,50 @@
   ret %0 %tmp24
 }
 
+define %0 @foo(i64 %a.coerce0, i64 %a.coerce1, i64 %b.coerce0, i64 %b.coerce1) nounwind uwtable ssp {
+entry:                                            ; rebuild both i128 operands from i64 halves, multiply, trap on overflow
+; CHECK: foo
+  %retval = alloca i128, align 16
+  %coerce = alloca i128, align 16
+  %a.addr = alloca i128, align 16
+  %coerce1 = alloca i128, align 16
+  %b.addr = alloca i128, align 16
+  %0 = bitcast i128* %coerce to %0*              ; view %coerce as the { i64, i64 } pair type %0
+  %1 = getelementptr %0* %0, i32 0, i32 0
+  store i64 %a.coerce0, i64* %1
+  %2 = getelementptr %0* %0, i32 0, i32 1
+  store i64 %a.coerce1, i64* %2
+  %a = load i128* %coerce, align 16              ; first operand read back as a single i128
+  store i128 %a, i128* %a.addr, align 16
+  %3 = bitcast i128* %coerce1 to %0*             ; same reassembly for the second operand
+  %4 = getelementptr %0* %3, i32 0, i32 0
+  store i64 %b.coerce0, i64* %4
+  %5 = getelementptr %0* %3, i32 0, i32 1
+  store i64 %b.coerce1, i64* %5
+  %b = load i128* %coerce1, align 16
+  store i128 %b, i128* %b.addr, align 16
+  %tmp = load i128* %a.addr, align 16
+  %tmp2 = load i128* %b.addr, align 16
+  %6 = call %1 @llvm.umul.with.overflow.i128(i128 %tmp, i128 %tmp2) ; result is type %1 = { i128, i1 }
+; CHECK: cmov
+; CHECK: divti3
+  %7 = extractvalue %1 %6, 0
+  %8 = extractvalue %1 %6, 1
+  br i1 %8, label %overflow, label %nooverflow   ; branch on the i1 field of the intrinsic result
+
+overflow:                                         ; preds = %entry
+  call void @llvm.trap()
+  unreachable
+
+nooverflow:                                       ; preds = %entry
+  store i128 %7, i128* %retval
+  %9 = bitcast i128* %retval to %0*              ; hand the i128 result back as the { i64, i64 } pair
+  %10 = load %0* %9, align 1
+  ret %0 %10
+}
+
+declare %1 @llvm.umul.with.overflow.i128(i128, i128) nounwind readnone
+
 declare %1 @llvm.smul.with.overflow.i128(i128, i128) nounwind readnone
 
 declare void @llvm.trap() nounwind