[llvm] 09854f2 - [SelectionDAG] Emit calls to __divei4 and friends for division/remainder of large integers

Matthias Gehre via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 16 02:36:40 PDT 2022


Author: Matthias Gehre
Date: 2022-03-16T09:36:28Z
New Revision: 09854f2af3b914b616f29cb640bede3a27cf7c4e

URL: https://github.com/llvm/llvm-project/commit/09854f2af3b914b616f29cb640bede3a27cf7c4e
DIFF: https://github.com/llvm/llvm-project/commit/09854f2af3b914b616f29cb640bede3a27cf7c4e.diff

LOG: [SelectionDAG] Emit calls to __divei4 and friends for division/remainder of large integers

Emit calls to __divei4 and friends for divison/remainder of large integers.

This fixes https://github.com/llvm/llvm-project/issues/44994.

The overall RFC is in https://discourse.llvm.org/t/rfc-add-support-for-division-of-large-bitint-builtins-selectiondag-globalisel-clang/60329

The compiler-rt part is in https://reviews.llvm.org/D120327

Differential Revision: https://reviews.llvm.org/D120329

Added: 
    llvm/test/CodeGen/AArch64/udivmodei5.ll
    llvm/test/CodeGen/X86/udivmodei5.ll

Modified: 
    llvm/include/llvm/IR/RuntimeLibcalls.def
    llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
    llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/RuntimeLibcalls.def b/llvm/include/llvm/IR/RuntimeLibcalls.def
index 62d67308114f8..ccd4f2be38a64 100644
--- a/llvm/include/llvm/IR/RuntimeLibcalls.def
+++ b/llvm/include/llvm/IR/RuntimeLibcalls.def
@@ -47,6 +47,8 @@ HANDLE_LIBCALL(MUL_I16, "__mulhi3")
 HANDLE_LIBCALL(MUL_I32, "__mulsi3")
 HANDLE_LIBCALL(MUL_I64, "__muldi3")
 HANDLE_LIBCALL(MUL_I128, "__multi3")
+HANDLE_LIBCALL(MUL_IEXT, nullptr)
+
 HANDLE_LIBCALL(MULO_I32, "__mulosi4")
 HANDLE_LIBCALL(MULO_I64, "__mulodi4")
 HANDLE_LIBCALL(MULO_I128, "__muloti4")
@@ -55,31 +57,43 @@ HANDLE_LIBCALL(SDIV_I16, "__divhi3")
 HANDLE_LIBCALL(SDIV_I32, "__divsi3")
 HANDLE_LIBCALL(SDIV_I64, "__divdi3")
 HANDLE_LIBCALL(SDIV_I128, "__divti3")
+HANDLE_LIBCALL(SDIV_IEXT, "__divei4")
+
 HANDLE_LIBCALL(UDIV_I8, "__udivqi3")
 HANDLE_LIBCALL(UDIV_I16, "__udivhi3")
 HANDLE_LIBCALL(UDIV_I32, "__udivsi3")
 HANDLE_LIBCALL(UDIV_I64, "__udivdi3")
 HANDLE_LIBCALL(UDIV_I128, "__udivti3")
+HANDLE_LIBCALL(UDIV_IEXT, "__udivei4")
+
 HANDLE_LIBCALL(SREM_I8, "__modqi3")
 HANDLE_LIBCALL(SREM_I16, "__modhi3")
 HANDLE_LIBCALL(SREM_I32, "__modsi3")
 HANDLE_LIBCALL(SREM_I64, "__moddi3")
 HANDLE_LIBCALL(SREM_I128, "__modti3")
+HANDLE_LIBCALL(SREM_IEXT, "__modei4")
+
 HANDLE_LIBCALL(UREM_I8, "__umodqi3")
 HANDLE_LIBCALL(UREM_I16, "__umodhi3")
 HANDLE_LIBCALL(UREM_I32, "__umodsi3")
 HANDLE_LIBCALL(UREM_I64, "__umoddi3")
 HANDLE_LIBCALL(UREM_I128, "__umodti3")
+HANDLE_LIBCALL(UREM_IEXT, "__umodei4")
+
 HANDLE_LIBCALL(SDIVREM_I8, nullptr)
 HANDLE_LIBCALL(SDIVREM_I16, nullptr)
 HANDLE_LIBCALL(SDIVREM_I32, nullptr)
 HANDLE_LIBCALL(SDIVREM_I64, nullptr)
 HANDLE_LIBCALL(SDIVREM_I128, nullptr)
+HANDLE_LIBCALL(SDIVREM_IEXT, nullptr)
+
 HANDLE_LIBCALL(UDIVREM_I8, nullptr)
 HANDLE_LIBCALL(UDIVREM_I16, nullptr)
 HANDLE_LIBCALL(UDIVREM_I32, nullptr)
 HANDLE_LIBCALL(UDIVREM_I64, nullptr)
 HANDLE_LIBCALL(UDIVREM_I128, nullptr)
+HANDLE_LIBCALL(UDIVREM_IEXT, nullptr)
+
 HANDLE_LIBCALL(NEG_I32, "__negsi2")
 HANDLE_LIBCALL(NEG_I64, "__negdi2")
 HANDLE_LIBCALL(CTLZ_I32, "__clzsi2")

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 22c62d78ad901..cb6fc4e483a66 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -141,12 +141,10 @@ class SelectionDAGLegalize {
                        RTLIB::Libcall Call_F128,
                        RTLIB::Libcall Call_PPCF128,
                        SmallVectorImpl<SDValue> &Results);
-  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
-                           RTLIB::Libcall Call_I8,
-                           RTLIB::Libcall Call_I16,
-                           RTLIB::Libcall Call_I32,
-                           RTLIB::Libcall Call_I64,
-                           RTLIB::Libcall Call_I128);
+  SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+                           RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32,
+                           RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128,
+                           RTLIB::Libcall Call_IEXT);
   void ExpandArgFPLibCall(SDNode *Node,
                           RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
                           RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
@@ -2105,15 +2103,17 @@ void SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
   ExpandFPLibCall(Node, LC, Results);
 }
 
-SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
-                                               RTLIB::Libcall Call_I8,
-                                               RTLIB::Libcall Call_I16,
-                                               RTLIB::Libcall Call_I32,
-                                               RTLIB::Libcall Call_I64,
-                                               RTLIB::Libcall Call_I128) {
+SDValue SelectionDAGLegalize::ExpandIntLibCall(
+    SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8,
+    RTLIB::Libcall Call_I16, RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+    RTLIB::Libcall Call_I128, RTLIB::Libcall Call_IEXT) {
   RTLIB::Libcall LC;
   switch (Node->getSimpleValueType(0).SimpleTy) {
-  default: llvm_unreachable("Unexpected request for libcall!");
+
+  default:
+    LC = Call_IEXT;
+    break;
+
   case MVT::i8:   LC = Call_I8; break;
   case MVT::i16:  LC = Call_I16; break;
   case MVT::i32:  LC = Call_I32; break;
@@ -2148,7 +2148,11 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
 
   RTLIB::Libcall LC;
   switch (Node->getSimpleValueType(0).SimpleTy) {
-  default: llvm_unreachable("Unexpected request for libcall!");
+
+  default:
+    LC = isSigned ? RTLIB::SDIVREM_IEXT : RTLIB::UDIVREM_IEXT;
+    break;
+
   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
@@ -4319,28 +4323,24 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
                     RTLIB::SUB_PPCF128, Results);
     break;
   case ISD::SREM:
-    Results.push_back(ExpandIntLibCall(Node, true,
-                                       RTLIB::SREM_I8,
-                                       RTLIB::SREM_I16, RTLIB::SREM_I32,
-                                       RTLIB::SREM_I64, RTLIB::SREM_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, true, RTLIB::SREM_I8, RTLIB::SREM_I16, RTLIB::SREM_I32,
+        RTLIB::SREM_I64, RTLIB::SREM_I128, RTLIB::SREM_IEXT));
     break;
   case ISD::UREM:
-    Results.push_back(ExpandIntLibCall(Node, false,
-                                       RTLIB::UREM_I8,
-                                       RTLIB::UREM_I16, RTLIB::UREM_I32,
-                                       RTLIB::UREM_I64, RTLIB::UREM_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, false, RTLIB::UREM_I8, RTLIB::UREM_I16, RTLIB::UREM_I32,
+        RTLIB::UREM_I64, RTLIB::UREM_I128, RTLIB::UREM_IEXT));
     break;
   case ISD::SDIV:
-    Results.push_back(ExpandIntLibCall(Node, true,
-                                       RTLIB::SDIV_I8,
-                                       RTLIB::SDIV_I16, RTLIB::SDIV_I32,
-                                       RTLIB::SDIV_I64, RTLIB::SDIV_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, true, RTLIB::SDIV_I8, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+        RTLIB::SDIV_I64, RTLIB::SDIV_I128, RTLIB::SDIV_IEXT));
     break;
   case ISD::UDIV:
-    Results.push_back(ExpandIntLibCall(Node, false,
-                                       RTLIB::UDIV_I8,
-                                       RTLIB::UDIV_I16, RTLIB::UDIV_I32,
-                                       RTLIB::UDIV_I64, RTLIB::UDIV_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, false, RTLIB::UDIV_I8, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+        RTLIB::UDIV_I64, RTLIB::UDIV_I128, RTLIB::UDIV_IEXT));
     break;
   case ISD::SDIVREM:
   case ISD::UDIVREM:
@@ -4348,10 +4348,9 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     ExpandDivRemLibCall(Node, Results);
     break;
   case ISD::MUL:
-    Results.push_back(ExpandIntLibCall(Node, false,
-                                       RTLIB::MUL_I8,
-                                       RTLIB::MUL_I16, RTLIB::MUL_I32,
-                                       RTLIB::MUL_I64, RTLIB::MUL_I128));
+    Results.push_back(ExpandIntLibCall(
+        Node, false, RTLIB::MUL_I8, RTLIB::MUL_I16, RTLIB::MUL_I32,
+        RTLIB::MUL_I64, RTLIB::MUL_I128, RTLIB::MUL_IEXT));
     break;
   case ISD::CTLZ_ZERO_UNDEF:
     switch (Node->getSimpleValueType(0).SimpleTy) {

diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index c1e42ef42ccfd..da61a96a68cfa 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -3914,6 +3914,70 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
   ReplaceValueWith(SDValue(Node, 1), Ovf);
 }
 
+// Emit a call to __udivei4 and friends which require
+// the arguments be based on the stack
+// and extra argument that contains the number of bits of the operands.
+// Returns the result of the call operation.
+static SDValue ExpandExtIntRes_DIVREM(const TargetLowering &TLI,
+                                      const RTLIB::Libcall &LC,
+                                      SelectionDAG &DAG, SDNode *N,
+                                      const SDLoc &DL, const EVT &VT) {
+
+  SDValue InChain = DAG.getEntryNode();
+
+  TargetLowering::ArgListTy Args;
+  TargetLowering::ArgListEntry Entry;
+
+  // The signature of __udivei4 is
+  // void __udivei4(unsigned int *quo, unsigned int *a, unsigned int *b,
+  // unsigned int bits)
+  EVT ArgVT = N->op_begin()->getValueType();
+  assert(ArgVT.isInteger() && ArgVT.getSizeInBits() > 128 &&
+         "Unexpected argument type for lowering");
+  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+
+  SDValue Output = DAG.CreateStackTemporary(ArgVT);
+  Entry.Node = Output;
+  Entry.Ty = ArgTy->getPointerTo();
+  Entry.IsSExt = false;
+  Entry.IsZExt = false;
+  Args.push_back(Entry);
+
+  for (const llvm::SDUse &Op : N->ops()) {
+    SDValue StackPtr = DAG.CreateStackTemporary(ArgVT);
+    InChain = DAG.getStore(InChain, DL, Op, StackPtr, MachinePointerInfo());
+    Entry.Node = StackPtr;
+    Entry.Ty = ArgTy->getPointerTo();
+    Entry.IsSExt = false;
+    Entry.IsZExt = false;
+    Args.push_back(Entry);
+  }
+
+  int Bits = N->getOperand(0)
+                 .getValueType()
+                 .getTypeForEVT(*DAG.getContext())
+                 ->getIntegerBitWidth();
+  Entry.Node = DAG.getConstant(Bits, DL, TLI.getPointerTy(DAG.getDataLayout()));
+  Entry.Ty = Type::getInt32Ty(*DAG.getContext());
+  Entry.IsSExt = false;
+  Entry.IsZExt = true;
+  Args.push_back(Entry);
+
+  SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+                                         TLI.getPointerTy(DAG.getDataLayout()));
+
+  TargetLowering::CallLoweringInfo CLI(DAG);
+  CLI.setDebugLoc(DL)
+      .setChain(InChain)
+      .setLibCallee(TLI.getLibcallCallingConv(LC),
+                    Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args))
+      .setDiscardResult();
+
+  SDValue Chain = TLI.LowerCallTo(CLI).second;
+
+  return DAG.getLoad(ArgVT, DL, Chain, Output, MachinePointerInfo());
+}
+
 void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
                                          SDValue &Lo, SDValue &Hi) {
   EVT VT = N->getValueType(0);
@@ -3935,6 +3999,14 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
     LC = RTLIB::SDIV_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::SDIV_I128;
+
+  else {
+    SDValue Result =
+        ExpandExtIntRes_DIVREM(TLI, RTLIB::SDIV_IEXT, DAG, N, dl, VT);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
 
   TargetLowering::MakeLibCallOptions CallOptions;
@@ -4126,6 +4198,14 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
     LC = RTLIB::SREM_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::SREM_I128;
+
+  else {
+    SDValue Result =
+        ExpandExtIntRes_DIVREM(TLI, RTLIB::SREM_IEXT, DAG, N, dl, VT);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
 
   TargetLowering::MakeLibCallOptions CallOptions;
@@ -4301,6 +4381,14 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
     LC = RTLIB::UDIV_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::UDIV_I128;
+
+  else {
+    SDValue Result =
+        ExpandExtIntRes_DIVREM(TLI, RTLIB::UDIV_IEXT, DAG, N, dl, VT);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
 
   TargetLowering::MakeLibCallOptions CallOptions;
@@ -4328,6 +4416,14 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
     LC = RTLIB::UREM_I64;
   else if (VT == MVT::i128)
     LC = RTLIB::UREM_I128;
+
+  else {
+    SDValue Result =
+        ExpandExtIntRes_DIVREM(TLI, RTLIB::UREM_IEXT, DAG, N, dl, VT);
+    SplitInteger(Result, Lo, Hi);
+    return;
+  }
+
   assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
 
   TargetLowering::MakeLibCallOptions CallOptions;

diff  --git a/llvm/test/CodeGen/AArch64/udivmodei5.ll b/llvm/test/CodeGen/AArch64/udivmodei5.ll
new file mode 100644
index 0000000000000..bb068bbef8301
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/udivmodei5.ll
@@ -0,0 +1,276 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-linux-gnuabi < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64_be-linux-gnuabi < %s | FileCheck %s --check-prefix=CHECK-BE
+
+define void @udiv129(i129* %ptr, i129* %out) nounwind {
+; CHECK-LABEL: udiv129:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    ldp x10, x11, [x0]
+; CHECK-NEXT:    stp x30, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT:    mov x19, x1
+; CHECK-NEXT:    mov w8, #3
+; CHECK-NEXT:    ldrb w9, [x0, #16]
+; CHECK-NEXT:    add x0, sp, #64
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    mov x2, sp
+; CHECK-NEXT:    mov w3, #256
+; CHECK-NEXT:    stp x9, xzr, [sp, #48]
+; CHECK-NEXT:    stp xzr, xzr, [sp, #8]
+; CHECK-NEXT:    stp xzr, x10, [sp, #24]
+; CHECK-NEXT:    str x11, [sp, #40]
+; CHECK-NEXT:    str x8, [sp]
+; CHECK-NEXT:    bl __udivei4
+; CHECK-NEXT:    ldr w8, [sp, #80]
+; CHECK-NEXT:    ldp x9, x10, [sp, #64]
+; CHECK-NEXT:    and w8, w8, #0x1
+; CHECK-NEXT:    stp x9, x10, [x19]
+; CHECK-NEXT:    strb w8, [x19, #16]
+; CHECK-NEXT:    ldp x30, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: udiv129:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub sp, sp, #112
+; CHECK-BE-NEXT:    ldp x11, x10, [x0]
+; CHECK-BE-NEXT:    mov w8, #3
+; CHECK-BE-NEXT:    stp x30, x19, [sp, #96] // 16-byte Folded Spill
+; CHECK-BE-NEXT:    ldrb w9, [x0, #16]
+; CHECK-BE-NEXT:    mov x19, x1
+; CHECK-BE-NEXT:    add x0, sp, #64
+; CHECK-BE-NEXT:    add x1, sp, #32
+; CHECK-BE-NEXT:    stp x8, xzr, [sp, #24]
+; CHECK-BE-NEXT:    mov x2, sp
+; CHECK-BE-NEXT:    extr x8, x11, x10, #56
+; CHECK-BE-NEXT:    lsr x11, x11, #56
+; CHECK-BE-NEXT:    bfi x9, x10, #8, #56
+; CHECK-BE-NEXT:    mov w3, #256
+; CHECK-BE-NEXT:    stp xzr, xzr, [sp, #8]
+; CHECK-BE-NEXT:    str xzr, [sp]
+; CHECK-BE-NEXT:    stp x11, x8, [sp, #40]
+; CHECK-BE-NEXT:    str x9, [sp, #56]
+; CHECK-BE-NEXT:    bl __udivei4
+; CHECK-BE-NEXT:    ldp x9, x8, [sp, #72]
+; CHECK-BE-NEXT:    ldr x10, [sp, #88]
+; CHECK-BE-NEXT:    extr x9, x9, x8, #8
+; CHECK-BE-NEXT:    extr x8, x8, x10, #8
+; CHECK-BE-NEXT:    strb w10, [x19, #16]
+; CHECK-BE-NEXT:    and x9, x9, #0x1ffffffffffffff
+; CHECK-BE-NEXT:    stp x9, x8, [x19]
+; CHECK-BE-NEXT:    ldp x30, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    ret
+  %a = load i129, i129* %ptr
+  %res = udiv i129 %a, 3
+  store i129 %res, i129* %out
+  ret void
+}
+
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: urem129:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    stp x0, x1, [sp, #32]
+; CHECK-NEXT:    and x8, x2, #0x1
+; CHECK-NEXT:    and x9, x6, #0x1
+; CHECK-NEXT:    add x0, sp, #64
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    mov x2, sp
+; CHECK-NEXT:    mov w3, #256
+; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x4, x5, [sp]
+; CHECK-NEXT:    stp x8, xzr, [sp, #48]
+; CHECK-NEXT:    stp x9, xzr, [sp, #16]
+; CHECK-NEXT:    bl __umodei4
+; CHECK-NEXT:    ldp x1, x8, [sp, #72]
+; CHECK-NEXT:    ldr x0, [sp, #64]
+; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    and x2, x8, #0x1
+; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: urem129:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub sp, sp, #112
+; CHECK-BE-NEXT:    stp x1, x2, [sp, #48]
+; CHECK-BE-NEXT:    and x8, x0, #0x1
+; CHECK-BE-NEXT:    and x9, x4, #0x1
+; CHECK-BE-NEXT:    add x0, sp, #64
+; CHECK-BE-NEXT:    add x1, sp, #32
+; CHECK-BE-NEXT:    mov x2, sp
+; CHECK-BE-NEXT:    mov w3, #256
+; CHECK-BE-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    stp x6, xzr, [sp, #24]
+; CHECK-BE-NEXT:    stp x9, x5, [sp, #8]
+; CHECK-BE-NEXT:    str xzr, [sp]
+; CHECK-BE-NEXT:    str x8, [sp, #40]
+; CHECK-BE-NEXT:    bl __umodei4
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #72]
+; CHECK-BE-NEXT:    ldp x2, x30, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    ret
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: sdiv129:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    sbfx x8, x2, #0, #1
+; CHECK-NEXT:    stp x0, x1, [sp, #32]
+; CHECK-NEXT:    sbfx x9, x6, #0, #1
+; CHECK-NEXT:    add x0, sp, #64
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    mov x2, sp
+; CHECK-NEXT:    mov w3, #256
+; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x4, x5, [sp]
+; CHECK-NEXT:    stp x8, x8, [sp, #48]
+; CHECK-NEXT:    stp x9, x9, [sp, #16]
+; CHECK-NEXT:    bl __divei4
+; CHECK-NEXT:    ldp x1, x8, [sp, #72]
+; CHECK-NEXT:    ldr x0, [sp, #64]
+; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    and x2, x8, #0x1
+; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: sdiv129:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub sp, sp, #112
+; CHECK-BE-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-BE-NEXT:    stp x1, x2, [sp, #48]
+; CHECK-BE-NEXT:    sbfx x9, x4, #0, #1
+; CHECK-BE-NEXT:    add x0, sp, #64
+; CHECK-BE-NEXT:    add x1, sp, #32
+; CHECK-BE-NEXT:    mov x2, sp
+; CHECK-BE-NEXT:    mov w3, #256
+; CHECK-BE-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    stp x5, x6, [sp, #16]
+; CHECK-BE-NEXT:    stp x8, x8, [sp, #32]
+; CHECK-BE-NEXT:    stp x9, x9, [sp]
+; CHECK-BE-NEXT:    bl __divei4
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #72]
+; CHECK-BE-NEXT:    ldp x2, x30, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    ret
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; CHECK-LABEL: srem129:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #112
+; CHECK-NEXT:    sbfx x8, x2, #0, #1
+; CHECK-NEXT:    stp x0, x1, [sp, #32]
+; CHECK-NEXT:    sbfx x9, x6, #0, #1
+; CHECK-NEXT:    add x0, sp, #64
+; CHECK-NEXT:    add x1, sp, #32
+; CHECK-NEXT:    mov x2, sp
+; CHECK-NEXT:    mov w3, #256
+; CHECK-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x4, x5, [sp]
+; CHECK-NEXT:    stp x8, x8, [sp, #48]
+; CHECK-NEXT:    stp x9, x9, [sp, #16]
+; CHECK-NEXT:    bl __modei4
+; CHECK-NEXT:    ldp x1, x8, [sp, #72]
+; CHECK-NEXT:    ldr x0, [sp, #64]
+; CHECK-NEXT:    ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT:    and x2, x8, #0x1
+; CHECK-NEXT:    add sp, sp, #112
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: srem129:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub sp, sp, #112
+; CHECK-BE-NEXT:    sbfx x8, x0, #0, #1
+; CHECK-BE-NEXT:    stp x1, x2, [sp, #48]
+; CHECK-BE-NEXT:    sbfx x9, x4, #0, #1
+; CHECK-BE-NEXT:    add x0, sp, #64
+; CHECK-BE-NEXT:    add x1, sp, #32
+; CHECK-BE-NEXT:    mov x2, sp
+; CHECK-BE-NEXT:    mov w3, #256
+; CHECK-BE-NEXT:    str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    stp x5, x6, [sp, #16]
+; CHECK-BE-NEXT:    stp x8, x8, [sp, #32]
+; CHECK-BE-NEXT:    stp x9, x9, [sp]
+; CHECK-BE-NEXT:    bl __modei4
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #72]
+; CHECK-BE-NEXT:    ldp x2, x30, [sp, #88] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    add sp, sp, #112
+; CHECK-BE-NEXT:    ret
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
+
+; Some higher sizes
+define i257 @sdiv257(i257 %a, i257 %b) nounwind {
+; CHECK-LABEL: sdiv257:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub sp, sp, #208
+; CHECK-NEXT:    ldp x8, x9, [sp, #208]
+; CHECK-NEXT:    stp x2, x3, [sp, #80]
+; CHECK-NEXT:    mov x2, sp
+; CHECK-NEXT:    stp x0, x1, [sp, #64]
+; CHECK-NEXT:    add x0, sp, #128
+; CHECK-NEXT:    add x1, sp, #64
+; CHECK-NEXT:    mov w3, #512
+; CHECK-NEXT:    str x30, [sp, #192] // 8-byte Folded Spill
+; CHECK-NEXT:    stp x8, x9, [sp, #16]
+; CHECK-NEXT:    ldr x9, [sp, #224]
+; CHECK-NEXT:    sbfx x8, x4, #0, #1
+; CHECK-NEXT:    stp x6, x7, [sp]
+; CHECK-NEXT:    sbfx x9, x9, #0, #1
+; CHECK-NEXT:    stp x8, x8, [sp, #112]
+; CHECK-NEXT:    stp x8, x8, [sp, #96]
+; CHECK-NEXT:    stp x9, x9, [sp, #48]
+; CHECK-NEXT:    stp x9, x9, [sp, #32]
+; CHECK-NEXT:    bl __divei4
+; CHECK-NEXT:    ldp x3, x8, [sp, #152]
+; CHECK-NEXT:    ldp x0, x1, [sp, #128]
+; CHECK-NEXT:    ldr x2, [sp, #144]
+; CHECK-NEXT:    ldr x30, [sp, #192] // 8-byte Folded Reload
+; CHECK-NEXT:    and x4, x8, #0x1
+; CHECK-NEXT:    add sp, sp, #208
+; CHECK-NEXT:    ret
+;
+; CHECK-BE-LABEL: sdiv257:
+; CHECK-BE:       // %bb.0:
+; CHECK-BE-NEXT:    sub sp, sp, #208
+; CHECK-BE-NEXT:    add x8, sp, #208
+; CHECK-BE-NEXT:    str x30, [sp, #192] // 8-byte Folded Spill
+; CHECK-BE-NEXT:    sbfx x9, x0, #0, #1
+; CHECK-BE-NEXT:    add x0, sp, #128
+; CHECK-BE-NEXT:    ld1 { v0.2d }, [x8]
+; CHECK-BE-NEXT:    mov x8, sp
+; CHECK-BE-NEXT:    add x8, x8, #40
+; CHECK-BE-NEXT:    st1 { v0.2d }, [x8]
+; CHECK-BE-NEXT:    ldr x8, [sp, #224]
+; CHECK-BE-NEXT:    stp x3, x4, [sp, #112]
+; CHECK-BE-NEXT:    mov w3, #512
+; CHECK-BE-NEXT:    stp x1, x2, [sp, #96]
+; CHECK-BE-NEXT:    add x1, sp, #64
+; CHECK-BE-NEXT:    stp x8, x9, [sp, #56]
+; CHECK-BE-NEXT:    sbfx x8, x6, #0, #1
+; CHECK-BE-NEXT:    mov x2, sp
+; CHECK-BE-NEXT:    stp x9, x9, [sp, #80]
+; CHECK-BE-NEXT:    str x9, [sp, #72]
+; CHECK-BE-NEXT:    stp x8, x8, [sp, #8]
+; CHECK-BE-NEXT:    stp x8, x7, [sp, #24]
+; CHECK-BE-NEXT:    str x8, [sp]
+; CHECK-BE-NEXT:    bl __divei4
+; CHECK-BE-NEXT:    ldp x8, x1, [sp, #152]
+; CHECK-BE-NEXT:    ldp x2, x3, [sp, #168]
+; CHECK-BE-NEXT:    ldp x4, x30, [sp, #184] // 8-byte Folded Reload
+; CHECK-BE-NEXT:    and x0, x8, #0x1
+; CHECK-BE-NEXT:    add sp, sp, #208
+; CHECK-BE-NEXT:    ret
+  %res = sdiv i257 %a, %b
+  ret i257 %res
+}

diff  --git a/llvm/test/CodeGen/X86/udivmodei5.ll b/llvm/test/CodeGen/X86/udivmodei5.ll
new file mode 100644
index 0000000000000..538b2ce64a97d
--- /dev/null
+++ b/llvm/test/CodeGen/X86/udivmodei5.ll
@@ -0,0 +1,1110 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+
+define i129 @udiv129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: udiv129:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esp, %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __udivei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 12(%esi)
+; X86-NEXT:    movl %edi, 8(%esi)
+; X86-NEXT:    movl %edx, 4(%esi)
+; X86-NEXT:    movl %ecx, (%esi)
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movb %al, 16(%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: udiv129:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $256, %ecx # imm = 0x100
+; X64-NEXT:    callq __udivei4 at PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    retq
+  %res = udiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @urem129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: urem129:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esp, %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __umodei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 12(%esi)
+; X86-NEXT:    movl %edi, 8(%esi)
+; X86-NEXT:    movl %edx, 4(%esi)
+; X86-NEXT:    movl %ecx, (%esi)
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movb %al, 16(%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: urem129:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $256, %ecx # imm = 0x100
+; X64-NEXT:    callq __umodei4 at PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    retq
+  %res = urem i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @sdiv129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: sdiv129:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl %esp, %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __divei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 12(%esi)
+; X86-NEXT:    movl %edi, 8(%esi)
+; X86-NEXT:    movl %edx, 4(%esi)
+; X86-NEXT:    movl %ecx, (%esi)
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movb %al, 16(%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: sdiv129:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    negq %r9
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    negq %rdx
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $256, %ecx # imm = 0x100
+; X64-NEXT:    callq __divei4 at PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    retq
+  %res = sdiv i129 %a, %b
+  ret i129 %res
+}
+
+define i129 @srem129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: srem129:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $104, %esp
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, (%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl %esp, %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __modei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 12(%esi)
+; X86-NEXT:    movl %edi, 8(%esi)
+; X86-NEXT:    movl %edx, 4(%esi)
+; X86-NEXT:    movl %ecx, (%esi)
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movb %al, 16(%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: srem129:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $104, %rsp
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    negq %r9
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    negq %rdx
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $256, %ecx # imm = 0x100
+; X64-NEXT:    callq __modei4 at PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    addq $104, %rsp
+; X64-NEXT:    retq
+  %res = srem i129 %a, %b
+  ret i129 %res
+}
+
+; Some higher sizes
+define i257 @sdiv257(i257 %a, i257 %b) nounwind {
+; X86-LABEL: sdiv257:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $216, %esp
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 72(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 76(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 64(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 68(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 56(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 60(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 52(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 80(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $512 # imm = 0x200
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __divei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 28(%esi)
+; X86-NEXT:    movl %ecx, 24(%esi)
+; X86-NEXT:    movl %edx, 20(%esi)
+; X86-NEXT:    movl %edi, 16(%esi)
+; X86-NEXT:    movl (%esp), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 12(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 8(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, 4(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT:    movl %ecx, (%esi)
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movb %al, 32(%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: sdiv257:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    subq $200, %rsp
+; X64-NEXT:    movq %rdi, %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    negq %r9
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r14, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r11, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r10, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $512, %ecx # imm = 0x200
+; X64-NEXT:    callq __divei4 at PLT
+; X64-NEXT:    movl {{[0-9]+}}(%rsp), %eax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    movq %rdi, 24(%rbx)
+; X64-NEXT:    movq %rsi, 16(%rbx)
+; X64-NEXT:    movq %rdx, 8(%rbx)
+; X64-NEXT:    movq %rcx, (%rbx)
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    movb %al, 32(%rbx)
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    addq $200, %rsp
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r14
+; X64-NEXT:    retq
+  %res = sdiv i257 %a, %b
+  ret i257 %res
+}
+
+define i1001 @srem1001(i1001 %a, i1001 %b) nounwind {
+; X86-LABEL: srem1001:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $496, %esp # imm = 0x1F0
+; X86-NEXT:    movl 132(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 128(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 124(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 120(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 116(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 112(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 108(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 104(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 100(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 96(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 92(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 88(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 84(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 80(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 76(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 72(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 68(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 64(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 60(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 56(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 52(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 136(%ebp), %eax
+; X86-NEXT:    shll $23, %eax
+; X86-NEXT:    sarl $23, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 264(%ebp), %eax
+; X86-NEXT:    shll $23, %eax
+; X86-NEXT:    sarl $23, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 260(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 256(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 252(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 248(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 244(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 240(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 236(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 232(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 228(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 224(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 220(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 216(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 212(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 208(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 204(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 200(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 196(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 192(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 188(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 184(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 180(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 176(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 172(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 168(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 164(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 160(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 156(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 152(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 148(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 144(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 140(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $1024 # imm = 0x400
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __modei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 120(%esi)
+; X86-NEXT:    movl %eax, 116(%esi)
+; X86-NEXT:    movl %ecx, 112(%esi)
+; X86-NEXT:    movl %edx, 108(%esi)
+; X86-NEXT:    movl %edi, 104(%esi)
+; X86-NEXT:    movl (%esp), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 100(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 96(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 92(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 88(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 84(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 80(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 76(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 72(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 68(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 64(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 60(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 56(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 52(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 48(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 44(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 40(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 36(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 32(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 28(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 24(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 20(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 16(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 12(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 8(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, 4(%esi)
+; X86-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT:    movl %eax, (%esi)
+; X86-NEXT:    movl $511, %eax # imm = 0x1FF
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movw %ax, 124(%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: srem1001:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbp
+; X64-NEXT:    pushq %r15
+; X64-NEXT:    pushq %r14
+; X64-NEXT:    pushq %r13
+; X64-NEXT:    pushq %r12
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    subq $408, %rsp # imm = 0x198
+; X64-NEXT:    movq %rdi, %rbx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    shlq $23, %rax
+; X64-NEXT:    sarq $23, %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    shlq $23, %rax
+; X64-NEXT:    sarq $23, %rax
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $1024, %ecx # imm = 0x400
+; X64-NEXT:    callq __modei4 at PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq %rcx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r10
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r11
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r14
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r15
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r12
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r13
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r8
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rbp
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %r9
+; X64-NEXT:    movq %r9, 112(%rbx)
+; X64-NEXT:    movq %rcx, 104(%rbx)
+; X64-NEXT:    movq %rbp, 96(%rbx)
+; X64-NEXT:    movq %rdi, 88(%rbx)
+; X64-NEXT:    movq %rsi, 80(%rbx)
+; X64-NEXT:    movq %rdx, 72(%rbx)
+; X64-NEXT:    movq %r8, 64(%rbx)
+; X64-NEXT:    movq %r13, 56(%rbx)
+; X64-NEXT:    movq %r12, 48(%rbx)
+; X64-NEXT:    movq %r15, 40(%rbx)
+; X64-NEXT:    movq %r14, 32(%rbx)
+; X64-NEXT:    movq %r11, 24(%rbx)
+; X64-NEXT:    movq %r10, 16(%rbx)
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, 8(%rbx)
+; X64-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rcx # 8-byte Reload
+; X64-NEXT:    movq %rcx, (%rbx)
+; X64-NEXT:    movl %eax, 120(%rbx)
+; X64-NEXT:    shrq $32, %rax
+; X64-NEXT:    andl $511, %eax # imm = 0x1FF
+; X64-NEXT:    movw %ax, 124(%rbx)
+; X64-NEXT:    movq %rbx, %rax
+; X64-NEXT:    addq $408, %rsp # imm = 0x198
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    popq %r12
+; X64-NEXT:    popq %r13
+; X64-NEXT:    popq %r14
+; X64-NEXT:    popq %r15
+; X64-NEXT:    popq %rbp
+; X64-NEXT:    retq
+  %res = srem i1001 %a, %b
+  ret i1001 %res
+}
+
+define i129 @chain129(i129 %a, i129 %b) nounwind {
+; X86-LABEL: chain129:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %ebp
+; X86-NEXT:    movl %esp, %ebp
+; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %edi
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    andl $-8, %esp
+; X86-NEXT:    subl $200, %esp
+; X86-NEXT:    movl 24(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 20(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 16(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 12(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 40(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 44(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 32(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 36(%ebp), %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 28(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 48(%ebp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl 8(%ebp), %esi
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __udivei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edi, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl $17, (%esp)
+; X86-NEXT:    movl $0, {{[0-9]+}}(%esp)
+; X86-NEXT:    movl %esp, %eax
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    leal {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    pushl $256 # imm = 0x100
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    pushl %edx
+; X86-NEXT:    calll __divei4
+; X86-NEXT:    addl $16, %esp
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl %ebx, 12(%esi)
+; X86-NEXT:    movl %edi, 8(%esi)
+; X86-NEXT:    movl %edx, 4(%esi)
+; X86-NEXT:    movl %ecx, (%esi)
+; X86-NEXT:    andl $1, %eax
+; X86-NEXT:    movb %al, 16(%esi)
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    leal -12(%ebp), %esp
+; X86-NEXT:    popl %esi
+; X86-NEXT:    popl %edi
+; X86-NEXT:    popl %ebx
+; X86-NEXT:    popl %ebp
+; X86-NEXT:    retl $4
+;
+; X64-LABEL: chain129:
+; X64:       # %bb.0:
+; X64-NEXT:    subq $200, %rsp
+; X64-NEXT:    andl $1, %r9d
+; X64-NEXT:    andl $1, %edx
+; X64-NEXT:    movq %rsi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdi, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r8, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %r9, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $256, %ecx # imm = 0x100
+; X64-NEXT:    callq __udivei4 at PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    andl $1, %eax
+; X64-NEXT:    negq %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movq %rdx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq %rax, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $17, {{[0-9]+}}(%rsp)
+; X64-NEXT:    movq $0, {{[0-9]+}}(%rsp)
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rsi
+; X64-NEXT:    leaq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    movl $256, %ecx # imm = 0x100
+; X64-NEXT:    callq __divei4 at PLT
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
+; X64-NEXT:    andl $1, %ecx
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
+; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
+; X64-NEXT:    addq $200, %rsp
+; X64-NEXT:    retq
+  %res = udiv i129 %a, %b
+  %res2 = sdiv i129 %res, 17
+  ret i129 %res2
+}


        


More information about the llvm-commits mailing list