[llvm] 6e561d1 - [Intrinsic] Add fixed point saturating division intrinsics.
Bevin Hansson via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 24 01:56:01 PST 2020
Author: Bevin Hansson
Date: 2020-02-24T10:50:52+01:00
New Revision: 6e561d1c94edc2ecaab7b79f6b3f1a06f515d531
URL: https://github.com/llvm/llvm-project/commit/6e561d1c94edc2ecaab7b79f6b3f1a06f515d531
DIFF: https://github.com/llvm/llvm-project/commit/6e561d1c94edc2ecaab7b79f6b3f1a06f515d531.diff
LOG: [Intrinsic] Add fixed point saturating division intrinsics.
Summary:
This patch adds intrinsics and ISelDAG nodes for signed
and unsigned saturating fixed-point division:
```
llvm.sdiv.fix.sat.*
llvm.udiv.fix.sat.*
```
These intrinsics perform scaled, saturating division
on two integers or vectors of integers. They are
required for the implementation of the Embedded-C
fixed-point arithmetic in Clang.
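For reference, a minimal model of the specified semantics in plain C++
(an illustrative sketch only, not code from this patch; the helper name
is made up, and it leans on the Clang/GCC __int128 extension for
headroom):
```
#include <cassert>
#include <cstdint>

// Reference model of llvm.sdiv.fix.sat.i32. Both operands and the result
// share the same scale, so the exact quotient is (a * 2^scale) / b,
// clamped to the range of the result type. C++'s truncating division is
// one of the rounding directions the intrinsic is permitted to use.
int32_t sdiv_fix_sat_i32(int32_t a, int32_t b, unsigned scale) {
  assert(b != 0 && "division by zero is undefined behavior");
  assert(scale < 32 && "signed operations require scale < bit width");
  // Compute in 128 bits so the upscale and the division cannot overflow.
  __int128 q =
      static_cast<__int128>(a) * (static_cast<__int128>(1) << scale) / b;
  if (q > INT32_MAX) return INT32_MAX;  // clamp to the signed maximum
  if (q < INT32_MIN) return INT32_MIN;  // clamp to the signed minimum
  return static_cast<int32_t>(q);
}
```
With scale 0, sdiv_fix_sat_i32(INT32_MIN, -1, 0) clamps to INT32_MAX,
the 32-bit analogue of the -8 / -1 => 7 i4 example in the LangRef
changes below.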
Reviewers: bjope, leonardchan, craig.topper
Subscribers: hiraditya, jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71550
Added:
llvm/test/CodeGen/X86/sdiv_fix_sat.ll
llvm/test/CodeGen/X86/udiv_fix_sat.ll
Modified:
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/ISDOpcodes.h
llvm/include/llvm/CodeGen/TargetLowering.h
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/Target/TargetSelectionDAG.td
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/CodeGen/TargetLoweringBase.cpp
llvm/lib/IR/Verifier.cpp
Removed:
################################################################################
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index bb02c4b02cf4..43dbc70d2fd1 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -14331,6 +14331,136 @@ Examples
%res = call i4 @llvm.udiv.fix.i4(i4 3, i4 4, i32 1) ; %res = 2 (or 1) (1.5 / 2 = 0.75)
+'``llvm.sdiv.fix.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.sdiv.fix.sat``
+on any integer bit width or vectors of integers.
+
+::
+
+ declare i16 @llvm.sdiv.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
+ declare i32 @llvm.sdiv.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
+ declare i64 @llvm.sdiv.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
+ declare <4 x i32> @llvm.sdiv.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+Overview
+"""""""""
+
+The '``llvm.sdiv.fix.sat``' family of intrinsic functions perform signed
+fixed point saturating division on 2 arguments of the same scale.
+
+Arguments
+""""""""""
+
+The arguments (%a and %b) and the result may be of integer types of any bit
+width, but they must have the same bit width. ``%a`` and ``%b`` are the two
+values that will undergo signed fixed point division. The argument
+``%scale`` represents the scale of both operands, and must be a constant
+integer.
+
+Semantics:
+""""""""""
+
+This operation performs fixed point division on the 2 arguments of a
+specified scale. The result will also be returned in the same scale specified
+in the third argument.
+
+If the result value cannot be precisely represented in the given scale, the
+value is rounded up or down to the closest representable value. The rounding
+direction is unspecified.
+
+The maximum value this operation can clamp to is the largest signed value
+representable by the bit width of the first 2 arguments. The minimum value is the
+smallest signed value representable by this bit width.
+
+It is undefined behavior if the second argument is zero.
+
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call i4 @llvm.sdiv.fix.sat.i4(i4 6, i4 2, i32 0) ; %res = 3 (6 / 2 = 3)
+ %res = call i4 @llvm.sdiv.fix.sat.i4(i4 6, i4 4, i32 1) ; %res = 3 (3 / 2 = 1.5)
+ %res = call i4 @llvm.sdiv.fix.sat.i4(i4 3, i4 -2, i32 1) ; %res = -3 (1.5 / -1 = -1.5)
+
+ ; The result in the following could be rounded up to 1 or down to 0.5
+ %res = call i4 @llvm.sdiv.fix.sat.i4(i4 3, i4 4, i32 1) ; %res = 2 (or 1) (1.5 / 2 = 0.75)
+
+ ; Saturation
+ %res = call i4 @llvm.sdiv.fix.sat.i4(i4 -8, i4 -1, i32 0) ; %res = 7 (-8 / -1 = 8 => 7)
+ %res = call i4 @llvm.sdiv.fix.sat.i4(i4 4, i4 2, i32 2) ; %res = 7 (1 / 0.5 = 2 => 1.75)
+ %res = call i4 @llvm.sdiv.fix.sat.i4(i4 -4, i4 1, i32 2) ; %res = -8 (-1 / 0.25 = -4 => -2)
+
+
+'``llvm.udiv.fix.sat.*``' Intrinsics
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax
+"""""""
+
+This is an overloaded intrinsic. You can use ``llvm.udiv.fix.sat``
+on any integer bit width or vectors of integers.
+
+::
+
+ declare i16 @llvm.udiv.fix.sat.i16(i16 %a, i16 %b, i32 %scale)
+ declare i32 @llvm.udiv.fix.sat.i32(i32 %a, i32 %b, i32 %scale)
+ declare i64 @llvm.udiv.fix.sat.i64(i64 %a, i64 %b, i32 %scale)
+ declare <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32> %a, <4 x i32> %b, i32 %scale)
+
+Overview
+"""""""""
+
+The '``llvm.udiv.fix.sat``' family of intrinsic functions perform unsigned
+fixed point saturating division on 2 arguments of the same scale.
+
+Arguments
+""""""""""
+
+The arguments (%a and %b) and the result may be of integer types of any bit
+width, but they must have the same bit width. ``%a`` and ``%b`` are the two
+values that will undergo unsigned fixed point division. The argument
+``%scale`` represents the scale of both operands, and must be a constant
+integer.
+
+Semantics:
+""""""""""
+
+This operation performs fixed point division on the 2 arguments of a
+specified scale. The result will also be returned in the same scale specified
+in the third argument.
+
+If the result value cannot be precisely represented in the given scale, the
+value is rounded up or down to the closest representable value. The rounding
+direction is unspecified.
+
+The maximum value this operation can clamp to is the largest unsigned value
+representable by the bit width of the first 2 arguments. The minimum value is the
+smallest unsigned value representable by this bit width (zero).
+
+It is undefined behavior if the second argument is zero.
+
+Examples
+"""""""""
+
+.. code-block:: llvm
+
+ %res = call i4 @llvm.udiv.fix.sat.i4(i4 6, i4 2, i32 0) ; %res = 3 (6 / 2 = 3)
+ %res = call i4 @llvm.udiv.fix.sat.i4(i4 6, i4 4, i32 1) ; %res = 3 (3 / 2 = 1.5)
+
+ ; The result in the following could be rounded down to 0.5 or up to 1
+ %res = call i4 @llvm.udiv.fix.sat.i4(i4 3, i4 4, i32 1) ; %res = 1 (or 2) (1.5 / 2 = 0.75)
+
+ ; Saturation
+ %res = call i4 @llvm.udiv.fix.sat.i4(i4 8, i4 2, i32 2) ; %res = 15 (2 / 0.5 = 4 => 3.75)
+
+
Specialised Arithmetic Intrinsics
---------------------------------
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index afeaf5e2c26f..c5e4a375a635 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -291,6 +291,11 @@ namespace ISD {
/// constant integer.
SDIVFIX, UDIVFIX,
+ /// Same as the corresponding unsaturated fixed point instructions, but the
+ /// result is clamped between the min and max values representable by the
+ /// bits of the first 2 operands.
+ SDIVFIXSAT, UDIVFIXSAT,
+
/// Simple binary floating point operators.
FADD, FSUB, FMUL, FDIV, FREM,
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 0d6c3ab97018..9583e2b718e5 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1043,7 +1043,9 @@ class TargetLoweringBase {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
Supported = isSupportedFixedPointOperation(Op, VT, Scale);
break;
}
@@ -4269,7 +4271,7 @@ class TargetLowering : public TargetLoweringBase {
/// method accepts integers as its arguments.
SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
- /// Method for building the DAG expansion of ISD::[US]DIVFIX. This
+ /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
/// method accepts integers as its arguments.
/// Note: This method may fail if the division could not be performed
/// within the type. Clients must retry with a wider type if this happens.
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 563f8b0e4e7a..797d7b1765c3 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -969,6 +969,14 @@ def int_umul_fix_sat : Intrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
[IntrNoMem, IntrSpeculatable, IntrWillReturn, Commutative, ImmArg<2>]>;
+def int_sdiv_fix_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+def int_udiv_fix_sat : Intrinsic<[llvm_anyint_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
//===------------------------- Memory Use Markers -------------------------===//
//
def int_lifetime_start : Intrinsic<[],
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 7d8b97069de7..25b77baed2e1 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -402,7 +402,9 @@ def smulfixsat : SDNode<"ISD::SMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>
def umulfix : SDNode<"ISD::UMULFIX" , SDTIntScaledBinOp, [SDNPCommutative]>;
def umulfixsat : SDNode<"ISD::UMULFIXSAT", SDTIntScaledBinOp, [SDNPCommutative]>;
def sdivfix : SDNode<"ISD::SDIVFIX" , SDTIntScaledBinOp>;
+def sdivfixsat : SDNode<"ISD::SDIVFIXSAT", SDTIntScaledBinOp>;
def udivfix : SDNode<"ISD::UDIVFIX" , SDTIntScaledBinOp>;
+def udivfixsat : SDNode<"ISD::UDIVFIXSAT", SDTIntScaledBinOp>;
def sext_inreg : SDNode<"ISD::SIGN_EXTEND_INREG", SDTExtInreg>;
def sext_invec : SDNode<"ISD::SIGN_EXTEND_VECTOR_INREG", SDTExtInvec>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 6fd048a911b4..fefca4bac201 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1132,7 +1132,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: {
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -3489,7 +3491,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
if (SDValue V = TLI.expandFixedPointDiv(Node->getOpcode(), SDLoc(Node),
Node->getOperand(0),
Node->getOperand(1),
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8e55eaa3e3ba..36675d9459fb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -162,7 +162,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
case ISD::SDIVFIX:
- case ISD::UDIVFIX: Res = PromoteIntRes_DIVFIX(N); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntRes_DIVFIX(N); break;
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
@@ -784,22 +786,51 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
N->getOperand(2));
}
+static SDValue SaturateWidenedDIVFIX(SDValue V, SDLoc &dl,
+ unsigned SatW, bool Signed,
+ const TargetLowering &TLI,
+ SelectionDAG &DAG) {
+ EVT VT = V.getValueType();
+ unsigned VTW = VT.getScalarSizeInBits();
+
+ if (!Signed) {
+ // Saturate to the unsigned maximum by getting the minimum of V and the
+ // maximum.
+ return DAG.getNode(ISD::UMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW),
+ dl, VT));
+ }
+
+ // Saturate to the signed maximum (the low SatW - 1 bits) by taking the
+ // signed minimum of it and V.
+ V = DAG.getNode(ISD::SMIN, dl, VT, V,
+ DAG.getConstant(APInt::getLowBitsSet(VTW, SatW - 1),
+ dl, VT));
+  // Saturate to the signed minimum (the high VTW - SatW + 1 bits) by taking
+  // the signed maximum of it and V.
+ V = DAG.getNode(ISD::SMAX, dl, VT, V,
+ DAG.getConstant(APInt::getHighBitsSet(VTW, VTW - SatW + 1),
+ dl, VT));
+ return V;
+}
+
static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
- unsigned Scale, const TargetLowering &TLI,
- SelectionDAG &DAG) {
+ unsigned Scale, const TargetLowering &TLI,
+ SelectionDAG &DAG, unsigned SatW = 0) {
EVT VT = LHS.getValueType();
- bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ unsigned VTSize = VT.getScalarSizeInBits();
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
SDLoc dl(N);
- // See if we can perform the division in this type without widening.
- if (SDValue V = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
- DAG))
- return V;
-
- // If that didn't work, double the type width and try again. That must work,
- // or something is wrong.
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(),
- VT.getScalarSizeInBits() * 2);
+ // Widen the types by a factor of two. This is guaranteed to expand, since it
+ // will always have enough high bits in the LHS to shift into.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VTSize * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorElementCount());
if (Signed) {
LHS = DAG.getSExtOrTrunc(LHS, dl, WideVT);
RHS = DAG.getSExtOrTrunc(RHS, dl, WideVT);
@@ -808,18 +839,28 @@ static SDValue earlyExpandDIVFIX(SDNode *N, SDValue LHS, SDValue RHS,
RHS = DAG.getZExtOrTrunc(RHS, dl, WideVT);
}
- // TODO: Saturation.
-
SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, LHS, RHS, Scale,
DAG);
assert(Res && "Expanding DIVFIX with wide type failed?");
+ if (Saturating) {
+ // If the caller has told us to saturate at something less, use that width
+ // instead of the type before doubling. However, it cannot be more than
+ // what we just widened!
+ assert(SatW <= VTSize &&
+ "Tried to saturate to more than the original type?");
+ Res = SaturateWidenedDIVFIX(Res, dl, SatW == 0 ? VTSize : SatW, Signed,
+ TLI, DAG);
+ }
return DAG.getZExtOrTrunc(Res, dl, VT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
SDLoc dl(N);
SDValue Op1Promoted, Op2Promoted;
- bool Signed = N->getOpcode() == ISD::SDIVFIX;
+ bool Signed = N->getOpcode() == ISD::SDIVFIX ||
+ N->getOpcode() == ISD::SDIVFIXSAT;
+ bool Saturating = N->getOpcode() == ISD::SDIVFIXSAT ||
+ N->getOpcode() == ISD::UDIVFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
@@ -830,23 +871,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_DIVFIX(SDNode *N) {
EVT PromotedType = Op1Promoted.getValueType();
unsigned Scale = N->getConstantOperandVal(2);
- SDValue Res;
// If the type is already legal and the operation is legal in that type, we
// should not early expand.
if (TLI.isTypeLegal(PromotedType)) {
TargetLowering::LegalizeAction Action =
TLI.getFixedPointOperationAction(N->getOpcode(), PromotedType, Scale);
- if (Action == TargetLowering::Legal || Action == TargetLowering::Custom)
- Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
- Op2Promoted, N->getOperand(2));
+ if (Action == TargetLowering::Legal || Action == TargetLowering::Custom) {
+ EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ unsigned Diff = PromotedType.getScalarSizeInBits() -
+ N->getValueType(0).getScalarSizeInBits();
+ if (Saturating)
+ Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getConstant(Diff, dl, ShiftTy));
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, dl, PromotedType, Res,
+ DAG.getConstant(Diff, dl, ShiftTy));
+ return Res;
+ }
}
- if (!Res)
- Res = earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG);
-
- // TODO: Saturation.
-
- return Res;
+ // See if we can perform the division in this type without expanding.
+ if (SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, Op1Promoted,
+ Op2Promoted, Scale, DAG)) {
+ if (Saturating)
+ Res = SaturateWidenedDIVFIX(Res, dl,
+ N->getValueType(0).getScalarSizeInBits(),
+ Signed, TLI, DAG);
+ return Res;
+ }
+ // If we cannot, expand it to twice the type width. If we are saturating, give
+ // it the original width as a saturating width so we don't need to emit
+ // two saturations.
+ return earlyExpandDIVFIX(N, Op1Promoted, Op2Promoted, Scale, TLI, DAG,
+ N->getValueType(0).getScalarSizeInBits());
}
SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) {
@@ -1315,7 +1374,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: Res = PromoteIntOp_FIX(N); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: Res = PromoteIntOp_FIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1923,7 +1984,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::SDIVFIX:
- case ISD::UDIVFIX: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: ExpandIntRes_DIVFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -3253,8 +3316,15 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandIntRes_DIVFIX(SDNode *N, SDValue &Lo,
SDValue &Hi) {
- SDValue Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
- N->getConstantOperandVal(2), TLI, DAG);
+ SDLoc dl(N);
+ // Try expanding in the existing type first.
+ SDValue Res = TLI.expandFixedPointDiv(N->getOpcode(), dl, N->getOperand(0),
+ N->getOperand(1),
+ N->getConstantOperandVal(2), DAG);
+
+ if (!Res)
+ Res = earlyExpandDIVFIX(N, N->getOperand(0), N->getOperand(1),
+ N->getConstantOperandVal(2), TLI, DAG);
SplitInteger(Res, Lo, Hi);
}
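To make the mask construction in SaturateWidenedDIVFIX above concrete: for
a quotient computed in a 32-bit type that must saturate to 16-bit signed
bounds (VTW = 32, SatW = 16), the clamp constants are the low SatW - 1 bits
and the high VTW - SatW + 1 bits. A standalone scalar sketch, not patch
code:
```
#include <algorithm>
#include <cstdint>
#include <cstdio>

// Scalar model of SaturateWidenedDIVFIX for VTW = 32, SatW = 16, signed.
// APInt::getLowBitsSet(32, 15)  -> 0x00007fff, the largest 16-bit value.
// APInt::getHighBitsSet(32, 17) -> 0xffff8000, the smallest, sign-extended.
int32_t saturate_widened(int32_t v) {
  const int32_t max = 0x00007fff;                        // low SatW - 1 bits
  const int32_t min = static_cast<int32_t>(0xffff8000u); // high VTW - SatW + 1
  return std::max(min, std::min(v, max));                // SMAX(SMIN(v, max))
}

int main() {
  std::printf("%d %d\n", saturate_widened(40000),   // prints 32767
                         saturate_widened(-40000)); // prints -32768
}
```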
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index c3ca69f826ac..a624228dac0d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -142,7 +142,7 @@ class VectorLegalizer {
void ExpandUADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSADDSUBO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandMULO(SDNode *Node, SmallVectorImpl<SDValue> &Results);
- SDValue ExpandFixedPointDiv(SDNode *Node);
+ void ExpandFixedPointDiv(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue ExpandStrictFPOp(SDNode *Node);
void ExpandStrictFPOp(SDNode *Node, SmallVectorImpl<SDValue> &Results);
@@ -463,7 +463,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
- case ISD::UDIVFIX: {
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -968,8 +970,11 @@ void VectorLegalizer::Expand(SDNode *Node, SmallVectorImpl<SDValue> &Results) {
break;
case ISD::SDIVFIX:
case ISD::UDIVFIX:
- Results.push_back(ExpandFixedPointDiv(Node));
+ ExpandFixedPointDiv(Node, Results);
return;
+ case ISD::SDIVFIXSAT:
+ case ISD::UDIVFIXSAT:
+ break;
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
@@ -1454,12 +1459,12 @@ void VectorLegalizer::ExpandMULO(SDNode *Node,
Results.push_back(Overflow);
}
-SDValue VectorLegalizer::ExpandFixedPointDiv(SDNode *Node) {
+void VectorLegalizer::ExpandFixedPointDiv(SDNode *Node,
+ SmallVectorImpl<SDValue> &Results) {
SDNode *N = Node;
if (SDValue Expanded = TLI.expandFixedPointDiv(N->getOpcode(), SDLoc(N),
N->getOperand(0), N->getOperand(1), N->getConstantOperandVal(2), DAG))
- return Expanded;
- return DAG.UnrollVectorOp(N);
+ Results.push_back(Expanded);
}
void VectorLegalizer::ExpandStrictFPOp(SDNode *Node,
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 862b11c54a76..5e4c35263b7f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -166,7 +166,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
R = ScalarizeVecRes_FIX(N);
break;
}
@@ -956,7 +958,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::UMULFIX:
case ISD::UMULFIXSAT:
case ISD::SDIVFIX:
+ case ISD::SDIVFIXSAT:
case ISD::UDIVFIX:
+ case ISD::UDIVFIXSAT:
SplitVecRes_FIX(N, Lo, Hi);
break;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 7cf3e43088f2..002610ea24f2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5451,7 +5451,8 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
SDValue LHS, SDValue RHS, SDValue Scale,
SelectionDAG &DAG, const TargetLowering &TLI) {
EVT VT = LHS.getValueType();
- bool Signed = Opcode == ISD::SDIVFIX;
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
LLVMContext &Ctx = *DAG.getContext();
// If the type is legal but the operation isn't, this node might survive all
@@ -5463,14 +5464,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
// by bumping the size by one bit. This will force it to Promote, enabling the
// early expansion and avoiding the need to expand later.
- // We don't have to do this if Scale is 0; that can always be expanded.
+ // We don't have to do this if Scale is 0; that can always be expanded, unless
+ // it's a saturating signed operation. Those can experience true integer
+ // division overflow, a case which we must avoid.
// FIXME: We wouldn't have to do this (or any of the early
// expansion/promotion) if it was possible to expand a libcall of an
// illegal type during operation legalization. But it's not, so things
// get a bit hacky.
unsigned ScaleInt = cast<ConstantSDNode>(Scale)->getZExtValue();
- if (ScaleInt > 0 &&
+ if ((ScaleInt > 0 || (Saturating && Signed)) &&
(TLI.isTypeLegal(VT) ||
(VT.isVector() && TLI.isTypeLegal(VT.getVectorElementType())))) {
TargetLowering::LegalizeAction Action = TLI.getFixedPointOperationAction(
@@ -5492,8 +5495,16 @@ static SDValue expandDivFix(unsigned Opcode, const SDLoc &DL,
LHS = DAG.getZExtOrTrunc(LHS, DL, PromVT);
RHS = DAG.getZExtOrTrunc(RHS, DL, PromVT);
}
- // TODO: Saturation.
+ EVT ShiftTy = TLI.getShiftAmountTy(PromVT, DAG.getDataLayout());
+ // For saturating operations, we need to shift up the LHS to get the
+ // proper saturation width, and then shift down again afterwards.
+ if (Saturating)
+ LHS = DAG.getNode(ISD::SHL, DL, PromVT, LHS,
+ DAG.getConstant(1, DL, ShiftTy));
SDValue Res = DAG.getNode(Opcode, DL, PromVT, LHS, RHS, Scale);
+ if (Saturating)
+ Res = DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, PromVT, Res,
+ DAG.getConstant(1, DL, ShiftTy));
return DAG.getZExtOrTrunc(Res, DL, VT);
}
}
@@ -5757,6 +5768,10 @@ static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
return ISD::SDIVFIX;
case Intrinsic::udiv_fix:
return ISD::UDIVFIX;
+ case Intrinsic::sdiv_fix_sat:
+ return ISD::SDIVFIXSAT;
+ case Intrinsic::udiv_fix_sat:
+ return ISD::UDIVFIXSAT;
default:
llvm_unreachable("Unhandled fixed point intrinsic");
}
@@ -6460,7 +6475,9 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
}
case Intrinsic::sdiv_fix:
- case Intrinsic::udiv_fix: {
+ case Intrinsic::udiv_fix:
+ case Intrinsic::sdiv_fix_sat:
+ case Intrinsic::udiv_fix_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
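The shift-up/shift-down pattern used in expandDivFix above (and in
PromoteIntRes_DIVFIX) works because arithmetic-shifting the wide type's
saturation bounds right by the promotion amount yields exactly the narrow
type's bounds, so saturating the upscaled quotient at the wide width and
shifting back is the same as saturating at the original width. A
standalone numeric check of that identity, not patch code:
```
#include <cstdint>
#include <cstdio>

int main() {
  // Promote i16 to i32, so Diff = 16.
  const int Diff = 16;
  const int32_t WideMax = INT32_MAX; // i32 signed maximum, 2^31 - 1
  const int32_t WideMin = INT32_MIN; // i32 signed minimum, -2^31
  // Arithmetic right shift by Diff recovers the i16 bounds exactly.
  std::printf("%d %d\n", WideMax >> Diff,  // 32767  == i16 max
                         WideMin >> Diff); // -32768 == i16 min
}
```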
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index f57852ced660..0fd132f03af8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -314,7 +314,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UMULFIXSAT: return "umulfixsat";
case ISD::SDIVFIX: return "sdivfix";
+ case ISD::SDIVFIXSAT: return "sdivfixsat";
case ISD::UDIVFIX: return "udivfix";
+ case ISD::UDIVFIXSAT: return "udivfixsat";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 1e345caa1e17..f3f9c6dd7003 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7332,12 +7332,13 @@ SDValue
TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
SDValue LHS, SDValue RHS,
unsigned Scale, SelectionDAG &DAG) const {
- assert((Opcode == ISD::SDIVFIX ||
- Opcode == ISD::UDIVFIX) &&
+ assert((Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT ||
+ Opcode == ISD::UDIVFIX || Opcode == ISD::UDIVFIXSAT) &&
"Expected a fixed point division opcode");
EVT VT = LHS.getValueType();
- bool Signed = Opcode == ISD::SDIVFIX;
+ bool Signed = Opcode == ISD::SDIVFIX || Opcode == ISD::SDIVFIXSAT;
+ bool Saturating = Opcode == ISD::SDIVFIXSAT || Opcode == ISD::UDIVFIXSAT;
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
// If there is enough room in the type to upscale the LHS or downscale the
@@ -7349,7 +7350,15 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
: DAG.computeKnownBits(LHS).countMinLeadingZeros();
unsigned RHSTrail = DAG.computeKnownBits(RHS).countMinTrailingZeros();
- if (LHSLead + RHSTrail < Scale)
+ // For signed saturating operations, we need to be able to detect true integer
+  // division overflow; that is, when you have MIN / -EPS. However, this is
+  // undefined behavior for the underlying division, and if we emit divisions
+  // that could take such values it may cause undesired behavior (arithmetic
+  // exceptions on x86, for example).
+ // Avoid this by requiring an extra bit so that we never get this case.
+ // FIXME: This is a bit unfortunate as it means that for an 8-bit 7-scale
+ // signed saturating division, we need to emit a whopping 32-bit division.
+ if (LHSLead + RHSTrail < Scale + (unsigned)(Saturating && Signed))
return SDValue();
unsigned LHSShift = std::min(LHSLead, Scale);
@@ -7403,8 +7412,6 @@ TargetLowering::expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
Quot = DAG.getNode(ISD::UDIV, dl, VT,
LHS, RHS);
- // TODO: Saturation.
-
return Quot;
}
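The overflow case that the extra required bit rules out is easy to show in
isolation (a sketch assuming a mainstream x86 toolchain, not patch code):
once the dividend has at least one more bit of headroom than the scale
needs, it can never equal the wide type's minimum, so the single
overflowing case of signed division disappears.
```
#include <cstdint>
#include <cstdio>

int main() {
  // INT32_MIN / -1 overflows i32: the true quotient 2^31 is not
  // representable, and on x86 the idiv instruction raises #DE.
  // int32_t bad = INT32_MIN / -1;   // undefined behavior -- do not enable
  // One extra bit of headroom removes the case: a sign-extended 32-bit
  // dividend in a 64-bit division can never be INT64_MIN.
  long long ok = static_cast<long long>(INT32_MIN) / -1;
  std::printf("%lld\n", ok); // prints 2147483648
}
```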
diff --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index bd717a8585ee..95c63d09718c 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -660,7 +660,9 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::UMULFIX, VT, Expand);
setOperationAction(ISD::UMULFIXSAT, VT, Expand);
setOperationAction(ISD::SDIVFIX, VT, Expand);
+ setOperationAction(ISD::SDIVFIXSAT, VT, Expand);
setOperationAction(ISD::UDIVFIX, VT, Expand);
+ setOperationAction(ISD::UDIVFIXSAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 6af581b178dc..fefb3a4751c3 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4727,7 +4727,9 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
case Intrinsic::umul_fix:
case Intrinsic::umul_fix_sat:
case Intrinsic::sdiv_fix:
- case Intrinsic::udiv_fix: {
+ case Intrinsic::sdiv_fix_sat:
+ case Intrinsic::udiv_fix:
+ case Intrinsic::udiv_fix_sat: {
Value *Op1 = Call.getArgOperand(0);
Value *Op2 = Call.getArgOperand(1);
Assert(Op1->getType()->isIntOrIntVectorTy(),
@@ -4742,7 +4744,7 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) {
"third argument of [us][mul|div]_fix[_sat] must fit within 32 bits");
if (ID == Intrinsic::smul_fix || ID == Intrinsic::smul_fix_sat ||
- ID == Intrinsic::sdiv_fix) {
+ ID == Intrinsic::sdiv_fix || ID == Intrinsic::sdiv_fix_sat) {
Assert(
Op3->getZExtValue() < Op1->getType()->getScalarSizeInBits(),
"the scale of s[mul|div]_fix[_sat] must be less than the width of "
diff --git a/llvm/test/CodeGen/X86/sdiv_fix_sat.ll b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
new file mode 100644
index 000000000000..e35130dcb535
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sdiv_fix_sat.ll
@@ -0,0 +1,1411 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
+
+declare i4 @llvm.sdiv.fix.sat.i4 (i4, i4, i32)
+declare i15 @llvm.sdiv.fix.sat.i15 (i15, i15, i32)
+declare i16 @llvm.sdiv.fix.sat.i16 (i16, i16, i32)
+declare i18 @llvm.sdiv.fix.sat.i18 (i18, i18, i32)
+declare i64 @llvm.sdiv.fix.sat.i64 (i64, i64, i32)
+declare <4 x i32> @llvm.sdiv.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
+
+define i16 @func(i16 %x, i16 %y) nounwind {
+;
+; X64-LABEL: func:
+; X64: # %bb.0:
+; X64-NEXT: movswl %si, %esi
+; X64-NEXT: movswl %di, %ecx
+; X64-NEXT: shll $8, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: cltd
+; X64-NEXT: idivl %esi
+; X64-NEXT: # kill: def $eax killed $eax def $rax
+; X64-NEXT: leal -1(%rax), %edi
+; X64-NEXT: testl %esi, %esi
+; X64-NEXT: sets %sil
+; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: sets %cl
+; X64-NEXT: xorb %sil, %cl
+; X64-NEXT: testl %edx, %edx
+; X64-NEXT: setne %dl
+; X64-NEXT: testb %cl, %dl
+; X64-NEXT: cmovel %eax, %edi
+; X64-NEXT: cmpl $65535, %edi # imm = 0xFFFF
+; X64-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; X64-NEXT: cmovll %edi, %ecx
+; X64-NEXT: cmpl $-65536, %ecx # imm = 0xFFFF0000
+; X64-NEXT: movl $-65536, %eax # imm = 0xFFFF0000
+; X64-NEXT: cmovgl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $8, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cltd
+; X86-NEXT: idivl %esi
+; X86-NEXT: leal -1(%eax), %edi
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: sets %bl
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %bl, %cl
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: setne %dl
+; X86-NEXT: testb %cl, %dl
+; X86-NEXT: cmovel %eax, %edi
+; X86-NEXT: cmpl $65535, %edi # imm = 0xFFFF
+; X86-NEXT: movl $65535, %ecx # imm = 0xFFFF
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: cmpl $-65536, %ecx # imm = 0xFFFF0000
+; X86-NEXT: movl $-65536, %eax # imm = 0xFFFF0000
+; X86-NEXT: cmovgl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+ %tmp = call i16 @llvm.sdiv.fix.sat.i16(i16 %x, i16 %y, i32 7)
+ ret i16 %tmp
+}
+
+define i16 @func2(i8 %x, i8 %y) nounwind {
+;
+; X64-LABEL: func2:
+; X64: # %bb.0:
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: movswl %cx, %esi
+; X64-NEXT: movswl %ax, %ecx
+; X64-NEXT: shll $14, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: cltd
+; X64-NEXT: idivl %esi
+; X64-NEXT: # kill: def $eax killed $eax def $rax
+; X64-NEXT: leal -1(%rax), %edi
+; X64-NEXT: testl %esi, %esi
+; X64-NEXT: sets %sil
+; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: sets %cl
+; X64-NEXT: xorb %sil, %cl
+; X64-NEXT: testl %edx, %edx
+; X64-NEXT: setne %dl
+; X64-NEXT: testb %cl, %dl
+; X64-NEXT: cmovel %eax, %edi
+; X64-NEXT: cmpl $16383, %edi # imm = 0x3FFF
+; X64-NEXT: movl $16383, %ecx # imm = 0x3FFF
+; X64-NEXT: cmovll %edi, %ecx
+; X64-NEXT: cmpl $-16384, %ecx # imm = 0xC000
+; X64-NEXT: movl $-16384, %eax # imm = 0xC000
+; X64-NEXT: cmovgl %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $14, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cltd
+; X86-NEXT: idivl %esi
+; X86-NEXT: leal -1(%eax), %edi
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: sets %bl
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %bl, %cl
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: setne %dl
+; X86-NEXT: testb %cl, %dl
+; X86-NEXT: cmovel %eax, %edi
+; X86-NEXT: cmpl $16383, %edi # imm = 0x3FFF
+; X86-NEXT: movl $16383, %ecx # imm = 0x3FFF
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: cmpl $-16384, %ecx # imm = 0xC000
+; X86-NEXT: movl $-16384, %eax # imm = 0xC000
+; X86-NEXT: cmovgl %ecx, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+ %x2 = sext i8 %x to i15
+ %y2 = sext i8 %y to i15
+ %tmp = call i15 @llvm.sdiv.fix.sat.i15(i15 %x2, i15 %y2, i32 14)
+ %tmp2 = sext i15 %tmp to i16
+ ret i16 %tmp2
+}
+
+define i16 @func3(i15 %x, i8 %y) nounwind {
+;
+; X64-LABEL: func3:
+; X64: # %bb.0:
+; X64-NEXT: shll $8, %esi
+; X64-NEXT: movswl %si, %ecx
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: shrl $4, %ecx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: cwtd
+; X64-NEXT: idivw %cx
+; X64-NEXT: # kill: def $ax killed $ax def $rax
+; X64-NEXT: leal -1(%rax), %esi
+; X64-NEXT: testw %di, %di
+; X64-NEXT: sets %dil
+; X64-NEXT: testw %cx, %cx
+; X64-NEXT: sets %cl
+; X64-NEXT: xorb %dil, %cl
+; X64-NEXT: testw %dx, %dx
+; X64-NEXT: setne %dl
+; X64-NEXT: testb %cl, %dl
+; X64-NEXT: cmovel %eax, %esi
+; X64-NEXT: movswl %si, %eax
+; X64-NEXT: cmpl $16383, %eax # imm = 0x3FFF
+; X64-NEXT: movl $16383, %ecx # imm = 0x3FFF
+; X64-NEXT: cmovll %esi, %ecx
+; X64-NEXT: movswl %cx, %eax
+; X64-NEXT: cmpl $-16384, %eax # imm = 0xC000
+; X64-NEXT: movl $49152, %eax # imm = 0xC000
+; X64-NEXT: cmovgl %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func3:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $8, %eax
+; X86-NEXT: movswl %ax, %esi
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: shrl $4, %esi
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cwtd
+; X86-NEXT: idivw %si
+; X86-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NEXT: leal -1(%eax), %edi
+; X86-NEXT: testw %cx, %cx
+; X86-NEXT: sets %cl
+; X86-NEXT: testw %si, %si
+; X86-NEXT: sets %ch
+; X86-NEXT: xorb %cl, %ch
+; X86-NEXT: testw %dx, %dx
+; X86-NEXT: setne %cl
+; X86-NEXT: testb %ch, %cl
+; X86-NEXT: cmovel %eax, %edi
+; X86-NEXT: movswl %di, %eax
+; X86-NEXT: cmpl $16383, %eax # imm = 0x3FFF
+; X86-NEXT: movl $16383, %ecx # imm = 0x3FFF
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: movswl %cx, %eax
+; X86-NEXT: cmpl $-16384, %eax # imm = 0xC000
+; X86-NEXT: movl $49152, %eax # imm = 0xC000
+; X86-NEXT: cmovgl %ecx, %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+ %y2 = sext i8 %y to i15
+ %y3 = shl i15 %y2, 7
+ %tmp = call i15 @llvm.sdiv.fix.sat.i15(i15 %x, i15 %y3, i32 4)
+ %tmp2 = sext i15 %tmp to i16
+ ret i16 %tmp2
+}
+
+define i4 @func4(i4 %x, i4 %y) nounwind {
+;
+; X64-LABEL: func4:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: shlb $4, %sil
+; X64-NEXT: sarb $4, %sil
+; X64-NEXT: shlb $4, %dil
+; X64-NEXT: sarb $4, %dil
+; X64-NEXT: shlb $2, %dil
+; X64-NEXT: movsbl %dil, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: idivb %sil
+; X64-NEXT: movsbl %ah, %ebx
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: leal -1(%rax), %edi
+; X64-NEXT: movzbl %dil, %edi
+; X64-NEXT: testb %sil, %sil
+; X64-NEXT: sets %dl
+; X64-NEXT: testb %cl, %cl
+; X64-NEXT: sets %cl
+; X64-NEXT: xorb %dl, %cl
+; X64-NEXT: testb %bl, %bl
+; X64-NEXT: setne %dl
+; X64-NEXT: testb %cl, %dl
+; X64-NEXT: cmovel %eax, %edi
+; X64-NEXT: cmpb $7, %dil
+; X64-NEXT: movl $7, %ecx
+; X64-NEXT: cmovll %edi, %ecx
+; X64-NEXT: cmpb $-8, %cl
+; X64-NEXT: movl $248, %eax
+; X64-NEXT: cmovgl %ecx, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+;
+; X86-LABEL: func4:
+; X86: # %bb.0:
+; X86-NEXT: pushl %esi
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
+; X86-NEXT: shlb $4, %dl
+; X86-NEXT: sarb $4, %dl
+; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
+; X86-NEXT: shlb $4, %dh
+; X86-NEXT: sarb $4, %dh
+; X86-NEXT: shlb $2, %dh
+; X86-NEXT: movsbl %dh, %eax
+; X86-NEXT: idivb %dl
+; X86-NEXT: movsbl %ah, %ecx
+; X86-NEXT: movzbl %al, %esi
+; X86-NEXT: decb %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: testb %dl, %dl
+; X86-NEXT: sets %dl
+; X86-NEXT: testb %dh, %dh
+; X86-NEXT: sets %dh
+; X86-NEXT: xorb %dl, %dh
+; X86-NEXT: testb %cl, %cl
+; X86-NEXT: setne %cl
+; X86-NEXT: testb %dh, %cl
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: cmpb $7, %al
+; X86-NEXT: movl $7, %ecx
+; X86-NEXT: cmovll %eax, %ecx
+; X86-NEXT: cmpb $-8, %cl
+; X86-NEXT: movl $248, %eax
+; X86-NEXT: cmovgl %ecx, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: popl %esi
+; X86-NEXT: retl
+ %tmp = call i4 @llvm.sdiv.fix.sat.i4(i4 %x, i4 %y, i32 2)
+ ret i4 %tmp
+}
+
+define i64 @func5(i64 %x, i64 %y) nounwind {
+;
+; X64-LABEL: func5:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $40, %rsp
+; X64-NEXT: movq %rdi, %r15
+; X64-NEXT: leaq (%rdi,%rdi), %rax
+; X64-NEXT: shrq $33, %rax
+; X64-NEXT: movq %rdi, %r12
+; X64-NEXT: sarq $63, %r12
+; X64-NEXT: shlq $31, %r12
+; X64-NEXT: orq %rax, %r12
+; X64-NEXT: sets {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Folded Spill
+; X64-NEXT: shlq $32, %r15
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rsi, %r13
+; X64-NEXT: sarq $63, %r13
+; X64-NEXT: movq %r15, %rdi
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq %r13, %rcx
+; X64-NEXT: callq __divti3
+; X64-NEXT: movq %rax, %rbx
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: subq $1, %rbx
+; X64-NEXT: sbbq $0, %rbp
+; X64-NEXT: testq %r13, %r13
+; X64-NEXT: sets %r14b
+; X64-NEXT: xorb {{[-0-9]+}}(%r{{[sb]}}p), %r14b # 1-byte Folded Reload
+; X64-NEXT: movq %r15, %rdi
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %r13, %rcx
+; X64-NEXT: callq __modti3
+; X64-NEXT: orq %rax, %rdx
+; X64-NEXT: setne %al
+; X64-NEXT: testb %r14b, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbp # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %rbx # 8-byte Folded Reload
+; X64-NEXT: cmpq $-1, %rbx
+; X64-NEXT: movq $-1, %rax
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: cmovbq %rbx, %rcx
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: testq %rbp, %rbp
+; X64-NEXT: cmovnsq %rax, %rbx
+; X64-NEXT: cmoveq %rcx, %rbx
+; X64-NEXT: cmovnsq %rdx, %rbp
+; X64-NEXT: testq %rbx, %rbx
+; X64-NEXT: movl $0, %ecx
+; X64-NEXT: cmovaq %rbx, %rcx
+; X64-NEXT: testq %rbp, %rbp
+; X64-NEXT: cmovnsq %rbp, %rax
+; X64-NEXT: cmovsq %rdx, %rbx
+; X64-NEXT: cmpq $-1, %rbp
+; X64-NEXT: cmoveq %rcx, %rbx
+; X64-NEXT: shrdq $1, %rax, %rbx
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: addq $40, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+;
+; X86-LABEL: func5:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $88, %esp
+; X86-NEXT: movl 8(%ebp), %ecx
+; X86-NEXT: movl 12(%ebp), %eax
+; X86-NEXT: movl 20(%ebp), %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shll $31, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __divti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %esi
+; X86-NEXT: sbbl $0, %edi
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: sets %al
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %ah
+; X86-NEXT: xorb %al, %ah
+; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
+; X86-NEXT: testb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovsl %ebx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovsl %esi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $2147483647, %ecx # imm = 0x7FFFFFFF
+; X86-NEXT: cmovsl %edi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: andl %eax, %edx
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovel %ebx, %edx
+; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovbl %esi, %eax
+; X86-NEXT: cmpl $2147483647, %edi # imm = 0x7FFFFFFF
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovael %ecx, %esi
+; X86-NEXT: cmovel %eax, %esi
+; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-NEXT: cmovael %eax, %edi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmoval %esi, %eax
+; X86-NEXT: cmpl $-2147483648, %edi # imm = 0x80000000
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmoval %esi, %ecx
+; X86-NEXT: cmovel %eax, %ecx
+; X86-NEXT: movl $-2147483648, %eax # imm = 0x80000000
+; X86-NEXT: cmoval %edi, %eax
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: movl $-2147483648, %ebx # imm = 0x80000000
+; X86-NEXT: cmovsl %ebx, %edi
+; X86-NEXT: movl $0, %ebx
+; X86-NEXT: cmovsl %ebx, %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %edx
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: cmovel %eax, %edi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.sdiv.fix.sat.i64(i64 %x, i64 %y, i32 31)
+ ret i64 %tmp
+}
+
+define i18 @func6(i16 %x, i16 %y) nounwind {
+;
+; X64-LABEL: func6:
+; X64: # %bb.0:
+; X64-NEXT: movswl %di, %ecx
+; X64-NEXT: movswl %si, %esi
+; X64-NEXT: shll $7, %ecx
+; X64-NEXT: movl %ecx, %eax
+; X64-NEXT: cltd
+; X64-NEXT: idivl %esi
+; X64-NEXT: # kill: def $eax killed $eax def $rax
+; X64-NEXT: leal -1(%rax), %edi
+; X64-NEXT: testl %esi, %esi
+; X64-NEXT: sets %sil
+; X64-NEXT: testl %ecx, %ecx
+; X64-NEXT: sets %cl
+; X64-NEXT: xorb %sil, %cl
+; X64-NEXT: testl %edx, %edx
+; X64-NEXT: setne %dl
+; X64-NEXT: testb %cl, %dl
+; X64-NEXT: cmovel %eax, %edi
+; X64-NEXT: cmpl $131071, %edi # imm = 0x1FFFF
+; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF
+; X64-NEXT: cmovll %edi, %ecx
+; X64-NEXT: cmpl $-131072, %ecx # imm = 0xFFFE0000
+; X64-NEXT: movl $-131072, %eax # imm = 0xFFFE0000
+; X64-NEXT: cmovgl %ecx, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func6:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: shll $7, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: cltd
+; X86-NEXT: idivl %esi
+; X86-NEXT: leal -1(%eax), %edi
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: sets %bl
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %cl
+; X86-NEXT: xorb %bl, %cl
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: setne %dl
+; X86-NEXT: testb %cl, %dl
+; X86-NEXT: cmovel %eax, %edi
+; X86-NEXT: cmpl $131071, %edi # imm = 0x1FFFF
+; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF
+; X86-NEXT: cmovll %edi, %ecx
+; X86-NEXT: cmpl $-131072, %ecx # imm = 0xFFFE0000
+; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000
+; X86-NEXT: cmovgl %ecx, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: retl
+ %x2 = sext i16 %x to i18
+ %y2 = sext i16 %y to i18
+ %tmp = call i18 @llvm.sdiv.fix.sat.i18(i18 %x2, i18 %y2, i32 7)
+ ret i18 %tmp
+}
+
+define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
+;
+; X64-LABEL: vec:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %r15
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %r13
+; X64-NEXT: pushq %r12
+; X64-NEXT: pushq %rbx
+; X64-NEXT: subq $104, %rsp
+; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: pxor %xmm2, %xmm2
+; X64-NEXT: pcmpgtd %xmm0, %xmm2
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
+; X64-NEXT: paddq %xmm0, %xmm0
+; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movq %xmm0, %rbp
+; X64-NEXT: movq %rbp, %r12
+; X64-NEXT: shrq $33, %r12
+; X64-NEXT: movq %rbp, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shlq $31, %r14
+; X64-NEXT: orq %r14, %r12
+; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: pcmpgtd %xmm1, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movq %xmm1, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %rbx
+; X64-NEXT: sarq $63, %rbx
+; X64-NEXT: shlq $31, %rbp
+; X64-NEXT: movq %rbp, %rdi
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq %rbx, %rcx
+; X64-NEXT: callq __divti3
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: subq $1, %r13
+; X64-NEXT: sbbq $0, %r15
+; X64-NEXT: shrq $63, %r14
+; X64-NEXT: xorl %ebx, %r14d
+; X64-NEXT: movq %rbp, %rdi
+; X64-NEXT: movq %r12, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %rbx, %rcx
+; X64-NEXT: callq __modti3
+; X64-NEXT: orq %rax, %rdx
+; X64-NEXT: setne %al
+; X64-NEXT: testb %r14b, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: movl $4294967295, %edx # imm = 0xFFFFFFFF
+; X64-NEXT: cmpq %rdx, %r13
+; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NEXT: cmovbq %r13, %rax
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovnsq %rdx, %r13
+; X64-NEXT: cmoveq %rax, %r13
+; X64-NEXT: cmovnsq %rcx, %r15
+; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; X64-NEXT: cmpq %rcx, %r13
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: cmovaq %r13, %rax
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovsq %rcx, %r13
+; X64-NEXT: cmpq $-1, %r15
+; X64-NEXT: cmoveq %rax, %r13
+; X64-NEXT: movq %r13, %xmm0
+; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[2,3,0,1]
+; X64-NEXT: movq %xmm0, %r13
+; X64-NEXT: movq %r13, %rbx
+; X64-NEXT: shrq $33, %rbx
+; X64-NEXT: movq %r13, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shlq $31, %r14
+; X64-NEXT: orq %r14, %rbx
+; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[2,3,0,1]
+; X64-NEXT: movq %xmm0, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: sarq $63, %rbp
+; X64-NEXT: shlq $31, %r13
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: callq __divti3
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: subq $1, %r12
+; X64-NEXT: sbbq $0, %r15
+; X64-NEXT: shrq $63, %r14
+; X64-NEXT: xorl %ebp, %r14d
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: callq __modti3
+; X64-NEXT: orq %rax, %rdx
+; X64-NEXT: setne %al
+; X64-NEXT: testb %r14b, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-NEXT: cmpq %rcx, %r12
+; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NEXT: cmovbq %r12, %rax
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovnsq %rcx, %r12
+; X64-NEXT: cmoveq %rax, %r12
+; X64-NEXT: movl $0, %eax
+; X64-NEXT: cmovnsq %rax, %r15
+; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; X64-NEXT: cmpq %rcx, %r12
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: cmovaq %r12, %rax
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovsq %rcx, %r12
+; X64-NEXT: cmpq $-1, %r15
+; X64-NEXT: cmoveq %rax, %r12
+; X64-NEXT: movq %r12, %xmm0
+; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: psrlq $1, %xmm1
+; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; X64-NEXT: # xmm1 = mem[2,3,0,1]
+; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: pcmpgtd %xmm1, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: paddq %xmm1, %xmm1
+; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movq %xmm1, %r12
+; X64-NEXT: movq %r12, %rbx
+; X64-NEXT: shrq $33, %rbx
+; X64-NEXT: movq %r12, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shlq $31, %r14
+; X64-NEXT: orq %r14, %rbx
+; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; X64-NEXT: # xmm1 = mem[2,3,0,1]
+; X64-NEXT: pxor %xmm0, %xmm0
+; X64-NEXT: pcmpgtd %xmm1, %xmm0
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; X64-NEXT: movdqa %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: movq %xmm1, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: sarq $63, %rbp
+; X64-NEXT: shlq $31, %r12
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: callq __divti3
+; X64-NEXT: movq %rax, %r13
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: subq $1, %r13
+; X64-NEXT: sbbq $0, %r15
+; X64-NEXT: shrq $63, %r14
+; X64-NEXT: xorl %ebp, %r14d
+; X64-NEXT: movq %r12, %rdi
+; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: callq __modti3
+; X64-NEXT: orq %rax, %rdx
+; X64-NEXT: setne %al
+; X64-NEXT: testb %r14b, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r13 # 8-byte Folded Reload
+; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-NEXT: cmpq %rcx, %r13
+; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NEXT: cmovbq %r13, %rax
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovnsq %rcx, %r13
+; X64-NEXT: cmoveq %rax, %r13
+; X64-NEXT: movl $0, %eax
+; X64-NEXT: cmovnsq %rax, %r15
+; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; X64-NEXT: cmpq %rcx, %r13
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: cmovaq %r13, %rax
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovsq %rcx, %r13
+; X64-NEXT: cmpq $-1, %r15
+; X64-NEXT: cmoveq %rax, %r13
+; X64-NEXT: movq %r13, %xmm0
+; X64-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[2,3,0,1]
+; X64-NEXT: movq %xmm0, %r13
+; X64-NEXT: movq %r13, %rbx
+; X64-NEXT: shrq $33, %rbx
+; X64-NEXT: movq %r13, %r14
+; X64-NEXT: sarq $63, %r14
+; X64-NEXT: shlq $31, %r14
+; X64-NEXT: orq %r14, %rbx
+; X64-NEXT: pshufd $78, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
+; X64-NEXT: # xmm0 = mem[2,3,0,1]
+; X64-NEXT: movq %xmm0, %rdx
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %rbp
+; X64-NEXT: sarq $63, %rbp
+; X64-NEXT: shlq $31, %r13
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: callq __divti3
+; X64-NEXT: movq %rax, %r12
+; X64-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: movq %rdx, %r15
+; X64-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; X64-NEXT: subq $1, %r12
+; X64-NEXT: sbbq $0, %r15
+; X64-NEXT: shrq $63, %r14
+; X64-NEXT: xorl %ebp, %r14d
+; X64-NEXT: movq %r13, %rdi
+; X64-NEXT: movq %rbx, %rsi
+; X64-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rdx # 8-byte Reload
+; X64-NEXT: movq %rbp, %rcx
+; X64-NEXT: callq __modti3
+; X64-NEXT: orq %rax, %rdx
+; X64-NEXT: setne %al
+; X64-NEXT: testb %r14b, %al
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r15 # 8-byte Folded Reload
+; X64-NEXT: cmoveq {{[-0-9]+}}(%r{{[sb]}}p), %r12 # 8-byte Folded Reload
+; X64-NEXT: movl $4294967295, %ecx # imm = 0xFFFFFFFF
+; X64-NEXT: cmpq %rcx, %r12
+; X64-NEXT: movl $4294967295, %eax # imm = 0xFFFFFFFF
+; X64-NEXT: cmovbq %r12, %rax
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovnsq %rcx, %r12
+; X64-NEXT: cmoveq %rax, %r12
+; X64-NEXT: movl $0, %eax
+; X64-NEXT: cmovnsq %rax, %r15
+; X64-NEXT: movabsq $-4294967296, %rcx # imm = 0xFFFFFFFF00000000
+; X64-NEXT: cmpq %rcx, %r12
+; X64-NEXT: movq %rcx, %rax
+; X64-NEXT: cmovaq %r12, %rax
+; X64-NEXT: testq %r15, %r15
+; X64-NEXT: cmovsq %rcx, %r12
+; X64-NEXT: cmpq $-1, %r15
+; X64-NEXT: cmoveq %rax, %r12
+; X64-NEXT: movq %r12, %xmm0
+; X64-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
+; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
+; X64-NEXT: psrlq $1, %xmm1
+; X64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
+; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
+; X64-NEXT: addq $104, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r12
+; X64-NEXT: popq %r13
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %r15
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+;
+; X86-LABEL: vec:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $256, %esp # imm = 0x100
+; X86-NEXT: movl 24(%ebp), %ecx
+; X86-NEXT: movl 40(%ebp), %ebx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: adcl %eax, %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: shrl %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $31, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: negl %edi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl 36(%ebp), %edi
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl 20(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: adcl %eax, %eax
+; X86-NEXT: movl %ecx, %esi
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: shrl %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $31, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: negl %ebx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edi
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: adcl %eax, %eax
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: shrl %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $31, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: negl %esi
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __divti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: sarl $31, %edi
+; X86-NEXT: movl 16(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: adcl %eax, %eax
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: shrl %ecx
+; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: shll $31, %ebx
+; X86-NEXT: negl %esi
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl 32(%ebp)
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __divti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl 40(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __divti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl 36(%ebp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __divti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: sets %bl
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: setne %al
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: cmovel %esi, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bl
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: setne %al
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: cmovel %edx, %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: sets %al
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: sets %bl
+; X86-NEXT: xorb %al, %bl
+; X86-NEXT: leal {{[0-9]+}}(%esp), %eax
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl 28(%ebp)
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll __modti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: setne %al
+; X86-NEXT: testb %bl, %al
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl $1, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl $0, %ecx
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bl
+; X86-NEXT: cmpl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
+; X86-NEXT: sets %bh
+; X86-NEXT: xorb %bl, %bh
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: setne %al
+; X86-NEXT: testb %bh, %al
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel %edi, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: cmovsl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %edx
+; X86-NEXT: sarl $31, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %esi
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovsl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: sarl $31, %ebx
+; X86-NEXT: movl %ebx, %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovsl %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovsl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: sarl $31, %esi
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovel %eax, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: cmovsl %eax, %edi
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: testl %ecx, %ecx
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %ecx
+; X86-NEXT: cmpl $-1, %edx
+; X86-NEXT: movl $-1, %eax
+; X86-NEXT: cmovael %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: notl %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovbl %edx, %ecx
+; X86-NEXT: andl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmoval %eax, %ecx
+; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovnel %edx, %ecx
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovsl %edx, %esi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %esi, %edi
+; X86-NEXT: shldl $31, %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: cmpl $1, %esi
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: notl %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovbl %esi, %ecx
+; X86-NEXT: andl %esi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel %ecx, %ebx
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmoval %eax, %ecx
+; X86-NEXT: cmpl $-1, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: cmovnel %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: movl $-1, %edx
+; X86-NEXT: cmovsl %edx, %ebx
+; X86-NEXT: cmovsl %edi, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %ebx, %esi
+; X86-NEXT: shldl $31, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: cmovael %edx, %eax
+; X86-NEXT: movl $-1, %ebx
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: notl %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovbl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: andl %edx, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel %ecx, %edi
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmoval %eax, %ecx
+; X86-NEXT: cmpl $-1, %edi
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmovnel %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: testl %esi, %esi
+; X86-NEXT: cmovsl %ebx, %edi
+; X86-NEXT: cmovsl %edx, %eax
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: cmpl $-1, %esi
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: shldl $31, %eax, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: cmovael %ebx, %eax
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $0, %eax
+; X86-NEXT: sbbl %eax, %eax
+; X86-NEXT: notl %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovbl %edx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: andl %edx, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovel %ecx, %ebx
+; X86-NEXT: cmovnel {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmoval %eax, %ecx
+; X86-NEXT: cmpl $-1, %ebx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: cmovnel %edi, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: cmovsl %esi, %ebx
+; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: cmovsl %edi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: andl %edx, %ebx
+; X86-NEXT: cmpl $-1, %ebx
+; X86-NEXT: cmovel %ecx, %eax
+; X86-NEXT: cmovnel %esi, %ebx
+; X86-NEXT: shldl $31, %eax, %ebx
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: leal -12(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %tmp = call <4 x i32> @llvm.sdiv.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 31)
+ ret <4 x i32> %tmp
+}
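(A note on the long X64 sequence above: the legalized i128 path divides with `__divti3`, then calls `__modti3` and subtracts one from the quotient when the remainder is nonzero and the operand signs differ — that is, it rounds the scaled quotient toward negative infinity before clamping to the saturation bounds.)

For reference, a minimal worked example of the signed semantics these checks pin down. The operand values below are hypothetical, chosen only to illustrate scale-7 (Q8.7-style) arithmetic; they do not appear in the test itself:

```llvm
; -3.0 / 2.0 at scale 7: (-384 << 7) / 256 = -192, i.e. -1.5 at scale 7.
%q = call i16 @llvm.sdiv.fix.sat.i16(i16 -384, i16 256, i32 7)
; 128.0 / 0.5 at scale 7: (16384 << 7) / 64 = 32768, which exceeds the
; signed i16 maximum, so the result clamps to 32767 (~255.99 at scale 7).
%s = call i16 @llvm.sdiv.fix.sat.i16(i16 16384, i16 64, i32 7)
```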
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
new file mode 100644
index 000000000000..5c15bde7cc6a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -0,0 +1,528 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686 -mattr=cmov | FileCheck %s --check-prefix=X86
+
+declare i4 @llvm.udiv.fix.sat.i4 (i4, i4, i32)
+declare i15 @llvm.udiv.fix.sat.i15 (i15, i15, i32)
+declare i16 @llvm.udiv.fix.sat.i16 (i16, i16, i32)
+declare i18 @llvm.udiv.fix.sat.i18 (i18, i18, i32)
+declare i64 @llvm.udiv.fix.sat.i64 (i64, i64, i32)
+declare <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32>, <4 x i32>, i32)
+
+define i16 @func(i16 %x, i16 %y) nounwind {
+; X64-LABEL: func:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %si, %ecx
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: shll $8, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
+; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF
+; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: shll $8, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
+; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+ %tmp = call i16 @llvm.udiv.fix.sat.i16(i16 %x, i16 %y, i32 7)
+ ret i16 %tmp
+}
+
+define i16 @func2(i8 %x, i8 %y) nounwind {
+; X64-LABEL: func2:
+; X64: # %bb.0:
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: andl $32767, %eax # imm = 0x7FFF
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: andl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT: shll $14, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; X64-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT: cmovbl %eax, %ecx
+; X64-NEXT: addl %ecx, %ecx
+; X64-NEXT: movswl %cx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func2:
+; X86: # %bb.0:
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $32767, %eax # imm = 0x7FFF
+; X86-NEXT: shll $14, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: cmpl $32767, %eax # imm = 0x7FFF
+; X86-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT: cmovbl %eax, %ecx
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: movswl %cx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+ %x2 = sext i8 %x to i15
+ %y2 = sext i8 %y to i15
+ %tmp = call i15 @llvm.udiv.fix.sat.i15(i15 %x2, i15 %y2, i32 14)
+ %tmp2 = sext i15 %tmp to i16
+ ret i16 %tmp2
+}
+
+define i16 @func3(i15 %x, i8 %y) nounwind {
+; X64-LABEL: func3:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rdi), %eax
+; X64-NEXT: movzbl %sil, %ecx
+; X64-NEXT: shll $4, %ecx
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divw %cx
+; X64-NEXT: # kill: def $ax killed $ax def $eax
+; X64-NEXT: movzwl %ax, %ecx
+; X64-NEXT: cmpl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; X64-NEXT: cmovbl %eax, %ecx
+; X64-NEXT: addl %ecx, %ecx
+; X64-NEXT: movswl %cx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func3:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: movzbl %cl, %ecx
+; X86-NEXT: shll $4, %ecx
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divw %cx
+; X86-NEXT: # kill: def $ax killed $ax def $eax
+; X86-NEXT: movzwl %ax, %ecx
+; X86-NEXT: cmpl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT: movl $32767, %ecx # imm = 0x7FFF
+; X86-NEXT: cmovbl %eax, %ecx
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: movswl %cx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+ %y2 = sext i8 %y to i15
+ %y3 = shl i15 %y2, 7
+ %tmp = call i15 @llvm.udiv.fix.sat.i15(i15 %x, i15 %y3, i32 4)
+ %tmp2 = sext i15 %tmp to i16
+ ret i16 %tmp2
+}
+
+define i4 @func4(i4 %x, i4 %y) nounwind {
+; X64-LABEL: func4:
+; X64: # %bb.0:
+; X64-NEXT: andb $15, %sil
+; X64-NEXT: andb $15, %dil
+; X64-NEXT: shlb $2, %dil
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: divb %sil
+; X64-NEXT: movzbl %al, %ecx
+; X64-NEXT: cmpb $15, %cl
+; X64-NEXT: movl $15, %eax
+; X64-NEXT: cmovbl %ecx, %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func4:
+; X86: # %bb.0:
+; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
+; X86-NEXT: andb $15, %cl
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: andb $15, %al
+; X86-NEXT: shlb $2, %al
+; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: divb %cl
+; X86-NEXT: movzbl %al, %ecx
+; X86-NEXT: cmpb $15, %al
+; X86-NEXT: movl $15, %eax
+; X86-NEXT: cmovbl %ecx, %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
+; X86-NEXT: retl
+ %tmp = call i4 @llvm.udiv.fix.sat.i4(i4 %x, i4 %y, i32 2)
+ ret i4 %tmp
+}
+
+define i64 @func5(i64 %x, i64 %y) nounwind {
+; X64-LABEL: func5:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbx
+; X64-NEXT: movq %rsi, %rdx
+; X64-NEXT: leaq (%rdi,%rdi), %rsi
+; X64-NEXT: shrq $33, %rsi
+; X64-NEXT: movq %rdi, %rax
+; X64-NEXT: shrq $32, %rax
+; X64-NEXT: andl $-2147483648, %eax # imm = 0x80000000
+; X64-NEXT: orq %rax, %rsi
+; X64-NEXT: shlq $32, %rdi
+; X64-NEXT: xorl %ebx, %ebx
+; X64-NEXT: xorl %ecx, %ecx
+; X64-NEXT: callq __udivti3
+; X64-NEXT: cmpq $-1, %rax
+; X64-NEXT: movq $-1, %rcx
+; X64-NEXT: cmovbq %rax, %rcx
+; X64-NEXT: cmpq $1, %rdx
+; X64-NEXT: movl $1, %esi
+; X64-NEXT: cmovbq %rdx, %rsi
+; X64-NEXT: sbbq %rbx, %rbx
+; X64-NEXT: notq %rbx
+; X64-NEXT: orq %rax, %rbx
+; X64-NEXT: cmpq $1, %rdx
+; X64-NEXT: cmoveq %rcx, %rbx
+; X64-NEXT: shrdq $1, %rsi, %rbx
+; X64-NEXT: movq %rbx, %rax
+; X64-NEXT: popq %rbx
+; X64-NEXT: retq
+;
+; X86-LABEL: func5:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: movl %esp, %ebp
+; X86-NEXT: pushl %esi
+; X86-NEXT: andl $-8, %esp
+; X86-NEXT: subl $24, %esp
+; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl 12(%ebp), %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrl %edx
+; X86-NEXT: shldl $31, %eax, %ecx
+; X86-NEXT: shll $31, %eax
+; X86-NEXT: movl %esp, %esi
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl 20(%ebp)
+; X86-NEXT: pushl 16(%ebp)
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll __udivti3
+; X86-NEXT: addl $32, %esp
+; X86-NEXT: movl (%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: movl $-1, %esi
+; X86-NEXT: cmovbl %eax, %esi
+; X86-NEXT: cmpl $-1, %edx
+; X86-NEXT: cmovel %edx, %eax
+; X86-NEXT: cmovel %esi, %eax
+; X86-NEXT: cmovael %ecx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: orl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: leal -4(%ebp), %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl
+ %tmp = call i64 @llvm.udiv.fix.sat.i64(i64 %x, i64 %y, i32 31)
+ ret i64 %tmp
+}
+
+define i18 @func6(i16 %x, i16 %y) nounwind {
+; X64-LABEL: func6:
+; X64: # %bb.0:
+; X64-NEXT: movswl %di, %eax
+; X64-NEXT: andl $262143, %eax # imm = 0x3FFFF
+; X64-NEXT: movswl %si, %ecx
+; X64-NEXT: andl $262143, %ecx # imm = 0x3FFFF
+; X64-NEXT: shll $7, %eax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divl %ecx
+; X64-NEXT: cmpl $262143, %eax # imm = 0x3FFFF
+; X64-NEXT: movl $262143, %ecx # imm = 0x3FFFF
+; X64-NEXT: cmovael %ecx, %eax
+; X64-NEXT: retq
+;
+; X86-LABEL: func6:
+; X86: # %bb.0:
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: andl $262143, %ecx # imm = 0x3FFFF
+; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $262143, %eax # imm = 0x3FFFF
+; X86-NEXT: shll $7, %eax
+; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: divl %ecx
+; X86-NEXT: cmpl $262143, %eax # imm = 0x3FFFF
+; X86-NEXT: movl $262143, %ecx # imm = 0x3FFFF
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: retl
+ %x2 = sext i16 %x to i18
+ %y2 = sext i16 %y to i18
+ %tmp = call i18 @llvm.udiv.fix.sat.i18(i18 %x2, i18 %y2, i32 7)
+ ret i18 %tmp
+}
+
+define i16 @func7(i16 %x, i16 %y) nounwind {
+; X64-LABEL: func7:
+; X64: # %bb.0:
+; X64-NEXT: movzwl %si, %ecx
+; X64-NEXT: movzwl %di, %eax
+; X64-NEXT: addl %eax, %eax
+; X64-NEXT: shlq $16, %rax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divq %rcx
+; X64-NEXT: cmpq $131071, %rax # imm = 0x1FFFF
+; X64-NEXT: movl $131071, %ecx # imm = 0x1FFFF
+; X64-NEXT: cmovaeq %rcx, %rax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $rax
+; X64-NEXT: retq
+;
+; X86-LABEL: func7:
+; X86: # %bb.0:
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl %cx, %ecx
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: shrl $16, %edx
+; X86-NEXT: shll $16, %ecx
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %ecx
+; X86-NEXT: calll __udivdi3
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
+; X86-NEXT: movl $131071, %ecx # imm = 0x1FFFF
+; X86-NEXT: cmovael %ecx, %eax
+; X86-NEXT: testl %edx, %edx
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-NEXT: retl
+ %tmp = call i16 @llvm.udiv.fix.sat.i16(i16 %x, i16 %y, i32 16)
+ ret i16 %tmp
+}
+
+define <4 x i32> @vec(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X64-LABEL: vec:
+; X64: # %bb.0:
+; X64-NEXT: pxor %xmm8, %xmm8
+; X64-NEXT: movdqa %xmm1, %xmm2
+; X64-NEXT: punpckhdq {{.*#+}} xmm2 = xmm2[2],xmm8[2],xmm2[3],xmm8[3]
+; X64-NEXT: movq %xmm2, %rcx
+; X64-NEXT: movdqa %xmm0, %xmm4
+; X64-NEXT: punpckhdq {{.*#+}} xmm4 = xmm4[2],xmm8[2],xmm4[3],xmm8[3]
+; X64-NEXT: paddq %xmm4, %xmm4
+; X64-NEXT: psllq $31, %xmm4
+; X64-NEXT: movq %xmm4, %rax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divq %rcx
+; X64-NEXT: movq %rax, %xmm7
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm2[2,3,0,1]
+; X64-NEXT: movq %xmm2, %rcx
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm4[2,3,0,1]
+; X64-NEXT: movq %xmm2, %rax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divq %rcx
+; X64-NEXT: movq %rax, %xmm2
+; X64-NEXT: punpcklqdq {{.*#+}} xmm7 = xmm7[0],xmm2[0]
+; X64-NEXT: movdqa {{.*#+}} xmm4 = [9223372039002259456,9223372039002259456]
+; X64-NEXT: movdqa %xmm7, %xmm2
+; X64-NEXT: pxor %xmm4, %xmm2
+; X64-NEXT: movdqa {{.*#+}} xmm9 = [9223372043297226751,9223372043297226751]
+; X64-NEXT: movdqa %xmm9, %xmm6
+; X64-NEXT: pcmpgtd %xmm2, %xmm6
+; X64-NEXT: pshufd {{.*#+}} xmm3 = xmm6[0,0,2,2]
+; X64-NEXT: pcmpeqd %xmm9, %xmm2
+; X64-NEXT: pshufd {{.*#+}} xmm5 = xmm2[1,1,3,3]
+; X64-NEXT: pand %xmm3, %xmm5
+; X64-NEXT: pshufd {{.*#+}} xmm2 = xmm6[1,1,3,3]
+; X64-NEXT: por %xmm5, %xmm2
+; X64-NEXT: movdqa {{.*#+}} xmm6 = [8589934591,8589934591]
+; X64-NEXT: pand %xmm2, %xmm7
+; X64-NEXT: pandn %xmm6, %xmm2
+; X64-NEXT: por %xmm7, %xmm2
+; X64-NEXT: psrlq $1, %xmm2
+; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm8[0],xmm1[1],xmm8[1]
+; X64-NEXT: movq %xmm1, %rcx
+; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm8[0],xmm0[1],xmm8[1]
+; X64-NEXT: paddq %xmm0, %xmm0
+; X64-NEXT: psllq $31, %xmm0
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divq %rcx
+; X64-NEXT: movq %rax, %xmm3
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
+; X64-NEXT: movq %xmm1, %rcx
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
+; X64-NEXT: movq %xmm0, %rax
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: divq %rcx
+; X64-NEXT: movq %rax, %xmm0
+; X64-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm0[0]
+; X64-NEXT: pxor %xmm3, %xmm4
+; X64-NEXT: movdqa %xmm9, %xmm0
+; X64-NEXT: pcmpgtd %xmm4, %xmm0
+; X64-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,2,2]
+; X64-NEXT: pcmpeqd %xmm9, %xmm4
+; X64-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
+; X64-NEXT: pand %xmm1, %xmm4
+; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; X64-NEXT: por %xmm4, %xmm0
+; X64-NEXT: pand %xmm0, %xmm3
+; X64-NEXT: pandn %xmm6, %xmm0
+; X64-NEXT: por %xmm3, %xmm0
+; X64-NEXT: psrlq $1, %xmm0
+; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm2[0,2]
+; X64-NEXT: retq
+;
+; X86-LABEL: vec:
+; X86: # %bb.0:
+; X86-NEXT: pushl %ebp
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: subl $16, %esp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %ecx, %ecx
+; X86-NEXT: setb %al
+; X86-NEXT: shldl $31, %ecx, %eax
+; X86-NEXT: shll $31, %ecx
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %ecx
+; X86-NEXT: calll __udivdi3
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovbl %eax, %ecx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: notl %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %esi, %esi
+; X86-NEXT: setb %al
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X86-NEXT: cmovel %ecx, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $1, %ecx
+; X86-NEXT: cmovael %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $31, %esi, %eax
+; X86-NEXT: shll $31, %esi
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %esi
+; X86-NEXT: calll __udivdi3
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovbl %eax, %ecx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $1, %esi
+; X86-NEXT: cmovbl %edx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: notl %esi
+; X86-NEXT: orl %eax, %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: addl %edi, %edi
+; X86-NEXT: setb %al
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp
+; X86-NEXT: cmovel %ecx, %esi
+; X86-NEXT: shldl $31, %edi, %eax
+; X86-NEXT: shll $31, %edi
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %eax
+; X86-NEXT: pushl %edi
+; X86-NEXT: calll __udivdi3
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: movl $-1, %ebx
+; X86-NEXT: cmovbl %eax, %ebx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: notl %edi
+; X86-NEXT: orl %eax, %edi
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: addl %ebp, %ebp
+; X86-NEXT: setb %cl
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl %edx, %eax
+; X86-NEXT: movl $1, %edx
+; X86-NEXT: cmovael %edx, %eax
+; X86-NEXT: movl %eax, (%esp) # 4-byte Spill
+; X86-NEXT: cmovel %ebx, %edi
+; X86-NEXT: shldl $31, %ebp, %ecx
+; X86-NEXT: shll $31, %ebp
+; X86-NEXT: pushl $0
+; X86-NEXT: pushl {{[0-9]+}}(%esp)
+; X86-NEXT: pushl %ecx
+; X86-NEXT: pushl %ebp
+; X86-NEXT: calll __udivdi3
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: cmpl $-1, %eax
+; X86-NEXT: movl $-1, %ecx
+; X86-NEXT: cmovbl %eax, %ecx
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: movl $1, %ebx
+; X86-NEXT: cmovbl %edx, %ebx
+; X86-NEXT: movl $0, %ebp
+; X86-NEXT: sbbl %ebp, %ebp
+; X86-NEXT: notl %ebp
+; X86-NEXT: orl %eax, %ebp
+; X86-NEXT: cmpl $1, %edx
+; X86-NEXT: cmovel %ecx, %ebp
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shrdl $1, %eax, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shrdl $1, %eax, %esi
+; X86-NEXT: movl (%esp), %eax # 4-byte Reload
+; X86-NEXT: shrdl $1, %eax, %edi
+; X86-NEXT: shrdl $1, %ebx, %ebp
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %ebp, 12(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %esi, 4(%eax)
+; X86-NEXT: movl %ecx, (%eax)
+; X86-NEXT: addl $16, %esp
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
+; X86-NEXT: popl %ebp
+; X86-NEXT: retl $4
+ %tmp = call <4 x i32> @llvm.udiv.fix.sat.v4i32(<4 x i32> %x, <4 x i32> %y, i32 31)
+ ret <4 x i32> %tmp
+}
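The unsigned variant follows the same pattern but clamps only at the top of the range, as the constants in the checks above (e.g. 0x1FFFF for i16 at scale 7 before the final right shift, and 0xFFFFFFFF in @vec at scale 31) reflect. A minimal sketch of the semantics, mirroring the scale-7 call in @func; the constant operands here are hypothetical and chosen only for illustration:

```llvm
; 3.0 / 2.0 at scale 7: (384 << 7) / 256 = 192, i.e. 1.5 at scale 7.
%q = call i16 @llvm.udiv.fix.sat.i16(i16 384, i16 256, i32 7)
; 4.0 / (1/128) at scale 7: (512 << 7) / 1 = 65536, which exceeds the
; unsigned i16 maximum, so the result clamps to 65535 (~511.99 at scale 7).
%s = call i16 @llvm.udiv.fix.sat.i16(i16 512, i16 1, i32 7)
```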