[llvm] r362112 - [FPEnv] Added a special UnrollVectorOp method to deal with the chain on StrictFP opcodes

Kevin P. Neal via llvm-commits llvm-commits at lists.llvm.org
Thu May 30 09:44:48 PDT 2019


Author: kpn
Date: Thu May 30 09:44:47 2019
New Revision: 362112

URL: http://llvm.org/viewvc/llvm-project?rev=362112&view=rev
Log:
[FPEnv] Added a special UnrollVectorOp method to deal with the chain on StrictFP opcodes

This change creates UnrollVectorOp_StrictFP. The purpose of this is to address a failure that consistently occurs when calling StrictFP functions on vectors whose number of elements is 3 + 2n on most platforms, such as PowerPC or SystemZ. The old UnrollVectorOp method does not expect that the vector that it will unroll will have a chain, so it has an assert that prevents it from running if this is the case. This new StrictFP version of the method deals with the chain while unrolling the vector. With this new function in place during vector widening, llc can run vector-constrained-fp-intrinsics.ll for SystemZ successfully.

Submitted by:	Drew Wock <drew.wock at sas.com>
Reviewed by:	Cameron McInally, Kevin P. Neal
Approved by:	Cameron McInally
Differential Revision:	http://reviews.llvm.org/D62546

Added:
    llvm/trunk/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
    llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h?rev=362112&r1=362111&r2=362112&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeTypes.h Thu May 30 09:44:47 2019
@@ -857,6 +857,11 @@ private:
   SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
   SDValue WidenVecOp_VECREDUCE(SDNode *N);
 
+  /// Helper function to generate a set of scalar operations that
+  /// together perform a vector operation for a wider type, threading
+  /// the chain operand carried by strict FP nodes through each one.
+  SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
+
   //===--------------------------------------------------------------------===//
   // Vector Widening Utilities Support: LegalizeVectorTypes.cpp
   //===--------------------------------------------------------------------===//

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp?rev=362112&r1=362111&r2=362112&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp Thu May 30 09:44:47 2019
@@ -1318,6 +1318,63 @@ void DAGTypeLegalizer::SplitVecRes_Stric
   ReplaceValueWith(SDValue(N, 1), Chain);
 }
 
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+  SDValue Chain = N->getOperand(0);
+  EVT VT = N->getValueType(0);
+  unsigned NE = VT.getVectorNumElements();
+  EVT EltVT = VT.getVectorElementType();
+  SDLoc dl(N);
+
+  SmallVector<SDValue, 8> Scalars;
+  SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+  // If ResNE is 0, fully unroll the vector op.
+  if (ResNE == 0)
+    ResNE = NE;
+  else if (NE > ResNE)
+    NE = ResNE;
+
+  // The results of each unrolled operation, including the chain.
+  EVT ChainVTs[] = {EltVT, MVT::Other};
+  SmallVector<SDValue, 8> Chains;
+
+  unsigned i;
+  for (i = 0; i != NE; ++i) {
+    Operands[0] = Chain;
+    for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+      SDValue Operand = N->getOperand(j);
+      EVT OperandVT = Operand.getValueType();
+      if (OperandVT.isVector()) {
+        EVT OperandEltVT = OperandVT.getVectorElementType();
+        Operands[j] =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+                    DAG.getConstant(i, dl, TLI.getVectorIdxTy(
+                          DAG.getDataLayout())));
+      } else {
+        Operands[j] = Operand;
+      }
+    }
+    SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+    Scalar.getNode()->setFlags(N->getFlags());
+
+    // Add the scalar result as well as its chain value to the
+    // result vectors.
+    Scalars.push_back(Scalar);
+    Chains.push_back(Scalar.getValue(1));
+  }
+
+  for (; i < ResNE; ++i)
+    Scalars.push_back(DAG.getUNDEF(EltVT));
+
+  // Build a new factor node to connect the chain back together.
+  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+  ReplaceValueWith(SDValue(N, 1), Chain);
+
+  // Create a new BUILD_VECTOR node
+  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+  return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
 void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
                                               SDValue &Lo, SDValue &Hi) {
   SDLoc dl(N);
@@ -2968,7 +3025,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_St
 
   // No legal vector version so unroll the vector operation and then widen.
   if (NumElts == 1)
-    return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+    return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
 
   // Since the operation can trap, apply operation on the original vector.
   EVT MaxVT = VT;

Added: llvm/trunk/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll?rev=362112&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll Thu May 30 09:44:47 2019
@@ -0,0 +1,6445 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O3 -mtriple=s390x-linux-gnu < %s | FileCheck --check-prefix=S390X %s
+; RUN: llc -O3 -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck --check-prefix=SZ13 %s
+
+define <1 x float> @constrained_vector_fdiv_v1f32() {
+; S390X-LABEL: constrained_vector_fdiv_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI0_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI0_1
+; S390X-NEXT:    deb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI0_0
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    deb %f0, 0(%r1)
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <1 x float> @llvm.experimental.constrained.fdiv.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %div
+}
+
+define <2 x double> @constrained_vector_fdiv_v2f64() {
+; S390X-LABEL: constrained_vector_fdiv_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI1_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI1_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI1_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ddbr %f0, %f1
+; S390X-NEXT:    ddbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI1_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI1_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %div
+}
+
+define <3 x float> @constrained_vector_fdiv_v3f32() {
+; S390X-LABEL: constrained_vector_fdiv_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI2_0
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_1
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_2
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI2_3
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    debr %f0, %f1
+; S390X-NEXT:    debr %f2, %f1
+; S390X-NEXT:    debr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI2_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI2_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    debr %f1, %f0
+; SZ13-NEXT:    vgmf %v2, 2, 8
+; SZ13-NEXT:    vgmf %v3, 1, 1
+; SZ13-NEXT:    debr %f2, %f0
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    debr %f3, %f0
+; SZ13-NEXT:    vmrhf %v0, %v2, %v3
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <3 x float> @llvm.experimental.constrained.fdiv.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %div
+}
+
+define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fdiv_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI3_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI3_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI3_0
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ddb %f1, 16(%r2)
+; S390X-NEXT:    ddb %f0, 8(%r2)
+; S390X-NEXT:    ddb %f2, 0(%r2)
+; S390X-NEXT:    std %f1, 16(%r2)
+; S390X-NEXT:    std %f0, 8(%r2)
+; S390X-NEXT:    std %f2, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI3_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v0, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI3_1
+; SZ13-NEXT:    ldeb %f1, 0(%r1)
+; SZ13-NEXT:    ddb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %div = call <3 x double> @llvm.experimental.constrained.fdiv.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %div, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_fdiv_v4f64() {
+; S390X-LABEL: constrained_vector_fdiv_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI4_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_1
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI4_4
+; S390X-NEXT:    ldeb %f6, 0(%r1)
+; S390X-NEXT:    ddbr %f0, %f1
+; S390X-NEXT:    ddbr %f2, %f1
+; S390X-NEXT:    ddbr %f4, %f1
+; S390X-NEXT:    ddbr %f6, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fdiv_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI4_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI4_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v24, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI4_2
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfddb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %div = call <4 x double> @llvm.experimental.constrained.fdiv.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %div
+}
+
+define <1 x float> @constrained_vector_frem_v1f32() {
+; S390X-LABEL: constrained_vector_frem_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI5_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI5_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmodf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI5_0
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    brasl %r14, fmodf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <1 x float> @llvm.experimental.constrained.frem.v1f32(
+           <1 x float> <float 1.000000e+00>,
+           <1 x float> <float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %rem
+}
+
+define <2 x double> @constrained_vector_frem_v2f64() {
+; S390X-LABEL: constrained_vector_frem_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI6_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI6_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    larl %r1, .LCPI6_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -184
+; SZ13-NEXT:    .cfi_def_cfa_offset 344
+; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI6_0
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <2 x double> @llvm.experimental.constrained.frem.v2f64(
+           <2 x double> <double 1.000000e+00, double 2.000000e+00>,
+           <2 x double> <double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %rem
+}
+
+define <3 x float> @constrained_vector_frem_v3f32() {
+; S390X-LABEL: constrained_vector_frem_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI7_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI7_1
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf at PLT
+; S390X-NEXT:    larl %r1, .LCPI7_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf at PLT
+; S390X-NEXT:    larl %r1, .LCPI7_3
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmodf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI7_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI7_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmf %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmodf at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %rem = call <3 x float> @llvm.experimental.constrained.frem.v3f32(
+           <3 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>,
+           <3 x float> <float 1.000000e+01, float 1.000000e+01, float 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %rem
+}
+
+define void @constrained_vector_frem_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_frem_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f2, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI8_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    larl %r1, .LCPI8_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    larl %r1, .LCPI8_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v2, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v2, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    larl %r1, .LCPI8_0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %rem = call <3 x double> @llvm.experimental.constrained.frem.v3f64(
+           <3 x double> <double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %rem, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_frem_v4f64() {
+; S390X-LABEL: constrained_vector_frem_v4f64:
+; S390X:       # %bb.0:
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    larl %r1, .LCPI9_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI9_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    larl %r1, .LCPI9_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    larl %r1, .LCPI9_3
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    larl %r1, .LCPI9_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, fmod at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ldr %f2, %f10
+; S390X-NEXT:    ldr %f4, %f11
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_frem_v4f64:
+; SZ13:       # %bb.0:
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI9_0
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    vgmg %v0, 1, 1
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vgmg %v0, 2, 11
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI9_1
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    larl %r1, .LCPI9_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmod at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+  %rem = call <4 x double> @llvm.experimental.constrained.frem.v4f64(
+           <4 x double> <double 1.000000e+00, double 2.000000e+00,
+                         double 3.000000e+00, double 4.000000e+00>,
+           <4 x double> <double 1.000000e+01, double 1.000000e+01,
+                         double 1.000000e+01, double 1.000000e+01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %rem
+}
+
+define <1 x float> @constrained_vector_fmul_v1f32() {
+; S390X-LABEL: constrained_vector_fmul_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI10_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI10_1
+; S390X-NEXT:    meeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 1, 1
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    meebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <1 x float> @llvm.experimental.constrained.fmul.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 2.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %mul
+}
+
+define <2 x double> @constrained_vector_fmul_v2f64() {
+; S390X-LABEL: constrained_vector_fmul_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI11_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI11_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI11_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    mdbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI11_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI11_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfmdb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 2.000000e+00, double 3.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %mul
+}
+
+define <3 x float> @constrained_vector_fmul_v3f32() {
+; S390X-LABEL: constrained_vector_fmul_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI12_0
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_1
+; S390X-NEXT:    ler %f0, %f4
+; S390X-NEXT:    meeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_2
+; S390X-NEXT:    ler %f2, %f4
+; S390X-NEXT:    meeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI12_3
+; S390X-NEXT:    meeb %f4, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 1, 8
+; SZ13-NEXT:    larl %r1, .LCPI12_0
+; SZ13-NEXT:    vgmf %v2, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    meeb %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI12_1
+; SZ13-NEXT:    meebr %f2, %f0
+; SZ13-NEXT:    meeb %f0, 0(%r1)
+; SZ13-NEXT:    vmrhf %v0, %v2, %v0
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <3 x float> @llvm.experimental.constrained.fmul.v3f32(
+           <3 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000,
+                        float 0x7FF0000000000000>,
+           <3 x float> <float 1.000000e+00, float 1.000000e+01, float 1.000000e+02>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %mul
+}
+
+; Strict (constrained) fmul on <3 x double>: operand loaded from %a, product
+; stored back. The odd (3 = 2+1) element count exercises widening of a chained
+; StrictFP node, the case the new UnrollVectorOp_StrictFP path handles.
+define void @constrained_vector_fmul_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fmul_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI13_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    mdb %f0, 16(%r2)
+; S390X-NEXT:    mdb %f2, 8(%r2)
+; S390X-NEXT:    mdb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI13_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI13_1
+; SZ13-NEXT:    vfmdb %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    mdb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %mul = call <3 x double> @llvm.experimental.constrained.fmul.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                        double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %mul, <3 x double>* %a
+  ret void
+}
+
+; Strict fmul on <4 x double> constant operands; power-of-two width, legal
+; as two <2 x double> halves (returned in %v24/%v26 on z13).
+define <4 x double> @constrained_vector_fmul_v4f64() {
+; S390X-LABEL: constrained_vector_fmul_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI14_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI14_4
+; S390X-NEXT:    ldeb %f6, 0(%r1)
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    mdbr %f2, %f1
+; S390X-NEXT:    mdbr %f4, %f1
+; S390X-NEXT:    mdbr %f6, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fmul_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI14_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI14_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI14_2
+; SZ13-NEXT:    vfmdb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfmdb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %mul = call <4 x double> @llvm.experimental.constrained.fmul.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 2.000000e+00, double 3.000000e+00,
+                         double 4.000000e+00, double 5.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %mul
+}
+
+; Strict fadd on a single-element <1 x float>; scalarized to aeb/aebr.
+define <1 x float> @constrained_vector_fadd_v1f32() {
+; S390X-LABEL: constrained_vector_fadd_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI15_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI15_1
+; S390X-NEXT:    aeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    aebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <1 x float> @llvm.experimental.constrained.fadd.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %add
+}
+
+; Strict fadd on <2 x double> constants; legal vector width on z13 (vfadb).
+define <2 x double> @constrained_vector_fadd_v2f64() {
+; S390X-LABEL: constrained_vector_fadd_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI16_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI16_1
+; S390X-NEXT:    ld %f2, 0(%r1)
+; S390X-NEXT:    adbr %f0, %f2
+; S390X-NEXT:    larl %r1, .LCPI16_2
+; S390X-NEXT:    adb %f2, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI16_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI16_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    vfadb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
+           <2 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %add
+}
+
+; Strict fadd on <3 x float> constants; the 3-element result is assembled from
+; scalar aebr results via vmrhf/vmrhg. Odd width exercises the widening path.
+define <3 x float> @constrained_vector_fadd_v3f32() {
+; S390X-LABEL: constrained_vector_fadd_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI17_0
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI17_1
+; S390X-NEXT:    ler %f2, %f1
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    aeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI17_2
+; S390X-NEXT:    aeb %f2, 0(%r1)
+; S390X-NEXT:    lzer %f4
+; S390X-NEXT:    aebr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgbm %v0, 15
+; SZ13-NEXT:    vgmf %v2, 1, 1
+; SZ13-NEXT:    vgmf %v3, 2, 8
+; SZ13-NEXT:    lzer %f1
+; SZ13-NEXT:    aebr %f1, %f0
+; SZ13-NEXT:    aebr %f2, %f0
+; SZ13-NEXT:    aebr %f3, %f0
+; SZ13-NEXT:    vmrhf %v0, %v2, %v3
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <3 x float> @llvm.experimental.constrained.fadd.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %add
+}
+
+; Strict fadd on <3 x double> loaded from %a and stored back; z13 splits it
+; into a vfadb pair plus a scalar adb for the third lane.
+define void @constrained_vector_fadd_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fadd_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI18_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    adb %f0, 16(%r2)
+; S390X-NEXT:    adb %f2, 8(%r2)
+; S390X-NEXT:    adb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI18_0
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI18_1
+; SZ13-NEXT:    vfadb %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    adb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %add = call <3 x double> @llvm.experimental.constrained.fadd.v3f64(
+           <3 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %add, <3 x double>* %a
+  ret void
+}
+
+; Strict fadd on <4 x double> constants; split into two <2 x double> vfadb ops.
+define <4 x double> @constrained_vector_fadd_v4f64() {
+; S390X-LABEL: constrained_vector_fadd_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI19_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI19_1
+; S390X-NEXT:    ld %f6, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI19_3
+; S390X-NEXT:    ldeb %f4, 0(%r1)
+; S390X-NEXT:    adbr %f0, %f6
+; S390X-NEXT:    larl %r1, .LCPI19_2
+; S390X-NEXT:    ldr %f2, %f6
+; S390X-NEXT:    adb %f2, 0(%r1)
+; S390X-NEXT:    adbr %f4, %f6
+; S390X-NEXT:    larl %r1, .LCPI19_4
+; S390X-NEXT:    adb %f6, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fadd_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI19_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI19_1
+; SZ13-NEXT:    vl %v1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI19_2
+; SZ13-NEXT:    vfadb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfadb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %add = call <4 x double> @llvm.experimental.constrained.fadd.v4f64(
+           <4 x double> <double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF,
+                         double 0x7FEFFFFFFFFFFFFF, double 0x7FEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %add
+}
+
+; Strict fsub on a single-element <1 x float>; scalarized to seb/sebr.
+define <1 x float> @constrained_vector_fsub_v1f32() {
+; S390X-LABEL: constrained_vector_fsub_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI20_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI20_1
+; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 8
+; SZ13-NEXT:    vgmf %v1, 1, 8
+; SZ13-NEXT:    sebr %f1, %f0
+; SZ13-NEXT:    vlr %v24, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <1 x float> @llvm.experimental.constrained.fsub.v1f32(
+           <1 x float> <float 0x7FF0000000000000>,
+           <1 x float> <float 1.000000e+00>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %sub
+}
+
+; Strict fsub on <2 x double> constants; legal vector width on z13 (vfsdb).
+define <2 x double> @constrained_vector_fsub_v2f64() {
+; S390X-LABEL: constrained_vector_fsub_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI21_1
+; S390X-NEXT:    ld %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI21_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f0, %f2
+; S390X-NEXT:    larl %r1, .LCPI21_2
+; S390X-NEXT:    sdb %f2, 0(%r1)
+; S390X-NEXT:    sdbr %f0, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI21_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    vfsdb %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
+           <2 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <2 x double> <double 1.000000e+00, double 1.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %sub
+}
+
+; Strict fsub on <3 x float> constants; odd width exercises the widening path,
+; with the result reassembled from scalar sebr ops via vmrhf/vmrhg.
+define <3 x float> @constrained_vector_fsub_v3f32() {
+; S390X-LABEL: constrained_vector_fsub_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI22_0
+; S390X-NEXT:    le %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI22_1
+; S390X-NEXT:    ler %f0, %f4
+; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI22_2
+; S390X-NEXT:    ler %f2, %f4
+; S390X-NEXT:    seb %f2, 0(%r1)
+; S390X-NEXT:    lzer %f1
+; S390X-NEXT:    sebr %f4, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgbm %v2, 15
+; SZ13-NEXT:    lzer %f1
+; SZ13-NEXT:    sebr %f2, %f1
+; SZ13-NEXT:    vgmf %v1, 1, 1
+; SZ13-NEXT:    vgbm %v3, 15
+; SZ13-NEXT:    vgbm %v0, 15
+; SZ13-NEXT:    sebr %f3, %f1
+; SZ13-NEXT:    vgmf %v1, 2, 8
+; SZ13-NEXT:    sebr %f0, %f1
+; SZ13-NEXT:    vmrhf %v0, %v3, %v0
+; SZ13-NEXT:    vrepf %v1, %v2, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <3 x float> @llvm.experimental.constrained.fsub.v3f32(
+           <3 x float> <float 0xFFFFFFFFE0000000, float 0xFFFFFFFFE0000000,
+                        float 0xFFFFFFFFE0000000>,
+           <3 x float> <float 2.0, float 1.0, float 0.0>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %sub
+}
+
+; Strict fsub on <3 x double> loaded from %a and stored back; z13 emits a
+; vfsdb pair plus a scalar sdb for the third lane.
+define void @constrained_vector_fsub_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_fsub_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI23_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ldr %f1, %f0
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    sdb %f0, 16(%r2)
+; S390X-NEXT:    sdb %f2, 8(%r2)
+; S390X-NEXT:    sdb %f1, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f2, 8(%r2)
+; S390X-NEXT:    std %f1, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    vfsdb %v0, %v1, %v0
+; SZ13-NEXT:    sdb %f1, 16(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sub = call <3 x double> @llvm.experimental.constrained.fsub.v3f64(
+           <3 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF>,
+           <3 x double> %b,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  store <3 x double> %sub, <3 x double>* %a
+  ret void
+}
+
+; Strict fsub on <4 x double> constants; split into two <2 x double> vfsdb ops.
+define <4 x double> @constrained_vector_fsub_v4f64() {
+; S390X-LABEL: constrained_vector_fsub_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI24_1
+; S390X-NEXT:    ld %f6, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI24_0
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f0, %f6
+; S390X-NEXT:    larl %r1, .LCPI24_2
+; S390X-NEXT:    ldr %f2, %f6
+; S390X-NEXT:    sdb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI24_3
+; S390X-NEXT:    ldeb %f3, 0(%r1)
+; S390X-NEXT:    ldr %f4, %f6
+; S390X-NEXT:    larl %r1, .LCPI24_4
+; S390X-NEXT:    sdb %f6, 0(%r1)
+; S390X-NEXT:    sdbr %f0, %f1
+; S390X-NEXT:    sdbr %f4, %f3
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fsub_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI24_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vgmg %v1, 12, 10
+; SZ13-NEXT:    larl %r1, .LCPI24_1
+; SZ13-NEXT:    vfsdb %v24, %v1, %v0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsdb %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sub = call <4 x double> @llvm.experimental.constrained.fsub.v4f64(
+           <4 x double> <double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF,
+                         double 0xFFEFFFFFFFFFFFFF, double 0xFFEFFFFFFFFFFFFF>,
+           <4 x double> <double 1.000000e+00, double 1.000000e-01,
+                         double 2.000000e+00, double 2.000000e-01>,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %sub
+}
+
+; Strict sqrt on a single-element <1 x float>; scalarized to sqeb.
+define <1 x float> @constrained_vector_sqrt_v1f32() {
+; S390X-LABEL: constrained_vector_sqrt_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI25_0
+; S390X-NEXT:    sqeb %f0, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI25_0
+; SZ13-NEXT:    sqeb %f0, 0(%r1)
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <1 x float> @llvm.experimental.constrained.sqrt.v1f32(
+                              <1 x float> <float 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %sqrt
+}
+
+; Strict sqrt on <2 x double> constants; legal vector width on z13 (vfsqdb).
+define <2 x double> @constrained_vector_sqrt_v2f64() {
+; S390X-LABEL: constrained_vector_sqrt_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI26_0
+; S390X-NEXT:    larl %r2, .LCPI26_1
+; S390X-NEXT:    ldeb %f0, 0(%r2)
+; S390X-NEXT:    sqdb %f2, 0(%r1)
+; S390X-NEXT:    sqdbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI26_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %sqrt
+}
+
+; Strict sqrt on <3 x float> constants; a unary StrictFP op with odd width,
+; unrolled into three scalar sqeb ops and reassembled with vmrhf/vmrhg.
+define <3 x float> @constrained_vector_sqrt_v3f32() {
+; S390X-LABEL: constrained_vector_sqrt_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI27_0
+; S390X-NEXT:    sqeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI27_1
+; S390X-NEXT:    sqeb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI27_2
+; S390X-NEXT:    sqeb %f4, 0(%r1)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI27_0
+; SZ13-NEXT:    sqeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI27_1
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    sqeb %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI27_2
+; SZ13-NEXT:    sqeb %f2, 0(%r1)
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <3 x float> @llvm.experimental.constrained.sqrt.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sqrt
+}
+
+; Strict sqrt on <3 x double> loaded from %a and stored back; z13 emits a
+; vfsqdb pair plus a scalar sqdb for the third lane.
+define void @constrained_vector_sqrt_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_sqrt_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    sqdb %f0, 16(%r2)
+; S390X-NEXT:    sqdb %f1, 8(%r2)
+; S390X-NEXT:    sqdb %f2, 0(%r2)
+; S390X-NEXT:    std %f0, 16(%r2)
+; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f2, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    sqdb %f1, 16(%r2)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    std %f1, 16(%r2)
+; SZ13-NEXT:    vfsqdb %v0, %v0
+; SZ13-NEXT:    vst %v0, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sqrt = call <3 x double> @llvm.experimental.constrained.sqrt.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %sqrt, <3 x double>* %a
+  ret void
+}
+
+; Strict sqrt on <4 x double> constants; split into two <2 x double> vfsqdb ops.
+define <4 x double> @constrained_vector_sqrt_v4f64() {
+; S390X-LABEL: constrained_vector_sqrt_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI29_0
+; S390X-NEXT:    sqdb %f2, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_1
+; S390X-NEXT:    sqdb %f4, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_3
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI29_2
+; S390X-NEXT:    sqdb %f6, 0(%r1)
+; S390X-NEXT:    sqdbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sqrt_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI29_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v24, %v0
+; SZ13-NEXT:    larl %r1, .LCPI29_1
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfsqdb %v26, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %sqrt = call <4 x double> @llvm.experimental.constrained.sqrt.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %sqrt
+}
+
+; Strict pow on <1 x float>; scalarized to a libcall. The mail archive mangled
+; "powf@PLT" to "powf at PLT" in the CHECK lines; restored here so FileCheck
+; matches llc's actual call syntax.
+define <1 x float> @constrained_vector_pow_v1f32() {
+; S390X-LABEL: constrained_vector_pow_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI30_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI30_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI30_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI30_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <1 x float> @llvm.experimental.constrained.pow.v1f32(
+                             <1 x float> <float 42.0>,
+                             <1 x float> <float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %pow
+}
+
+; Strict pow on <2 x double>; no vector libcall, so each lane becomes a call
+; to pow with results re-merged (vmrhg on z13). The mail archive mangled
+; "pow@PLT" to "pow at PLT" in the CHECK lines; restored here.
+define <2 x double> @constrained_vector_pow_v2f64() {
+; S390X-LABEL: constrained_vector_pow_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI31_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI31_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    larl %r1, .LCPI31_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -184
+; SZ13-NEXT:    .cfi_def_cfa_offset 344
+; SZ13-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI31_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI31_1
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    larl %r1, .LCPI31_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 296(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <2 x double> @llvm.experimental.constrained.pow.v2f64(
+                             <2 x double> <double 42.1, double 42.2>,
+                             <2 x double> <double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %pow
+}
+
+; Strict pow on <3 x float>; odd width plus a chained libcall per lane — the
+; widening case UnrollVectorOp_StrictFP addresses. The mail archive mangled
+; "powf@PLT" to "powf at PLT" in the CHECK lines; restored here.
+define <3 x float> @constrained_vector_pow_v3f32() {
+; S390X-LABEL: constrained_vector_pow_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI32_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI32_1
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    larl %r1, .LCPI32_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    larl %r1, .LCPI32_3
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, powf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI32_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI32_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI32_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI32_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, powf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <3 x float> @llvm.experimental.constrained.pow.v3f32(
+                             <3 x float> <float 42.0, float 43.0, float 44.0>,
+                             <3 x float> <float 3.0, float 3.0, float 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <3 x float> %pow
+}
+
+; Strict pow on <3 x double> loaded from %a and stored back; three chained pow
+; libcalls with the first two lanes re-merged via vmrhg. The mail archive
+; mangled "pow@PLT" to "pow at PLT" in the CHECK lines; restored here.
+define void @constrained_vector_pow_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_pow_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI33_0
+; S390X-NEXT:    ldeb %f9, 0(%r1)
+; S390X-NEXT:    ld %f10, 8(%r2)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f10
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, pow@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f10, 8(%r13)
+; S390X-NEXT:    std %f11, 0(%r13)
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -208
+; SZ13-NEXT:    .cfi_def_cfa_offset 368
+; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    .cfi_offset %f9, -176
+; SZ13-NEXT:    larl %r1, .LCPI33_0
+; SZ13-NEXT:    ldeb %f9, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    brasl %r14, pow@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %pow = call <3 x double> @llvm.experimental.constrained.pow.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %pow, <3 x double>* %a
+  ret void
+}
+
+; Constrained pow on <4 x double>: S390X expands to four scalar pow libcalls; SZ13 pairs the scalar results back into two vector registers with vmrhg. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <4 x double> @constrained_vector_pow_v4f64() {
+; S390X-LABEL: constrained_vector_pow_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    larl %r1, .LCPI34_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI34_1
+; S390X-NEXT:    ldeb %f8, 0(%r1)
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow at PLT
+; S390X-NEXT:    larl %r1, .LCPI34_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow at PLT
+; S390X-NEXT:    larl %r1, .LCPI34_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow at PLT
+; S390X-NEXT:    larl %r1, .LCPI34_4
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    ldr %f2, %f8
+; S390X-NEXT:    brasl %r14, pow at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    ldr %f2, %f10
+; S390X-NEXT:    ldr %f4, %f11
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 304(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_pow_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI34_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI34_1
+; SZ13-NEXT:    ldeb %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow at PLT
+; SZ13-NEXT:    larl %r1, .LCPI34_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI34_3
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow at PLT
+; SZ13-NEXT:    larl %r1, .LCPI34_4
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, pow at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %pow = call <4 x double> @llvm.experimental.constrained.pow.v4f64(
+                             <4 x double> <double 42.1, double 42.2,
+                                           double 42.3, double 42.4>,
+                             <4 x double> <double 3.0, double 3.0,
+                                           double 3.0, double 3.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %pow
+}
+
+; Constrained powi on <1 x float>: lowers to a single __powisf2 libcall with the i32 power (3) in %r2. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <1 x float> @constrained_vector_powi_v1f32() {
+; S390X-LABEL: constrained_vector_powi_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI35_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powisf2 at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI35_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2 at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <1 x float> @llvm.experimental.constrained.powi.v1f32(
+                              <1 x float> <float 42.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <1 x float> %powi
+}
+
+; Constrained powi on <2 x double>: two __powidf2 libcalls; SZ13 recombines the lanes with vmrhg. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <2 x double> @constrained_vector_powi_v2f64() {
+; S390X-LABEL: constrained_vector_powi_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI36_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI36_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI36_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI36_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <2 x double> @llvm.experimental.constrained.powi.v2f64(
+                              <2 x double> <double 42.1, double 42.2>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %powi
+}
+
+; Constrained powi on odd-width <3 x float> - the 3+2n case the strict-FP unroll targets: three __powisf2 libcalls, with SZ13 rebuilding the vector via vmrhf/vrepf/vmrhg. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <3 x float> @constrained_vector_powi_v3f32() {
+; S390X-LABEL: constrained_vector_powi_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI37_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powisf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI37_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, __powisf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI37_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, __powisf2 at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI37_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI37_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI37_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powisf2 at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <3 x float> @llvm.experimental.constrained.powi.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %powi
+}
+
+; Constrained powi on <3 x double>, returned via the %a pointer (too wide for return registers): three __powidf2 libcalls, result stored back with std/vst. Note the loaded %b is unused; only the constant vector feeds powi. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define void @constrained_vector_powi_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_powi_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI38_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI38_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI38_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    std %f0, 8(%r13)
+; S390X-NEXT:    std %f9, 0(%r13)
+; S390X-NEXT:    std %f8, 16(%r13)
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI38_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI38_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI38_2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 280(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %powi = call <3 x double> @llvm.experimental.constrained.powi.v3f64(
+                          <3 x double> <double 42.0, double 42.1, double 42.2>,
+                          i32 3,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %powi, <3 x double>* %a
+  ret void
+}
+
+; Constrained powi on <4 x double>: four __powidf2 libcalls; SZ13 merges the results into %v24/%v26 with vmrhg. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <4 x double> @constrained_vector_powi_v4f64() {
+; S390X-LABEL: constrained_vector_powi_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI39_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI39_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI39_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI39_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    lghi %r2, 3
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, __powidf2 at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_powi_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI39_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI39_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI39_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI39_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    lghi %r2, 3
+; SZ13-NEXT:    brasl %r14, __powidf2 at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %powi = call <4 x double> @llvm.experimental.constrained.powi.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              i32 3,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %powi
+}
+
+; Constrained sin on <1 x float>: a single sinf libcall on both targets. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <1 x float> @constrained_vector_sin_v1f32() {
+; S390X-LABEL: constrained_vector_sin_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI40_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sinf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI40_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <1 x float> @llvm.experimental.constrained.sin.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %sin
+}
+
+; Constrained sin on <2 x double>: two sin libcalls; SZ13 recombines the lanes with vmrhg. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <2 x double> @constrained_vector_sin_v2f64() {
+; S390X-LABEL: constrained_vector_sin_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI41_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    larl %r1, .LCPI41_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI41_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    larl %r1, .LCPI41_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <2 x double> @llvm.experimental.constrained.sin.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %sin
+}
+
+; Constrained sin on odd-width <3 x float> (the 3+2n widening case): three sinf libcalls, results reassembled with vmrhf/vrepf/vmrhg on SZ13. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <3 x float> @constrained_vector_sin_v3f32() {
+; S390X-LABEL: constrained_vector_sin_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI42_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sinf at PLT
+; S390X-NEXT:    larl %r1, .LCPI42_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, sinf at PLT
+; S390X-NEXT:    larl %r1, .LCPI42_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, sinf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI42_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI42_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI42_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sinf at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <3 x float> @llvm.experimental.constrained.sin.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %sin
+}
+
+; Constrained sin on <3 x double> with a non-constant operand loaded from %a and the result stored back: three sin libcalls. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define void @constrained_vector_sin_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_sin_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %sin = call <3 x double> @llvm.experimental.constrained.sin.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %sin, <3 x double>* %a
+  ret void
+}
+
+; Constrained sin on <4 x double>: four sin libcalls; SZ13 merges the scalar results into %v24/%v26 with vmrhg. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <4 x double> @constrained_vector_sin_v4f64() {
+; S390X-LABEL: constrained_vector_sin_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI44_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    larl %r1, .LCPI44_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    larl %r1, .LCPI44_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    larl %r1, .LCPI44_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, sin at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_sin_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI44_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    larl %r1, .LCPI44_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI44_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    larl %r1, .LCPI44_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, sin at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %sin = call <4 x double> @llvm.experimental.constrained.sin.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %sin
+}
+
+; Constrained cos on <1 x float>: a single cosf libcall on both targets. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <1 x float> @constrained_vector_cos_v1f32() {
+; S390X-LABEL: constrained_vector_cos_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI45_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cosf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI45_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <1 x float> @llvm.experimental.constrained.cos.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %cos
+}
+
+; Constrained cos on <2 x double>: two cos libcalls; SZ13 recombines the lanes with vmrhg. CHECK lines look auto-generated - regenerate, don't hand-edit (TODO confirm).
+define <2 x double> @constrained_vector_cos_v2f64() {
+; S390X-LABEL: constrained_vector_cos_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI46_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    larl %r1, .LCPI46_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI46_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    larl %r1, .LCPI46_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <2 x double> @llvm.experimental.constrained.cos.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %cos
+}
+
+define <3 x float> @constrained_vector_cos_v3f32() {
+; S390X-LABEL: constrained_vector_cos_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI47_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cosf at PLT
+; S390X-NEXT:    larl %r1, .LCPI47_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, cosf at PLT
+; S390X-NEXT:    larl %r1, .LCPI47_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, cosf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI47_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI47_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI47_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cosf at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <3 x float> @llvm.experimental.constrained.cos.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %cos
+}
+
+define void @constrained_vector_cos_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_cos_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %cos = call <3 x double> @llvm.experimental.constrained.cos.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %cos, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_cos_v4f64() {
+; S390X-LABEL: constrained_vector_cos_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI49_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    larl %r1, .LCPI49_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    larl %r1, .LCPI49_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    larl %r1, .LCPI49_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, cos at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_cos_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI49_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    larl %r1, .LCPI49_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI49_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    larl %r1, .LCPI49_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, cos at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %cos = call <4 x double> @llvm.experimental.constrained.cos.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %cos
+}
+
+define <1 x float> @constrained_vector_exp_v1f32() {
+; S390X-LABEL: constrained_vector_exp_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI50_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, expf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI50_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <1 x float> @llvm.experimental.constrained.exp.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp
+}
+
+define <2 x double> @constrained_vector_exp_v2f64() {
+; S390X-LABEL: constrained_vector_exp_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI51_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    larl %r1, .LCPI51_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI51_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    larl %r1, .LCPI51_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <2 x double> @llvm.experimental.constrained.exp.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %exp
+}
+
+define <3 x float> @constrained_vector_exp_v3f32() {
+; S390X-LABEL: constrained_vector_exp_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI52_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, expf at PLT
+; S390X-NEXT:    larl %r1, .LCPI52_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, expf at PLT
+; S390X-NEXT:    larl %r1, .LCPI52_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, expf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI52_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI52_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI52_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, expf at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <3 x float> @llvm.experimental.constrained.exp.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp
+}
+
+define void @constrained_vector_exp_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_exp_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %exp = call <3 x double> @llvm.experimental.constrained.exp.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %exp, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_exp_v4f64() {
+; S390X-LABEL: constrained_vector_exp_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI54_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    larl %r1, .LCPI54_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    larl %r1, .LCPI54_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    larl %r1, .LCPI54_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI54_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    larl %r1, .LCPI54_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI54_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    larl %r1, .LCPI54_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp = call <4 x double> @llvm.experimental.constrained.exp.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %exp
+}
+
+define <1 x float> @constrained_vector_exp2_v1f32() {
+; S390X-LABEL: constrained_vector_exp2_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI55_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2f at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI55_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <1 x float> @llvm.experimental.constrained.exp2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %exp2
+}
+
+define <2 x double> @constrained_vector_exp2_v2f64() {
+; S390X-LABEL: constrained_vector_exp2_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI56_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI56_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2 at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI56_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI56_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2 at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <2 x double> @llvm.experimental.constrained.exp2.v2f64(
+                              <2 x double> <double 42.1, double 42.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %exp2
+}
+
+define <3 x float> @constrained_vector_exp2_v3f32() {
+; S390X-LABEL: constrained_vector_exp2_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI57_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2f at PLT
+; S390X-NEXT:    larl %r1, .LCPI57_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2f at PLT
+; S390X-NEXT:    larl %r1, .LCPI57_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2f at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI57_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f at PLT
+; SZ13-NEXT:    larl %r1, .LCPI57_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f at PLT
+; SZ13-NEXT:    larl %r1, .LCPI57_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2f at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <3 x float> @llvm.experimental.constrained.exp2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %exp2
+}
+
+define void @constrained_vector_exp2_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_exp2_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, exp2 at PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, exp2 at PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, exp2 at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp2 at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, exp2 at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, exp2 at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %exp2 = call <3 x double> @llvm.experimental.constrained.exp2.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %exp2, <3 x double>* %a
+  ret void
+}
+
+; Strict exp2 on <4 x double>: no vector libcall exists, so the operand is
+; scalarized into four calls to exp2@PLT on both targets; the SZ13 run
+; reassembles the results into %v24/%v26 with vmrhg.
+define <4 x double> @constrained_vector_exp2_v4f64() {
+; S390X-LABEL: constrained_vector_exp2_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI59_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    larl %r1, .LCPI59_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, exp2@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_exp2_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI59_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI59_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI59_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI59_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, exp2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %exp2 = call <4 x double> @llvm.experimental.constrained.exp2.v4f64(
+                              <4 x double> <double 42.1, double 42.2,
+                                            double 42.3, double 42.4>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %exp2
+}
+
+; Strict log on <1 x float> is scalarized to a single call to logf@PLT.
+define <1 x float> @constrained_vector_log_v1f32() {
+; S390X-LABEL: constrained_vector_log_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI60_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI60_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <1 x float> @llvm.experimental.constrained.log.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log
+}
+
+; Strict log on <2 x double> is unrolled into two calls to log@PLT; the SZ13
+; run merges the two scalar results back into %v24 with vmrhg.
+define <2 x double> @constrained_vector_log_v2f64() {
+; S390X-LABEL: constrained_vector_log_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI61_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI61_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI61_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI61_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <2 x double> @llvm.experimental.constrained.log.v2f64(
+                             <2 x double> <double 42.0, double 42.1>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <2 x double> %log
+}
+
+; Strict log on a 3-element vector: this odd width is widened and then
+; unrolled (the UnrollVectorOp_StrictFP path this test was added for),
+; producing three calls to logf@PLT.
+define <3 x float> @constrained_vector_log_v3f32() {
+; S390X-LABEL: constrained_vector_log_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI62_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    larl %r1, .LCPI62_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    larl %r1, .LCPI62_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, logf@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI62_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI62_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    larl %r1, .LCPI62_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, logf@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <3 x float> @llvm.experimental.constrained.log.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log
+}
+
+; Strict log on <3 x double> loaded from and stored back to memory: three
+; calls to log@PLT. The 3-element width exercises the strict-FP unrolling of
+; a widened vector (the case this test was added to cover).
+define void @constrained_vector_log_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log = call <3 x double> @llvm.experimental.constrained.log.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log, <3 x double>* %a
+  ret void
+}
+
+; Strict log on <4 x double>: unrolled into four calls to log@PLT; the SZ13
+; run rebuilds the two result halves into %v24/%v26 with vmrhg.
+define <4 x double> @constrained_vector_log_v4f64() {
+; S390X-LABEL: constrained_vector_log_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI64_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    larl %r1, .LCPI64_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI64_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI64_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI64_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    larl %r1, .LCPI64_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log = call <4 x double> @llvm.experimental.constrained.log.v4f64(
+                             <4 x double> <double 42.0, double 42.1,
+                                           double 42.2, double 42.3>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <4 x double> %log
+}
+
+; Strict log10 on <1 x float> is scalarized to a single call to log10f@PLT.
+define <1 x float> @constrained_vector_log10_v1f32() {
+; S390X-LABEL: constrained_vector_log10_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI65_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI65_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <1 x float> @llvm.experimental.constrained.log10.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log10
+}
+
+; Strict log10 on <2 x double> is unrolled into two calls to log10@PLT; the
+; SZ13 run merges the scalar results back into %v24 with vmrhg.
+define <2 x double> @constrained_vector_log10_v2f64() {
+; S390X-LABEL: constrained_vector_log10_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI66_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI66_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI66_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI66_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <2 x double> @llvm.experimental.constrained.log10.v2f64(
+                               <2 x double> <double 42.0, double 42.1>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <2 x double> %log10
+}
+
+; Strict log10 on a 3-element vector: the odd width is widened and then
+; unrolled through the strict-FP path, yielding three calls to log10f@PLT.
+define <3 x float> @constrained_vector_log10_v3f32() {
+; S390X-LABEL: constrained_vector_log10_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI67_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    larl %r1, .LCPI67_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    larl %r1, .LCPI67_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log10f@PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI67_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI67_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    larl %r1, .LCPI67_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10f@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <3 x float> @llvm.experimental.constrained.log10.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log10
+}
+
+; Strict log10 on <3 x double> loaded from and stored back to memory: three
+; calls to log10@PLT through the widened-vector strict-FP unrolling path.
+define void @constrained_vector_log10_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log10_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log10 = call <3 x double> @llvm.experimental.constrained.log10.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log10, <3 x double>* %a
+  ret void
+}
+
+; Strict log10 on <4 x double>: unrolled into four calls to log10@PLT; the
+; SZ13 run rebuilds the two result halves into %v24/%v26 with vmrhg.
+define <4 x double> @constrained_vector_log10_v4f64() {
+; S390X-LABEL: constrained_vector_log10_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI69_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    larl %r1, .LCPI69_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log10@PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI69_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI69_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI69_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    larl %r1, .LCPI69_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log10@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log10 = call <4 x double> @llvm.experimental.constrained.log10.v4f64(
+                               <4 x double> <double 42.0, double 42.1,
+                                             double 42.2, double 42.3>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <4 x double> %log10
+}
+
+; Strict log2 on <1 x float> is scalarized to a single call to log2f@PLT.
+define <1 x float> @constrained_vector_log2_v1f32() {
+; S390X-LABEL: constrained_vector_log2_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI70_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2f@PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI70_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f@PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <1 x float> @llvm.experimental.constrained.log2.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %log2
+}
+
+; Strict log2 on <2 x double> is unrolled into two calls to log2@PLT; the
+; SZ13 run merges the scalar results back into %v24 with vmrhg.
+define <2 x double> @constrained_vector_log2_v2f64() {
+; S390X-LABEL: constrained_vector_log2_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI71_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    larl %r1, .LCPI71_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2@PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI71_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    larl %r1, .LCPI71_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2@PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <2 x double> @llvm.experimental.constrained.log2.v2f64(
+                              <2 x double> <double 42.0, double 42.1>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <2 x double> %log2
+}
+
+define <3 x float> @constrained_vector_log2_v3f32() {
+; S390X-LABEL: constrained_vector_log2_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI72_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2f at PLT
+; S390X-NEXT:    larl %r1, .LCPI72_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log2f at PLT
+; S390X-NEXT:    larl %r1, .LCPI72_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, log2f at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI72_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f at PLT
+; SZ13-NEXT:    larl %r1, .LCPI72_1
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f at PLT
+; SZ13-NEXT:    larl %r1, .LCPI72_2
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2f at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <3 x float> @llvm.experimental.constrained.log2.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %log2
+}
+
+define void @constrained_vector_log2_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log2_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, log2 at PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, log2 at PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, log2 at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log2 at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, log2 at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, log2 at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %log2 = call <3 x double> @llvm.experimental.constrained.log2.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %log2, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_log2_v4f64() {
+; S390X-LABEL: constrained_vector_log2_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI74_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, log2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI74_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI74_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2 at PLT
+; S390X-NEXT:    larl %r1, .LCPI74_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, log2 at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log2_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI74_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI74_1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2 at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI74_2
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2 at PLT
+; SZ13-NEXT:    larl %r1, .LCPI74_3
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    brasl %r14, log2 at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %log2 = call <4 x double> @llvm.experimental.constrained.log2.v4f64(
+                              <4 x double> <double 42.0, double 42.1,
+                                            double 42.2, double 42.3>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <4 x double> %log2
+}
+
+define <1 x float> @constrained_vector_rint_v1f32() {
+; S390X-LABEL: constrained_vector_rint_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI75_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    fiebr %f0, 0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI75_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <1 x float> @llvm.experimental.constrained.rint.v1f32(
+                             <1 x float> <float 42.0>,
+                             metadata !"round.dynamic",
+                             metadata !"fpexcept.strict")
+  ret <1 x float> %rint
+}
+
+define <2 x double> @constrained_vector_rint_v2f64() {
+; S390X-LABEL: constrained_vector_rint_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI76_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI76_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f2, 0, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI76_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <2 x double> @llvm.experimental.constrained.rint.v2f64(
+                        <2 x double> <double 42.1, double 42.0>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <2 x double> %rint
+}
+
+define <3 x float> @constrained_vector_rint_v3f32() {
+; S390X-LABEL: constrained_vector_rint_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI77_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI77_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI77_2
+; S390X-NEXT:    le %f3, 0(%r1)
+; S390X-NEXT:    fiebr %f0, 0, %f0
+; S390X-NEXT:    fiebr %f2, 0, %f1
+; S390X-NEXT:    fiebr %f4, 0, %f3
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI77_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI77_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI77_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    fiebr %f0, 0, %f0
+; SZ13-NEXT:    fiebr %f1, 0, %f1
+; SZ13-NEXT:    fiebr %f2, 0, %f2
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+ entry:
+  %rint = call <3 x float> @llvm.experimental.constrained.rint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %rint
+}
+
+define void @constrained_vector_rint_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_rint_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f1, 8(%r2)
+; S390X-NEXT:    ld %f2, 16(%r2)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f1, 0, %f1
+; S390X-NEXT:    fidbr %f2, 0, %f2
+; S390X-NEXT:    std %f2, 16(%r2)
+; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f0, 0(%r2)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 0, 0
+; SZ13-NEXT:    fidbra %f0, 0, %f0, 0
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %rint = call <3 x double> @llvm.experimental.constrained.rint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %rint, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_rint_v4f64() {
+; S390X-LABEL: constrained_vector_rint_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI79_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_2
+; S390X-NEXT:    ld %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI79_3
+; S390X-NEXT:    ld %f5, 0(%r1)
+; S390X-NEXT:    fidbr %f0, 0, %f0
+; S390X-NEXT:    fidbr %f2, 0, %f1
+; S390X-NEXT:    fidbr %f4, 0, %f3
+; S390X-NEXT:    fidbr %f6, 0, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_rint_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI79_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI79_1
+; SZ13-NEXT:    vfidb %v24, %v0, 0, 0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v26, %v0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %rint = call <4 x double> @llvm.experimental.constrained.rint.v4f64(
+                        <4 x double> <double 42.1, double 42.2,
+                                      double 42.3, double 42.4>,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  ret <4 x double> %rint
+}
+
+define <1 x float> @constrained_vector_nearbyint_v1f32() {
+; S390X-LABEL: constrained_vector_nearbyint_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI80_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyintf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI80_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(
+                               <1 x float> <float 42.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %nearby
+}
+
+define <2 x double> @constrained_vector_nearbyint_v2f64() {
+; S390X-LABEL: constrained_vector_nearbyint_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI81_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    larl %r1, .LCPI81_1
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI81_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(
+                                <2 x double> <double 42.1, double 42.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %nearby
+}
+
+define <3 x float> @constrained_vector_nearbyint_v3f32() {
+; S390X-LABEL: constrained_vector_nearbyint_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI82_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyintf at PLT
+; S390X-NEXT:    larl %r1, .LCPI82_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyintf at PLT
+; S390X-NEXT:    larl %r1, .LCPI82_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyintf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI82_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI82_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI82_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    fiebra %f0, 0, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 0, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 0, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(
+                              <3 x float> <float 42.0, float 43.0, float 44.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %nearby
+}
+
+define void @constrained_vector_nearbyint_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_nearbyint_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    vfidb %v1, %v1, 4, 0
+; SZ13-NEXT:    fidbra %f0, 0, %f0, 4
+; SZ13-NEXT:    std %f0, 16(%r2)
+; SZ13-NEXT:    vst %v1, 0(%r2)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %nearby = call <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(
+                          <3 x double> %b,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %nearby, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_nearbyint_v4f64() {
+; S390X-LABEL: constrained_vector_nearbyint_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI84_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    larl %r1, .LCPI84_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    larl %r1, .LCPI84_2
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    larl %r1, .LCPI84_3
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, nearbyint at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_nearbyint_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI84_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI84_1
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v26, %v0, 4, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %nearby = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(
+                                <4 x double> <double 42.1, double 42.2,
+                                              double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %nearby
+}
+
+define <1 x float> @constrained_vector_maxnum_v1f32() {
+; S390X-LABEL: constrained_vector_maxnum_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI85_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI85_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmaxf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI85_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI85_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmaxf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %max
+}
+
+define <2 x double> @constrained_vector_maxnum_v2f64() {
+; S390X-LABEL: constrained_vector_maxnum_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI86_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI86_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    larl %r1, .LCPI86_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI86_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI86_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI86_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    larl %r1, .LCPI86_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI86_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %max
+}
+
+define <3 x float> @constrained_vector_maxnum_v3f32() {
+; S390X-LABEL: constrained_vector_maxnum_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI87_0
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI87_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    brasl %r14, fmaxf at PLT
+; S390X-NEXT:    larl %r1, .LCPI87_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI87_3
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, fmaxf at PLT
+; S390X-NEXT:    larl %r1, .LCPI87_4
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fmaxf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI87_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI87_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fmaxf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI87_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fmaxf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI87_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI87_4
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmaxf at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %max
+}
+
+define void @constrained_vector_log10_maxnum_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_log10_maxnum_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI88_0
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ld %f9, 8(%r2)
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    larl %r1, .LCPI88_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f9
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    larl %r1, .LCPI88_2
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f9, 8(%r13)
+; S390X-NEXT:    std %f10, 0(%r13)
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_log10_maxnum_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI88_0
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    larl %r1, .LCPI88_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    larl %r1, .LCPI88_2
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %a
+  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 40.0, double 41.0, double 42.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %max, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_maxnum_v4f64() {
+; S390X-LABEL: constrained_vector_maxnum_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI89_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    larl %r1, .LCPI89_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    larl %r1, .LCPI89_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_5
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    larl %r1, .LCPI89_6
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI89_7
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmax at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_maxnum_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI89_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    larl %r1, .LCPI89_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI89_4
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_5
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    larl %r1, .LCPI89_6
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI89_7
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmax at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %max
+}
+
+define <1 x float> @constrained_vector_minnum_v1f32() {
+; S390X-LABEL: constrained_vector_minnum_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI90_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI90_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fminf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -160
+; SZ13-NEXT:    .cfi_def_cfa_offset 320
+; SZ13-NEXT:    larl %r1, .LCPI90_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI90_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fminf at PLT
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    lmg %r14, %r15, 272(%r15)
+; SZ13-NEXT:    br %r14
+ entry:
+  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
+                               <1 x float> <float 42.0>, <1 x float> <float 41.0>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %min
+}
+
+define <2 x double> @constrained_vector_minnum_v2f64() {
+; S390X-LABEL: constrained_vector_minnum_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI91_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI91_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    larl %r1, .LCPI91_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI91_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -176
+; SZ13-NEXT:    .cfi_def_cfa_offset 336
+; SZ13-NEXT:    larl %r1, .LCPI91_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI91_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    larl %r1, .LCPI91_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI91_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 288(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+                                <2 x double> <double 43.0, double 42.0>,
+                                <2 x double> <double 41.0, double 40.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %min
+}
+
+define <3 x float> @constrained_vector_minnum_v3f32() {
+; S390X-LABEL: constrained_vector_minnum_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI92_0
+; S390X-NEXT:    le %f8, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI92_1
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    brasl %r14, fminf at PLT
+; S390X-NEXT:    larl %r1, .LCPI92_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI92_3
+; S390X-NEXT:    le %f2, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, fminf at PLT
+; S390X-NEXT:    larl %r1, .LCPI92_4
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f10, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    ler %f2, %f8
+; S390X-NEXT:    brasl %r14, fminf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f9
+; S390X-NEXT:    ler %f2, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -200
+; SZ13-NEXT:    .cfi_def_cfa_offset 360
+; SZ13-NEXT:    std %f8, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    larl %r1, .LCPI92_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI92_1
+; SZ13-NEXT:    lde %f8, 0(%r1)
+; SZ13-NEXT:    ldr %f2, %f8
+; SZ13-NEXT:    brasl %r14, fminf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI92_2
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    brasl %r14, fminf at PLT
+; SZ13-NEXT:    larl %r1, .LCPI92_3
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI92_4
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fminf at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0s killed $f0s def $v0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vrepf %v1, %v1, 0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
+                              <3 x float> <float 43.0, float 44.0, float 45.0>,
+                              <3 x float> <float 41.0, float 42.0, float 43.0>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %min
+}
+
+define void @constrained_vector_minnum_v3f64(<3 x double>* %a) {
+; S390X-LABEL: constrained_vector_minnum_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r13, %r15, 104(%r15)
+; S390X-NEXT:    .cfi_offset %r13, -56
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -192
+; S390X-NEXT:    .cfi_def_cfa_offset 352
+; S390X-NEXT:    std %f8, 184(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f11, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    .cfi_offset %f11, -192
+; S390X-NEXT:    lgr %r13, %r2
+; S390X-NEXT:    ld %f8, 16(%r2)
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    larl %r1, .LCPI93_0
+; S390X-NEXT:    ldeb %f9, 0(%r1)
+; S390X-NEXT:    ld %f10, 8(%r2)
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    ldr %f11, %f0
+; S390X-NEXT:    ldr %f0, %f10
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    std %f0, 16(%r13)
+; S390X-NEXT:    std %f10, 8(%r13)
+; S390X-NEXT:    std %f11, 0(%r13)
+; S390X-NEXT:    ld %f8, 184(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f11, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r13, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r13, %r15, 104(%r15)
+; SZ13-NEXT:    .cfi_offset %r13, -56
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -208
+; SZ13-NEXT:    .cfi_def_cfa_offset 368
+; SZ13-NEXT:    std %f8, 200(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    std %f9, 192(%r15) # 8-byte Folded Spill
+; SZ13-NEXT:    .cfi_offset %f8, -168
+; SZ13-NEXT:    .cfi_offset %f9, -176
+; SZ13-NEXT:    larl %r1, .LCPI93_0
+; SZ13-NEXT:    ldeb %f9, 0(%r1)
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    ld %f8, 16(%r2)
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    lgr %r13, %r2
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    vl %v0, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    vrepg %v0, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v1, %v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldr %f0, %f8
+; SZ13-NEXT:    ldr %f2, %f9
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    std %f0, 16(%r13)
+; SZ13-NEXT:    vl %v0, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    ld %f8, 200(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    ld %f9, 192(%r15) # 8-byte Folded Reload
+; SZ13-NEXT:    vst %v0, 0(%r13)
+; SZ13-NEXT:    lmg %r13, %r15, 312(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+ %b = load <3 x double>, <3 x double>* %a
+ %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
+                          <3 x double> %b,
+                          <3 x double> <double 3.0, double 3.0, double 3.0>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  store <3 x double> %min, <3 x double>* %a
+  ret void
+}
+
+define <4 x double> @constrained_vector_minnum_v4f64() {
+; S390X-LABEL: constrained_vector_minnum_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -184
+; S390X-NEXT:    .cfi_def_cfa_offset 344
+; S390X-NEXT:    std %f8, 176(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f10, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    .cfi_offset %f10, -184
+; S390X-NEXT:    larl %r1, .LCPI94_0
+; S390X-NEXT:    ldeb %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_1
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    larl %r1, .LCPI94_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_3
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    larl %r1, .LCPI94_4
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_5
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    larl %r1, .LCPI94_6
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI94_7
+; S390X-NEXT:    ldeb %f2, 0(%r1)
+; S390X-NEXT:    ldr %f10, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, fmin at PLT
+; S390X-NEXT:    ldr %f6, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ldr %f4, %f10
+; S390X-NEXT:    ld %f8, 176(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f10, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 296(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_minnum_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    stmg %r14, %r15, 112(%r15)
+; SZ13-NEXT:    .cfi_offset %r14, -48
+; SZ13-NEXT:    .cfi_offset %r15, -40
+; SZ13-NEXT:    aghi %r15, -192
+; SZ13-NEXT:    .cfi_def_cfa_offset 352
+; SZ13-NEXT:    larl %r1, .LCPI94_0
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_1
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    larl %r1, .LCPI94_2
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_3
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    vl %v1, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v0, %v0, %v1
+; SZ13-NEXT:    larl %r1, .LCPI94_4
+; SZ13-NEXT:    vst %v0, 176(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_5
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    larl %r1, .LCPI94_6
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vst %v0, 160(%r15) # 16-byte Folded Spill
+; SZ13-NEXT:    ldeb %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI94_7
+; SZ13-NEXT:    ldeb %f2, 0(%r1)
+; SZ13-NEXT:    brasl %r14, fmin at PLT
+; SZ13-NEXT:    vl %v1, 160(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    vl %v24, 176(%r15) # 16-byte Folded Reload
+; SZ13-NEXT:    # kill: def $f0d killed $f0d def $v0
+; SZ13-NEXT:    vmrhg %v26, %v0, %v1
+; SZ13-NEXT:    lmg %r14, %r15, 304(%r15)
+; SZ13-NEXT:    br %r14
+entry:
+  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
+                                <4 x double> <double 44.0, double 45.0,
+                                              double 46.0, double 47.0>,
+                                <4 x double> <double 40.0, double 41.0,
+                                              double 42.0, double 43.0>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %min
+}
+
+define <1 x float> @constrained_vector_fptrunc_v1f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v1f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI95_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v1f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI95_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    wledb %v24, %f0, 0, 0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(
+                                <1 x double><double 42.1>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <1 x float> %result
+}
+
+define <2 x float> @constrained_vector_fptrunc_v2f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI96_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI96_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    ledbr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI96_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI96_1
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    vmrhg %v24, %v0, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(
+                                <2 x double><double 42.1, double 42.2>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x float> %result
+}
+
+define void @constrained_vector_fptrunc_v3f64(<3 x double>* %src, <3 x float>* %dest) {
+; S390X-LABEL: constrained_vector_fptrunc_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 0(%r2)
+; S390X-NEXT:    ld %f1, 16(%r2)
+; S390X-NEXT:    ld %f2, 8(%r2)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    lgdr %r0, %f0
+; S390X-NEXT:    nilf %r0, 0
+; S390X-NEXT:    ledbr %f0, %f2
+; S390X-NEXT:    lgdr %r1, %f0
+; S390X-NEXT:    srlg %r1, %r1, 32
+; S390X-NEXT:    lr %r0, %r1
+; S390X-NEXT:    ledbr %f0, %f1
+; S390X-NEXT:    ste %f0, 8(%r3)
+; S390X-NEXT:    stg %r0, 0(%r3)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v1, 0(%r2)
+; SZ13-NEXT:    ledbra %f2, 0, %f1, 0
+; SZ13-NEXT:    vrepg %v1, %v1, 1
+; SZ13-NEXT:    ld %f0, 16(%r2)
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    vmrhf %v1, %v2, %v1
+; SZ13-NEXT:    vmrhg %v1, %v1, %v1
+; SZ13-NEXT:    ste %f0, 8(%r3)
+; SZ13-NEXT:    vsteg %v1, 0(%r3), 0
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x double>, <3 x double>* %src
+  %result = call <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(
+                                <3 x double> %b,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  store <3 x float> %result, <3 x float>* %dest
+  ret void
+}
+
+define <4 x float> @constrained_vector_fptrunc_v4f64() {
+; S390X-LABEL: constrained_vector_fptrunc_v4f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI98_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_2
+; S390X-NEXT:    ld %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI98_3
+; S390X-NEXT:    ld %f5, 0(%r1)
+; S390X-NEXT:    ledbr %f0, %f0
+; S390X-NEXT:    ledbr %f2, %f1
+; S390X-NEXT:    ledbr %f4, %f3
+; S390X-NEXT:    ledbr %f6, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fptrunc_v4f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI98_0
+; SZ13-NEXT:    ld %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI98_1
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    ledbra %f0, 0, %f0, 0
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    larl %r1, .LCPI98_2
+; SZ13-NEXT:    vmrhf %v0, %v1, %v0
+; SZ13-NEXT:    ld %f1, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI98_3
+; SZ13-NEXT:    ld %f2, 0(%r1)
+; SZ13-NEXT:    ledbra %f1, 0, %f1, 0
+; SZ13-NEXT:    ledbra %f2, 0, %f2, 0
+; SZ13-NEXT:    vmrhf %v1, %v2, %v1
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(
+                                <4 x double><double 42.1, double 42.2,
+                                             double 42.3, double 42.4>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <4 x float> %result
+}
+
+define <1 x double> @constrained_vector_fpext_v1f32() {
+; S390X-LABEL: constrained_vector_fpext_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI99_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI99_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    wldeb %v24, %f0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(
+                                <1 x float><float 42.0>,
+                                metadata !"fpexcept.strict")
+  ret <1 x double> %result
+}
+
+define <2 x double> @constrained_vector_fpext_v2f32() {
+; S390X-LABEL: constrained_vector_fpext_v2f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI100_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI100_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    ldebr %f2, %f1
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v2f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI100_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI100_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(
+                                <2 x float><float 42.0, float 43.0>,
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %result
+}
+
+define void @constrained_vector_fpext_v3f64(<3 x float>* %src, <3 x double>* %dest) {
+; Odd (3-wide) strict fpext via memory: the 3+2n-element case is what the new
+; StrictFP unroll path was added for (chain kept intact while unrolling).
+; S390X-LABEL: constrained_vector_fpext_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    lg %r0, 0(%r2)
+; S390X-NEXT:    le %f0, 8(%r2)
+; S390X-NEXT:    sllg %r1, %r0, 32
+; S390X-NEXT:    ldgr %f1, %r1
+; S390X-NEXT:    nilf %r0, 0
+; S390X-NEXT:    ldgr %f2, %r0
+; S390X-NEXT:    ldebr %f2, %f2
+; S390X-NEXT:    ldebr %f1, %f1
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    std %f0, 16(%r3)
+; S390X-NEXT:    std %f1, 8(%r3)
+; S390X-NEXT:    std %f2, 0(%r3)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vl %v0, 0(%r2)
+; SZ13-NEXT:    vrepf %v2, %v0, 1
+; SZ13-NEXT:    ldebr %f1, %f0
+; SZ13-NEXT:    ldebr %f2, %f2
+; SZ13-NEXT:    vrepf %v0, %v0, 2
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    vmrhg %v1, %v1, %v2
+; SZ13-NEXT:    std %f0, 16(%r3)
+; SZ13-NEXT:    vst %v1, 0(%r3)
+; SZ13-NEXT:    br %r14
+entry:
+  %b = load <3 x float>, <3 x float>* %src
+  %result = call <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(
+                              <3 x float> %b,
+                              metadata !"fpexcept.strict")
+  store <3 x double> %result, <3 x double>* %dest
+  ret void
+}
+
+define <4 x double> @constrained_vector_fpext_v4f32() {
+; Strict fpext of a constant v4f32; on SZ13 the v4f64 result is assembled as
+; two v2f64 halves (two vmrhg merges into %v24/%v26).
+; S390X-LABEL: constrained_vector_fpext_v4f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    larl %r1, .LCPI102_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_2
+; S390X-NEXT:    le %f3, 0(%r1)
+; S390X-NEXT:    larl %r1, .LCPI102_3
+; S390X-NEXT:    le %f5, 0(%r1)
+; S390X-NEXT:    ldebr %f0, %f0
+; S390X-NEXT:    ldebr %f2, %f1
+; S390X-NEXT:    ldebr %f4, %f3
+; S390X-NEXT:    ldebr %f6, %f5
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_fpext_v4f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI102_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI102_1
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    larl %r1, .LCPI102_2
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI102_3
+; SZ13-NEXT:    lde %f1, 0(%r1)
+; SZ13-NEXT:    ldebr %f0, %f0
+; SZ13-NEXT:    ldebr %f1, %f1
+; SZ13-NEXT:    vmrhg %v26, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %result = call <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(
+                                <4 x float><float 42.0, float 43.0,
+                                            float 44.0, float 45.0>,
+                                metadata !"fpexcept.strict")
+  ret <4 x double> %result
+}
+
+define <1 x float> @constrained_vector_ceil_v1f32() {
+; v1f32 strict ceil is scalarized: a ceilf libcall on S390X, a single
+; fiebra (mask 6) on SZ13.
+; S390X-LABEL: constrained_vector_ceil_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI103_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceilf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <1 x float> @llvm.experimental.constrained.ceil.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %ceil
+}
+
+define <2 x double> @constrained_vector_ceil_v2f64() {
+; Legal-width v2f64 strict ceil: two ceil libcalls on S390X, one vector
+; vfidb (mask 6) on SZ13.
+; S390X-LABEL: constrained_vector_ceil_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI104_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceil at PLT
+; S390X-NEXT:    larl %r1, .LCPI104_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, ceil at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI104_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 6
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <2 x double> @llvm.experimental.constrained.ceil.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %ceil
+}
+
+define <3 x float> @constrained_vector_ceil_v3f32() {
+; Odd-width v3f32 strict ceil: unrolled to three ceilf calls (S390X) /
+; three fiebra ops (SZ13), then reassembled with vmrhf/vmrhg.
+; S390X-LABEL: constrained_vector_ceil_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI105_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceilf at PLT
+; S390X-NEXT:    larl %r1, .LCPI105_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, ceilf at PLT
+; S390X-NEXT:    larl %r1, .LCPI105_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, ceilf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI105_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI105_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 6, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 6, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 6, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <3 x float> @llvm.experimental.constrained.ceil.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %ceil
+}
+
+define <3 x double> @constrained_vector_ceil_v3f64() {
+; v3f64 strict ceil: SZ13 rounds two lanes with one vfidb and the third
+; with scalar fidbra; S390X makes three ceil libcalls.
+; S390X-LABEL: constrained_vector_ceil_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI106_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, ceil at PLT
+; S390X-NEXT:    larl %r1, .LCPI106_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, ceil at PLT
+; S390X-NEXT:    larl %r1, .LCPI106_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, ceil at PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_ceil_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI106_0
+; SZ13-NEXT:    fidbra %f4, 6, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 6
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %ceil = call <3 x double> @llvm.experimental.constrained.ceil.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %ceil
+}
+
+define <1 x float> @constrained_vector_floor_v1f32() {
+; v1f32 strict floor: floorf libcall on S390X; fiebra (mask 7) on SZ13.
+; S390X-LABEL: constrained_vector_floor_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI107_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floorf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <1 x float> @llvm.experimental.constrained.floor.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %floor
+}
+
+
+define <2 x double> @constrained_vector_floor_v2f64() {
+; Legal-width v2f64 strict floor: two floor libcalls on S390X, one vfidb
+; (mask 7) on SZ13.
+; S390X-LABEL: constrained_vector_floor_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI108_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floor at PLT
+; S390X-NEXT:    larl %r1, .LCPI108_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, floor at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI108_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 7
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <2 x double> @llvm.experimental.constrained.floor.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %floor
+}
+
+define <3 x float> @constrained_vector_floor_v3f32() {
+; Odd-width v3f32 strict floor: unrolled to three floorf calls / three
+; fiebra ops (mask 7), then reassembled.
+; S390X-LABEL: constrained_vector_floor_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI109_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floorf at PLT
+; S390X-NEXT:    larl %r1, .LCPI109_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, floorf at PLT
+; S390X-NEXT:    larl %r1, .LCPI109_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, floorf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI109_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI109_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 7, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 7, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 7, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <3 x float> @llvm.experimental.constrained.floor.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %floor
+}
+
+define <3 x double> @constrained_vector_floor_v3f64() {
+; v3f64 strict floor: SZ13 uses vfidb for two lanes plus scalar fidbra for
+; the third; S390X makes three floor libcalls.
+; S390X-LABEL: constrained_vector_floor_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI110_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, floor at PLT
+; S390X-NEXT:    larl %r1, .LCPI110_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, floor at PLT
+; S390X-NEXT:    larl %r1, .LCPI110_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, floor at PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_floor_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI110_0
+; SZ13-NEXT:    fidbra %f4, 7, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 7
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %floor = call <3 x double> @llvm.experimental.constrained.floor.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %floor
+}
+
+define <1 x float> @constrained_vector_round_v1f32() {
+; v1f32 strict round: roundf libcall on S390X; fiebra (mask 1) on SZ13.
+; S390X-LABEL: constrained_vector_round_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI111_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, roundf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <1 x float> @llvm.experimental.constrained.round.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %round
+}
+
+define <2 x double> @constrained_vector_round_v2f64() {
+; Legal-width v2f64 strict round: two round libcalls on S390X, one vfidb
+; (mask 1) on SZ13.
+; S390X-LABEL: constrained_vector_round_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI112_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, round at PLT
+; S390X-NEXT:    larl %r1, .LCPI112_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, round at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI112_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 1
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <2 x double> @llvm.experimental.constrained.round.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %round
+}
+
+define <3 x float> @constrained_vector_round_v3f32() {
+; Odd-width v3f32 strict round: unrolled to three roundf calls / three
+; fiebra ops (mask 1), then reassembled.
+; S390X-LABEL: constrained_vector_round_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI113_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, roundf at PLT
+; S390X-NEXT:    larl %r1, .LCPI113_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, roundf at PLT
+; S390X-NEXT:    larl %r1, .LCPI113_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, roundf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI113_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI113_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 1, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 1, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 1, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <3 x float> @llvm.experimental.constrained.round.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %round
+}
+
+
+define <3 x double> @constrained_vector_round_v3f64() {
+; v3f64 strict round: SZ13 uses vfidb for two lanes plus scalar fidbra for
+; the third; S390X makes three round libcalls.
+; S390X-LABEL: constrained_vector_round_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI114_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, round at PLT
+; S390X-NEXT:    larl %r1, .LCPI114_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, round at PLT
+; S390X-NEXT:    larl %r1, .LCPI114_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, round at PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_round_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI114_0
+; SZ13-NEXT:    fidbra %f4, 1, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 1
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %round = call <3 x double> @llvm.experimental.constrained.round.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %round
+}
+
+define <1 x float> @constrained_vector_trunc_v1f32() {
+; v1f32 strict trunc: truncf libcall on S390X; fiebra (mask 5) on SZ13.
+; S390X-LABEL: constrained_vector_trunc_v1f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -160
+; S390X-NEXT:    .cfi_def_cfa_offset 320
+; S390X-NEXT:    larl %r1, .LCPI115_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, truncf at PLT
+; S390X-NEXT:    lmg %r14, %r15, 272(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v1f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmf %v0, 2, 9
+; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    vlr %v24, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <1 x float> @llvm.experimental.constrained.trunc.v1f32(
+                               <1 x float> <float 1.5>,
+                               metadata !"round.dynamic",
+                               metadata !"fpexcept.strict")
+  ret <1 x float> %trunc
+}
+
+define <2 x double> @constrained_vector_trunc_v2f64() {
+; Legal-width v2f64 strict trunc: two trunc libcalls on S390X, one vfidb
+; (mask 5) on SZ13.
+; S390X-LABEL: constrained_vector_trunc_v2f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -168
+; S390X-NEXT:    .cfi_def_cfa_offset 328
+; S390X-NEXT:    std %f8, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    larl %r1, .LCPI116_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, trunc at PLT
+; S390X-NEXT:    larl %r1, .LCPI116_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, trunc at PLT
+; S390X-NEXT:    ldr %f2, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ld %f8, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 280(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v2f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI116_0
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v24, %v0, 4, 5
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <2 x double> @llvm.experimental.constrained.trunc.v2f64(
+                                <2 x double> <double 1.1, double 1.9>,
+                                metadata !"round.dynamic",
+                                metadata !"fpexcept.strict")
+  ret <2 x double> %trunc
+}
+
+define <3 x float> @constrained_vector_trunc_v3f32() {
+; Odd-width v3f32 strict trunc: unrolled to three truncf calls / three
+; fiebra ops (mask 5), then reassembled.
+; S390X-LABEL: constrained_vector_trunc_v3f32:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI117_0
+; S390X-NEXT:    le %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, truncf at PLT
+; S390X-NEXT:    larl %r1, .LCPI117_1
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f8, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, truncf at PLT
+; S390X-NEXT:    larl %r1, .LCPI117_2
+; S390X-NEXT:    le %f1, 0(%r1)
+; S390X-NEXT:    ler %f9, %f0
+; S390X-NEXT:    ler %f0, %f1
+; S390X-NEXT:    brasl %r14, truncf at PLT
+; S390X-NEXT:    ler %f4, %f0
+; S390X-NEXT:    ler %f0, %f8
+; S390X-NEXT:    ler %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v3f32:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    larl %r1, .LCPI117_0
+; SZ13-NEXT:    lde %f0, 0(%r1)
+; SZ13-NEXT:    larl %r1, .LCPI117_1
+; SZ13-NEXT:    lde %f2, 0(%r1)
+; SZ13-NEXT:    vgmf %v1, 2, 9
+; SZ13-NEXT:    fiebra %f0, 5, %f0, 4
+; SZ13-NEXT:    fiebra %f1, 5, %f1, 4
+; SZ13-NEXT:    fiebra %f2, 5, %f2, 4
+; SZ13-NEXT:    vmrhf %v1, %v1, %v2
+; SZ13-NEXT:    vrepf %v0, %v0, 0
+; SZ13-NEXT:    vmrhg %v24, %v1, %v0
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <3 x float> @llvm.experimental.constrained.trunc.v3f32(
+                              <3 x float> <float 1.5, float 2.5, float 3.5>,
+                              metadata !"round.dynamic",
+                              metadata !"fpexcept.strict")
+  ret <3 x float> %trunc
+}
+
+define <3 x double> @constrained_vector_trunc_v3f64() {
+; v3f64 strict trunc: SZ13 uses vfidb for two lanes plus scalar fidbra for
+; the third; S390X makes three trunc libcalls.
+; S390X-LABEL: constrained_vector_trunc_v3f64:
+; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    stmg %r14, %r15, 112(%r15)
+; S390X-NEXT:    .cfi_offset %r14, -48
+; S390X-NEXT:    .cfi_offset %r15, -40
+; S390X-NEXT:    aghi %r15, -176
+; S390X-NEXT:    .cfi_def_cfa_offset 336
+; S390X-NEXT:    std %f8, 168(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    std %f9, 160(%r15) # 8-byte Folded Spill
+; S390X-NEXT:    .cfi_offset %f8, -168
+; S390X-NEXT:    .cfi_offset %f9, -176
+; S390X-NEXT:    larl %r1, .LCPI118_0
+; S390X-NEXT:    ld %f0, 0(%r1)
+; S390X-NEXT:    brasl %r14, trunc at PLT
+; S390X-NEXT:    larl %r1, .LCPI118_1
+; S390X-NEXT:    ld %f1, 0(%r1)
+; S390X-NEXT:    ldr %f8, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, trunc at PLT
+; S390X-NEXT:    larl %r1, .LCPI118_2
+; S390X-NEXT:    ldeb %f1, 0(%r1)
+; S390X-NEXT:    ldr %f9, %f0
+; S390X-NEXT:    ldr %f0, %f1
+; S390X-NEXT:    brasl %r14, trunc at PLT
+; S390X-NEXT:    ldr %f4, %f0
+; S390X-NEXT:    ldr %f0, %f8
+; S390X-NEXT:    ldr %f2, %f9
+; S390X-NEXT:    ld %f8, 168(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    ld %f9, 160(%r15) # 8-byte Folded Reload
+; S390X-NEXT:    lmg %r14, %r15, 288(%r15)
+; S390X-NEXT:    br %r14
+;
+; SZ13-LABEL: constrained_vector_trunc_v3f64:
+; SZ13:       # %bb.0: # %entry
+; SZ13-NEXT:    vgmg %v0, 2, 12
+; SZ13-NEXT:    larl %r1, .LCPI118_0
+; SZ13-NEXT:    fidbra %f4, 5, %f0, 4
+; SZ13-NEXT:    vl %v0, 0(%r1)
+; SZ13-NEXT:    vfidb %v0, %v0, 4, 5
+; SZ13-NEXT:    vrepg %v2, %v0, 1
+; SZ13-NEXT:    # kill: def $f0d killed $f0d killed $v0
+; SZ13-NEXT:    # kill: def $f2d killed $f2d killed $v2
+; SZ13-NEXT:    br %r14
+entry:
+  %trunc = call <3 x double> @llvm.experimental.constrained.trunc.v3f64(
+                          <3 x double> <double 1.1, double 1.9, double 1.5>,
+                          metadata !"round.dynamic",
+                          metadata !"fpexcept.strict")
+  ret <3 x double> %trunc
+}
+
+; Single width declarations
+declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.frem.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.pow.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.powi.v2f64(<2 x double>, i32, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.sin.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.cos.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.exp2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x float> @llvm.experimental.constrained.fptrunc.v2f32.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.fpext.v2f64.v2f32(<2 x float>, metadata)
+declare <2 x double> @llvm.experimental.constrained.ceil.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.floor.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.round.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.trunc.v2f64(<2 x double>, metadata, metadata)
+
+; Scalar width declarations
+declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fsub.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fmul.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fdiv.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.frem.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sqrt.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.pow.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.powi.v1f32(<1 x float>, i32, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.sin.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.cos.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.exp2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.fptrunc.v1f32.v1f64(<1 x double>, metadata, metadata)
+declare <1 x double> @llvm.experimental.constrained.fpext.v1f64.v1f32(<1 x float>, metadata)
+declare <1 x float> @llvm.experimental.constrained.ceil.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.floor.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.round.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.trunc.v1f32(<1 x float>, metadata, metadata)
+
+; Illegal width declarations
+declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fadd.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fsub.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fsub.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fmul.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fmul.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fdiv.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fdiv.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.frem.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.frem.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sqrt.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sqrt.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.pow.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.pow.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.powi.v3f32(<3 x float>, i32, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.powi.v3f64(<3 x double>, i32, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.sin.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.sin.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.cos.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.cos.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.exp2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.exp2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log10.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log10.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.log2.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.log2.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.fptrunc.v3f32.v3f64(<3 x double>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.fpext.v3f64.v3f32(<3 x float>, metadata)
+declare <3 x float> @llvm.experimental.constrained.ceil.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.ceil.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.floor.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.floor.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.round.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.round.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.trunc.v3f32(<3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.trunc.v3f64(<3 x double>, metadata, metadata)
+
+; Double width declarations
+declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fsub.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fmul.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fdiv.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.frem.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sqrt.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.pow.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.powi.v4f64(<4 x double>, i32, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.sin.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.cos.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.exp2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x float> @llvm.experimental.constrained.fptrunc.v4f32.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.fpext.v4f64.v4f32(<4 x float>, metadata)
+declare <4 x double> @llvm.experimental.constrained.ceil.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.floor.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.round.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.trunc.v4f64(<4 x double>, metadata, metadata)




More information about the llvm-commits mailing list