[llvm] r236527 - [SystemZ] Add vector intrinsics

Ulrich Weigand ulrich.weigand at de.ibm.com
Tue May 5 12:31:09 PDT 2015


Author: uweigand
Date: Tue May  5 14:31:09 2015
New Revision: 236527

URL: http://llvm.org/viewvc/llvm-project?rev=236527&view=rev
Log:
[SystemZ] Add vector intrinsics

This adds intrinsics to allow access to all of the z13 vector instructions.
Note that instructions whose semantics can be described by standard LLVM IR
do not get any intrinsics.

For each instruction whose semantics *cannot* (fully) be described, we
define an LLVM IR target-specific intrinsic that directly maps to this
instruction.

For instructions that also set the condition code, the LLVM IR intrinsic
returns the post-instruction CC value as a second result.  Instruction
selection will attempt to detect code that compares that CC value against
constants and use the condition code directly instead.

Based on a patch by Richard Sandiford.


Added:
    llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics.ll
Modified:
    llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td
    llvm/trunk/lib/Target/SystemZ/SystemZ.h
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
    llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td
    llvm/trunk/lib/Target/SystemZ/SystemZOperators.td

Modified: llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td?rev=236527&r1=236526&r2=236527&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td Tue May  5 14:31:09 2015
@@ -11,6 +11,185 @@
 //
 //===----------------------------------------------------------------------===//
 
+class SystemZUnaryConv<string name, LLVMType result, LLVMType arg>
+  : GCCBuiltin<"__builtin_s390_" ## name>,
+    Intrinsic<[result], [arg], [IntrNoMem]>;
+
+class SystemZUnary<string name, LLVMType type>
+  : SystemZUnaryConv<name, type, type>;
+
+class SystemZUnaryConvCC<LLVMType result, LLVMType arg>
+  : Intrinsic<[result, llvm_i32_ty], [arg], [IntrNoMem]>;
+
+class SystemZUnaryCC<LLVMType type>
+  : SystemZUnaryConvCC<type, type>;
+
+class SystemZBinaryConv<string name, LLVMType result, LLVMType arg>
+  : GCCBuiltin<"__builtin_s390_" ## name>,
+    Intrinsic<[result], [arg, arg], [IntrNoMem]>;
+
+class SystemZBinary<string name, LLVMType type>
+  : SystemZBinaryConv<name, type, type>;
+
+class SystemZBinaryInt<string name, LLVMType type>
+  : GCCBuiltin<"__builtin_s390_" ## name>,
+    Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>;
+
+class SystemZBinaryConvCC<LLVMType result, LLVMType arg>
+  : Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>;
+
+class SystemZBinaryConvIntCC<LLVMType result, LLVMType arg>
+  : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], [IntrNoMem]>;
+
+class SystemZBinaryCC<LLVMType type>
+  : SystemZBinaryConvCC<type, type>;
+
+class SystemZTernaryConv<string name, LLVMType result, LLVMType arg>
+  : GCCBuiltin<"__builtin_s390_" ## name>,
+    Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;
+
+class SystemZTernary<string name, LLVMType type>
+  : SystemZTernaryConv<name, type, type>;
+
+class SystemZTernaryInt<string name, LLVMType type>
+  : GCCBuiltin<"__builtin_s390_" ## name>,
+    Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem]>;
+
+class SystemZTernaryIntCC<LLVMType type>
+  : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], [IntrNoMem]>;
+
+class SystemZQuaternaryInt<string name, LLVMType type>
+  : GCCBuiltin<"__builtin_s390_" ## name>,
+    Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem]>;
+
+class SystemZQuaternaryIntCC<LLVMType type>
+  : Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty],
+              [IntrNoMem]>;
+
+multiclass SystemZUnaryExtBHF<string name> {
+  def b : SystemZUnaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
+  def h : SystemZUnaryConv<name##"h", llvm_v4i32_ty, llvm_v8i16_ty>;
+  def f : SystemZUnaryConv<name##"f", llvm_v2i64_ty, llvm_v4i32_ty>;
+}
+
+multiclass SystemZUnaryExtBHWF<string name> {
+  def b  : SystemZUnaryConv<name##"b",  llvm_v8i16_ty, llvm_v16i8_ty>;
+  def hw : SystemZUnaryConv<name##"hw", llvm_v4i32_ty, llvm_v8i16_ty>;
+  def f  : SystemZUnaryConv<name##"f",  llvm_v2i64_ty, llvm_v4i32_ty>;
+}
+
+multiclass SystemZUnaryBHF<string name> {
+  def b : SystemZUnary<name##"b", llvm_v16i8_ty>;
+  def h : SystemZUnary<name##"h", llvm_v8i16_ty>;
+  def f : SystemZUnary<name##"f", llvm_v4i32_ty>;
+}
+
+multiclass SystemZUnaryBHFG<string name> : SystemZUnaryBHF<name> {
+  def g : SystemZUnary<name##"g", llvm_v2i64_ty>;
+}
+
+multiclass SystemZUnaryCCBHF {
+  def bs : SystemZUnaryCC<llvm_v16i8_ty>;
+  def hs : SystemZUnaryCC<llvm_v8i16_ty>;
+  def fs : SystemZUnaryCC<llvm_v4i32_ty>;
+}
+
+multiclass SystemZBinaryTruncHFG<string name> {
+  def h : SystemZBinaryConv<name##"h", llvm_v16i8_ty, llvm_v8i16_ty>;
+  def f : SystemZBinaryConv<name##"f", llvm_v8i16_ty, llvm_v4i32_ty>;
+  def g : SystemZBinaryConv<name##"g", llvm_v4i32_ty, llvm_v2i64_ty>;
+}
+
+multiclass SystemZBinaryTruncCCHFG {
+  def hs : SystemZBinaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
+  def fs : SystemZBinaryConvCC<llvm_v8i16_ty, llvm_v4i32_ty>;
+  def gs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v2i64_ty>;
+}
+
+multiclass SystemZBinaryExtBHF<string name> {
+  def b : SystemZBinaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
+  def h : SystemZBinaryConv<name##"h", llvm_v4i32_ty, llvm_v8i16_ty>;
+  def f : SystemZBinaryConv<name##"f", llvm_v2i64_ty, llvm_v4i32_ty>;
+}
+
+multiclass SystemZBinaryExtBHFG<string name> : SystemZBinaryExtBHF<name> {
+  def g : SystemZBinaryConv<name##"g", llvm_v16i8_ty, llvm_v2i64_ty>;
+}
+
+multiclass SystemZBinaryBHF<string name> {
+  def b : SystemZBinary<name##"b", llvm_v16i8_ty>;
+  def h : SystemZBinary<name##"h", llvm_v8i16_ty>;
+  def f : SystemZBinary<name##"f", llvm_v4i32_ty>;
+}
+
+multiclass SystemZBinaryBHFG<string name> : SystemZBinaryBHF<name> {
+  def g : SystemZBinary<name##"g", llvm_v2i64_ty>;
+}
+
+multiclass SystemZBinaryIntBHFG<string name> {
+  def b : SystemZBinaryInt<name##"b", llvm_v16i8_ty>;
+  def h : SystemZBinaryInt<name##"h", llvm_v8i16_ty>;
+  def f : SystemZBinaryInt<name##"f", llvm_v4i32_ty>;
+  def g : SystemZBinaryInt<name##"g", llvm_v2i64_ty>;
+}
+
+multiclass SystemZBinaryCCBHF {
+  def bs : SystemZBinaryCC<llvm_v16i8_ty>;
+  def hs : SystemZBinaryCC<llvm_v8i16_ty>;
+  def fs : SystemZBinaryCC<llvm_v4i32_ty>;
+}
+
+multiclass SystemZCompareBHFG<string name> {
+  def bs : SystemZBinaryCC<llvm_v16i8_ty>;
+  def hs : SystemZBinaryCC<llvm_v8i16_ty>;
+  def fs : SystemZBinaryCC<llvm_v4i32_ty>;
+  def gs : SystemZBinaryCC<llvm_v2i64_ty>;
+}
+
+multiclass SystemZTernaryExtBHF<string name> {
+  def b : SystemZTernaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
+  def h : SystemZTernaryConv<name##"h", llvm_v4i32_ty, llvm_v8i16_ty>;
+  def f : SystemZTernaryConv<name##"f", llvm_v2i64_ty, llvm_v4i32_ty>;
+}
+
+multiclass SystemZTernaryExtBHFG<string name> : SystemZTernaryExtBHF<name> {
+  def g : SystemZTernaryConv<name##"g", llvm_v16i8_ty, llvm_v2i64_ty>;
+}
+
+multiclass SystemZTernaryBHF<string name> {
+  def b : SystemZTernary<name##"b", llvm_v16i8_ty>;
+  def h : SystemZTernary<name##"h", llvm_v8i16_ty>;
+  def f : SystemZTernary<name##"f", llvm_v4i32_ty>;
+}
+
+multiclass SystemZTernaryIntBHF<string name> {
+  def b : SystemZTernaryInt<name##"b", llvm_v16i8_ty>;
+  def h : SystemZTernaryInt<name##"h", llvm_v8i16_ty>;
+  def f : SystemZTernaryInt<name##"f", llvm_v4i32_ty>;
+}
+
+multiclass SystemZTernaryIntCCBHF {
+  def bs : SystemZTernaryIntCC<llvm_v16i8_ty>;
+  def hs : SystemZTernaryIntCC<llvm_v8i16_ty>;
+  def fs : SystemZTernaryIntCC<llvm_v4i32_ty>;
+}
+
+multiclass SystemZQuaternaryIntBHF<string name> {
+  def b : SystemZQuaternaryInt<name##"b", llvm_v16i8_ty>;
+  def h : SystemZQuaternaryInt<name##"h", llvm_v8i16_ty>;
+  def f : SystemZQuaternaryInt<name##"f", llvm_v4i32_ty>;
+}
+
+multiclass SystemZQuaternaryIntBHFG<string name> : SystemZQuaternaryIntBHF<name> {
+  def g : SystemZQuaternaryInt<name##"g", llvm_v2i64_ty>;
+}
+
+multiclass SystemZQuaternaryIntCCBHF {
+  def bs : SystemZQuaternaryIntCC<llvm_v16i8_ty>;
+  def hs : SystemZQuaternaryIntCC<llvm_v8i16_ty>;
+  def fs : SystemZQuaternaryIntCC<llvm_v4i32_ty>;
+}
+
 //===----------------------------------------------------------------------===//
 //
 // Transactional-execution intrinsics
@@ -44,3 +223,154 @@ let TargetPrefix = "s390" in {
                               Intrinsic<[], [llvm_i32_ty]>;
 }
 
+//===----------------------------------------------------------------------===//
+//
+// Vector intrinsics
+//
+//===----------------------------------------------------------------------===//
+
+let TargetPrefix = "s390" in {
+  def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">,
+                      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                [IntrNoMem]>;
+
+  def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">,
+                      Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
+                                [IntrReadArgMem]>;
+
+  def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">,
+                     Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
+                               [IntrReadArgMem]>;
+
+  def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">,
+                      Intrinsic<[llvm_v2i64_ty],
+                                [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
+                                [IntrNoMem]>;
+
+  def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">,
+                       Intrinsic<[llvm_v16i8_ty],
+                                 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+                                 [IntrNoMem]>;
+
+  defm int_s390_vpks : SystemZBinaryTruncHFG<"vpks">;
+  defm int_s390_vpks : SystemZBinaryTruncCCHFG;
+
+  defm int_s390_vpkls : SystemZBinaryTruncHFG<"vpkls">;
+  defm int_s390_vpkls : SystemZBinaryTruncCCHFG;
+
+  def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">,
+                      Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
+                                // In fact write-only but there's no property
+                                // for that.
+                                [IntrReadWriteArgMem]>;
+
+  defm int_s390_vupl  : SystemZUnaryExtBHWF<"vupl">;
+  defm int_s390_vupll : SystemZUnaryExtBHF<"vupll">;
+
+  defm int_s390_vuph  : SystemZUnaryExtBHF<"vuph">;
+  defm int_s390_vuplh : SystemZUnaryExtBHF<"vuplh">;
+
+  defm int_s390_vacc : SystemZBinaryBHFG<"vacc">;
+
+  def int_s390_vaq    : SystemZBinary<"vaq",     llvm_v16i8_ty>;
+  def int_s390_vacq   : SystemZTernary<"vacq",   llvm_v16i8_ty>;
+  def int_s390_vaccq  : SystemZBinary<"vaccq",   llvm_v16i8_ty>;
+  def int_s390_vacccq : SystemZTernary<"vacccq", llvm_v16i8_ty>;
+
+  defm int_s390_vavg  : SystemZBinaryBHFG<"vavg">;
+  defm int_s390_vavgl : SystemZBinaryBHFG<"vavgl">;
+
+  def int_s390_vcksm : SystemZBinary<"vcksm", llvm_v4i32_ty>;
+
+  defm int_s390_vgfm  : SystemZBinaryExtBHFG<"vgfm">;
+  defm int_s390_vgfma : SystemZTernaryExtBHFG<"vgfma">;
+
+  defm int_s390_vmah  : SystemZTernaryBHF<"vmah">;
+  defm int_s390_vmalh : SystemZTernaryBHF<"vmalh">;
+  defm int_s390_vmae  : SystemZTernaryExtBHF<"vmae">;
+  defm int_s390_vmale : SystemZTernaryExtBHF<"vmale">;
+  defm int_s390_vmao  : SystemZTernaryExtBHF<"vmao">;
+  defm int_s390_vmalo : SystemZTernaryExtBHF<"vmalo">;
+
+  defm int_s390_vmh  : SystemZBinaryBHF<"vmh">;
+  defm int_s390_vmlh : SystemZBinaryBHF<"vmlh">;
+  defm int_s390_vme  : SystemZBinaryExtBHF<"vme">;
+  defm int_s390_vmle : SystemZBinaryExtBHF<"vmle">;
+  defm int_s390_vmo  : SystemZBinaryExtBHF<"vmo">;
+  defm int_s390_vmlo : SystemZBinaryExtBHF<"vmlo">;
+
+  defm int_s390_verllv : SystemZBinaryBHFG<"verllv">;
+  defm int_s390_verll  : SystemZBinaryIntBHFG<"verll">;
+  defm int_s390_verim  : SystemZQuaternaryIntBHFG<"verim">;
+
+  def int_s390_vsl   : SystemZBinary<"vsl",   llvm_v16i8_ty>;
+  def int_s390_vslb  : SystemZBinary<"vslb",  llvm_v16i8_ty>;
+  def int_s390_vsra  : SystemZBinary<"vsra",  llvm_v16i8_ty>;
+  def int_s390_vsrab : SystemZBinary<"vsrab", llvm_v16i8_ty>;
+  def int_s390_vsrl  : SystemZBinary<"vsrl",  llvm_v16i8_ty>;
+  def int_s390_vsrlb : SystemZBinary<"vsrlb", llvm_v16i8_ty>;
+
+  def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">,
+                       Intrinsic<[llvm_v16i8_ty],
+                                 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+                                 [IntrNoMem]>;
+
+  defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">;
+
+  def int_s390_vsq     : SystemZBinary<"vsq",      llvm_v16i8_ty>;
+  def int_s390_vsbiq   : SystemZTernary<"vsbiq",   llvm_v16i8_ty>;
+  def int_s390_vscbiq  : SystemZBinary<"vscbiq",   llvm_v16i8_ty>;
+  def int_s390_vsbcbiq : SystemZTernary<"vsbcbiq", llvm_v16i8_ty>;
+
+  def int_s390_vsumb : SystemZBinaryConv<"vsumb", llvm_v4i32_ty, llvm_v16i8_ty>;
+  def int_s390_vsumh : SystemZBinaryConv<"vsumh", llvm_v4i32_ty, llvm_v8i16_ty>;
+
+  def int_s390_vsumgh : SystemZBinaryConv<"vsumgh", llvm_v2i64_ty,
+                                          llvm_v8i16_ty>;
+  def int_s390_vsumgf : SystemZBinaryConv<"vsumgf", llvm_v2i64_ty,
+                                          llvm_v4i32_ty>;
+
+  def int_s390_vsumqf : SystemZBinaryConv<"vsumqf", llvm_v16i8_ty,
+                                          llvm_v4i32_ty>;
+  def int_s390_vsumqg : SystemZBinaryConv<"vsumqg", llvm_v16i8_ty,
+                                          llvm_v2i64_ty>;
+
+  def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>;
+
+  defm int_s390_vceq : SystemZCompareBHFG<"vceq">;
+  defm int_s390_vch  : SystemZCompareBHFG<"vch">;
+  defm int_s390_vchl : SystemZCompareBHFG<"vchl">;
+
+  defm int_s390_vfae  : SystemZTernaryIntBHF<"vfae">;
+  defm int_s390_vfae  : SystemZTernaryIntCCBHF;
+  defm int_s390_vfaez : SystemZTernaryIntBHF<"vfaez">;
+  defm int_s390_vfaez : SystemZTernaryIntCCBHF;
+
+  defm int_s390_vfee  : SystemZBinaryBHF<"vfee">;
+  defm int_s390_vfee  : SystemZBinaryCCBHF;
+  defm int_s390_vfeez : SystemZBinaryBHF<"vfeez">;
+  defm int_s390_vfeez : SystemZBinaryCCBHF;
+
+  defm int_s390_vfene  : SystemZBinaryBHF<"vfene">;
+  defm int_s390_vfene  : SystemZBinaryCCBHF;
+  defm int_s390_vfenez : SystemZBinaryBHF<"vfenez">;
+  defm int_s390_vfenez : SystemZBinaryCCBHF;
+
+  defm int_s390_vistr : SystemZUnaryBHF<"vistr">;
+  defm int_s390_vistr : SystemZUnaryCCBHF;
+
+  defm int_s390_vstrc  : SystemZQuaternaryIntBHF<"vstrc">;
+  defm int_s390_vstrc  : SystemZQuaternaryIntCCBHF;
+  defm int_s390_vstrcz : SystemZQuaternaryIntBHF<"vstrcz">;
+  defm int_s390_vstrcz : SystemZQuaternaryIntCCBHF;
+
+  def int_s390_vfcedbs  : SystemZBinaryConvCC<llvm_v2i64_ty, llvm_v2f64_ty>;
+  def int_s390_vfchdbs  : SystemZBinaryConvCC<llvm_v2i64_ty, llvm_v2f64_ty>;
+  def int_s390_vfchedbs : SystemZBinaryConvCC<llvm_v2i64_ty, llvm_v2f64_ty>;
+
+  def int_s390_vftcidb : SystemZBinaryConvIntCC<llvm_v2i64_ty, llvm_v2f64_ty>;
+
+  def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty],
+                                 [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
+                                 [IntrNoMem]>;
+}

Modified: llvm/trunk/lib/Target/SystemZ/SystemZ.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZ.h?rev=236527&r1=236526&r2=236527&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZ.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZ.h Tue May  5 14:31:09 2015
@@ -80,6 +80,13 @@ const unsigned CCMASK_TEND_TX   = CCMASK
 const unsigned CCMASK_TEND_NOTX = CCMASK_2;
 const unsigned CCMASK_TEND      = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
 
+// Condition-code mask assignments for vector comparisons (and similar
+// operations).
+const unsigned CCMASK_VCMP_ALL       = CCMASK_0;
+const unsigned CCMASK_VCMP_MIXED     = CCMASK_1;
+const unsigned CCMASK_VCMP_NONE      = CCMASK_3;
+const unsigned CCMASK_VCMP           = CCMASK_0 | CCMASK_1 | CCMASK_3;
+
 // The position of the low CC bit in an IPM result.
 const unsigned IPM_CC = 28;
 

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=236527&r1=236526&r2=236527&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Tue May  5 14:31:09 2015
@@ -440,6 +440,7 @@ SystemZTargetLowering::SystemZTargetLowe
 
   // Handle intrinsics.
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
   // We want to use MVC in preference to even a single load/store pair.
   MaxStoresPerMemcpy = 0;
@@ -1253,6 +1254,143 @@ static bool isIntrinsicWithCCAndChain(SD
   }
 }
 
+// Return true if Op is an intrinsic node without chain that returns the
+// CC value as its final argument.  Provide the associated SystemZISD
+// opcode and the mask of valid CC values if so.
+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
+  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  switch (Id) {
+  case Intrinsic::s390_vpkshs:
+  case Intrinsic::s390_vpksfs:
+  case Intrinsic::s390_vpksgs:
+    Opcode = SystemZISD::PACKS_CC;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vpklshs:
+  case Intrinsic::s390_vpklsfs:
+  case Intrinsic::s390_vpklsgs:
+    Opcode = SystemZISD::PACKLS_CC;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vceqbs:
+  case Intrinsic::s390_vceqhs:
+  case Intrinsic::s390_vceqfs:
+  case Intrinsic::s390_vceqgs:
+    Opcode = SystemZISD::VICMPES;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vchbs:
+  case Intrinsic::s390_vchhs:
+  case Intrinsic::s390_vchfs:
+  case Intrinsic::s390_vchgs:
+    Opcode = SystemZISD::VICMPHS;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vchlbs:
+  case Intrinsic::s390_vchlhs:
+  case Intrinsic::s390_vchlfs:
+  case Intrinsic::s390_vchlgs:
+    Opcode = SystemZISD::VICMPHLS;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vtm:
+    Opcode = SystemZISD::VTM;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vfaebs:
+  case Intrinsic::s390_vfaehs:
+  case Intrinsic::s390_vfaefs:
+    Opcode = SystemZISD::VFAE_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vfaezbs:
+  case Intrinsic::s390_vfaezhs:
+  case Intrinsic::s390_vfaezfs:
+    Opcode = SystemZISD::VFAEZ_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vfeebs:
+  case Intrinsic::s390_vfeehs:
+  case Intrinsic::s390_vfeefs:
+    Opcode = SystemZISD::VFEE_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vfeezbs:
+  case Intrinsic::s390_vfeezhs:
+  case Intrinsic::s390_vfeezfs:
+    Opcode = SystemZISD::VFEEZ_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vfenebs:
+  case Intrinsic::s390_vfenehs:
+  case Intrinsic::s390_vfenefs:
+    Opcode = SystemZISD::VFENE_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vfenezbs:
+  case Intrinsic::s390_vfenezhs:
+  case Intrinsic::s390_vfenezfs:
+    Opcode = SystemZISD::VFENEZ_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vistrbs:
+  case Intrinsic::s390_vistrhs:
+  case Intrinsic::s390_vistrfs:
+    Opcode = SystemZISD::VISTR_CC;
+    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
+    return true;
+
+  case Intrinsic::s390_vstrcbs:
+  case Intrinsic::s390_vstrchs:
+  case Intrinsic::s390_vstrcfs:
+    Opcode = SystemZISD::VSTRC_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vstrczbs:
+  case Intrinsic::s390_vstrczhs:
+  case Intrinsic::s390_vstrczfs:
+    Opcode = SystemZISD::VSTRCZ_CC;
+    CCValid = SystemZ::CCMASK_ANY;
+    return true;
+
+  case Intrinsic::s390_vfcedbs:
+    Opcode = SystemZISD::VFCMPES;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vfchdbs:
+    Opcode = SystemZISD::VFCMPHS;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vfchedbs:
+    Opcode = SystemZISD::VFCMPHES;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  case Intrinsic::s390_vftcidb:
+    Opcode = SystemZISD::VFTCI;
+    CCValid = SystemZ::CCMASK_VCMP;
+    return true;
+
+  default:
+    return false;
+  }
+}
+
 // Emit an intrinsic with chain with a glued value instead of its CC result.
 static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
                                              unsigned Opcode) {
@@ -1273,6 +1411,23 @@ static SDValue emitIntrinsicWithChainAnd
   return Intr;
 }
 
+// Emit an intrinsic with a glued value instead of its CC result.
+static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
+                                     unsigned Opcode) {
+  // Copy all operands except the intrinsic ID.
+  unsigned NumOps = Op.getNumOperands();
+  SmallVector<SDValue, 6> Ops;
+  Ops.reserve(NumOps - 1);
+  for (unsigned I = 1; I < NumOps; ++I)
+    Ops.push_back(Op.getOperand(I));
+
+  if (Op->getNumValues() == 1)
+    return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
+  assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
+  SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
+  return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
+}
+
 // CC is a comparison that will be implemented using an integer or
 // floating-point comparison.  Return the condition code mask for
 // a branch on true.  In the integer case, CCMASK_CMP_UO is set for
@@ -1876,6 +2031,10 @@ static Comparison getCmp(SelectionDAG &D
         CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
         isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
       return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
+    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
+        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
+        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
+      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
   }
   Comparison C(CmpOp0, CmpOp1);
   C.CCMask = CCMaskForCondCode(Cond);
@@ -1924,6 +2083,9 @@ static SDValue emitCmp(SelectionDAG &DAG
     case ISD::INTRINSIC_W_CHAIN:
       Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
       break;
+    case ISD::INTRINSIC_WO_CHAIN:
+      Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
+      break;
     default:
       llvm_unreachable("Invalid comparison operands");
     }
@@ -3058,6 +3220,67 @@ SystemZTargetLowering::lowerINTRINSIC_W_
   return SDValue();
 }
 
+SDValue
+SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
+                                               SelectionDAG &DAG) const {
+  unsigned Opcode, CCValid;
+  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
+    SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
+    SDValue CC = getCCResult(DAG, Glued.getNode());
+    if (Op->getNumValues() == 1)
+      return CC;
+    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
+    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
+		    Glued, CC);
+  }
+
+  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+  switch (Id) {
+  case Intrinsic::s390_vpdi:
+    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+  case Intrinsic::s390_vperm:
+    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+  case Intrinsic::s390_vuphb:
+  case Intrinsic::s390_vuphh:
+  case Intrinsic::s390_vuphf:
+    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vuplhb:
+  case Intrinsic::s390_vuplhh:
+  case Intrinsic::s390_vuplhf:
+    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vuplb:
+  case Intrinsic::s390_vuplhw:
+  case Intrinsic::s390_vuplf:
+    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vupllb:
+  case Intrinsic::s390_vupllh:
+  case Intrinsic::s390_vupllf:
+    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1));
+
+  case Intrinsic::s390_vsumb:
+  case Intrinsic::s390_vsumh:
+  case Intrinsic::s390_vsumgh:
+  case Intrinsic::s390_vsumgf:
+  case Intrinsic::s390_vsumqf:
+  case Intrinsic::s390_vsumqg:
+    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
+                       Op.getOperand(1), Op.getOperand(2));
+  }
+
+  return SDValue();
+}
+
 namespace {
 // Says that SystemZISD operation Opcode can be used to perform the equivalent
 // of a VPERM with permute vector Bytes.  If Opcode takes three operands,
@@ -4117,6 +4340,8 @@ SDValue SystemZTargetLowering::LowerOper
     return lowerPREFETCH(Op, DAG);
   case ISD::INTRINSIC_W_CHAIN:
     return lowerINTRINSIC_W_CHAIN(Op, DAG);
+  case ISD::INTRINSIC_WO_CHAIN:
+    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
   case ISD::BUILD_VECTOR:
     return lowerBUILD_VECTOR(Op, DAG);
   case ISD::VECTOR_SHUFFLE:
@@ -4195,6 +4420,8 @@ const char *SystemZTargetLowering::getTa
     OPCODE(PERMUTE_DWORDS);
     OPCODE(PERMUTE);
     OPCODE(PACK);
+    OPCODE(PACKS_CC);
+    OPCODE(PACKLS_CC);
     OPCODE(UNPACK_HIGH);
     OPCODE(UNPACKL_HIGH);
     OPCODE(UNPACK_LOW);
@@ -4206,11 +4433,28 @@ const char *SystemZTargetLowering::getTa
     OPCODE(VICMPE);
     OPCODE(VICMPH);
     OPCODE(VICMPHL);
+    OPCODE(VICMPES);
+    OPCODE(VICMPHS);
+    OPCODE(VICMPHLS);
     OPCODE(VFCMPE);
     OPCODE(VFCMPH);
     OPCODE(VFCMPHE);
+    OPCODE(VFCMPES);
+    OPCODE(VFCMPHS);
+    OPCODE(VFCMPHES);
+    OPCODE(VFTCI);
     OPCODE(VEXTEND);
     OPCODE(VROUND);
+    OPCODE(VTM);
+    OPCODE(VFAE_CC);
+    OPCODE(VFAEZ_CC);
+    OPCODE(VFEE_CC);
+    OPCODE(VFEEZ_CC);
+    OPCODE(VFENE_CC);
+    OPCODE(VFENEZ_CC);
+    OPCODE(VISTR_CC);
+    OPCODE(VSTRC_CC);
+    OPCODE(VSTRCZ_CC);
     OPCODE(ATOMIC_SWAPW);
     OPCODE(ATOMIC_LOADW_ADD);
     OPCODE(ATOMIC_LOADW_SUB);

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=236527&r1=236526&r2=236527&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Tue May  5 14:31:09 2015
@@ -201,6 +201,11 @@ enum {
   // Pack vector operands 0 and 1 into a single vector with half-sized elements.
   PACK,
 
+  // Likewise, but saturate the result and set CC.  PACKS_CC does signed
+  // saturation and PACKLS_CC does unsigned saturation.
+  PACKS_CC,
+  PACKLS_CC,
+
   // Unpack the first half of vector operand 0 into double-sized elements.
   // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
   UNPACK_HIGH,
@@ -228,6 +233,11 @@ enum {
   VICMPH,
   VICMPHL,
 
+  // Likewise, but also set the condition codes on the result.
+  VICMPES,
+  VICMPHS,
+  VICMPHLS,
+
   // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1
   // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
   // greater than" and VFCMPHE for "ordered and greater than or equal to".
@@ -235,6 +245,14 @@ enum {
   VFCMPH,
   VFCMPHE,
 
+  // Likewise, but also set the condition codes on the result.
+  VFCMPES,
+  VFCMPHS,
+  VFCMPHES,
+
+  // Test floating-point data class for vectors.
+  VFTCI,
+
   // Extend the even f32 elements of vector operand 0 to produce a vector
   // of f64 elements.
   VEXTEND,
@@ -243,6 +261,20 @@ enum {
   // even elements of the result.
   VROUND,
 
+  // AND the two vector operands together and set CC based on the result.
+  VTM,
+
+  // String operations that set CC as a side-effect.
+  VFAE_CC,
+  VFAEZ_CC,
+  VFEE_CC,
+  VFEEZ_CC,
+  VFENE_CC,
+  VFENEZ_CC,
+  VISTR_CC,
+  VSTRC_CC,
+  VSTRCZ_CC,
+
   // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
   // ATOMIC_LOAD_<op>.
   //
@@ -438,6 +470,7 @@ private:
   SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;

Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td?rev=236527&r1=236526&r2=236527&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td Tue May  5 14:31:09 2015
@@ -100,17 +100,20 @@ let Predicates = [FeatureVector] in {
   def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>;
 
   // Load to block boundary.  The number of loaded bytes is only known
-  // at run time.
-  def VLBB : BinaryVRX<"vlbb", 0xE707, null_frag, v128any, 0>;
+  // at run time.  The instruction is really polymorphic, but v128b matches
+  // the return type of the associated intrinsic.
+  def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>;
 
   // Load count to block boundary.
   let Defs = [CC] in
     def LCBB : InstRXE<0xE727, (outs GR32:$R1),
                                (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
-                       "lcbb\t$R1, $XBD2, $M3", []>;
+                       "lcbb\t$R1, $XBD2, $M3",
+                       [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
+                                                      imm32zx4:$M3))]>;
 
   // Load with length.  The number of loaded bytes is only known at run time.
-  def VLL : BinaryVRSb<"vll", 0xE737, null_frag, 0>;
+  def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
 
   // Load multiple.
   def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
@@ -185,7 +188,7 @@ let Predicates = [FeatureVector] in {
   def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>;
 
   // Store with length.  The number of stored bytes is only known at run time.
-  def VSTL : StoreLengthVRSb<"vstl", 0xE73F, null_frag, 0>;
+  def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
 
   // Store multiple.
   def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
@@ -266,19 +269,19 @@ let Predicates = [FeatureVector] in {
   def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>;
 
   // Pack saturate.
-  defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, null_frag, null_frag,
+  defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc,
                                v128b, v128h, 1>;
-  defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, null_frag, null_frag,
+  defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc,
                                v128h, v128f, 2>;
-  defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, null_frag, null_frag,
+  defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc,
                                v128f, v128g, 3>;
 
   // Pack saturate logical.
-  defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, null_frag, null_frag,
+  defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc,
                                 v128b, v128h, 1>;
-  defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, null_frag, null_frag,
+  defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc,
                                 v128h, v128f, 2>;
-  defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, null_frag, null_frag,
+  defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc,
                                 v128f, v128g, 3>;
 
   // Sign-extend to doubleword.
@@ -344,20 +347,20 @@ let Predicates = [FeatureVector] in {
   def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
   def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
   def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
-  def VAQ : BinaryVRRc<"vaq", 0xE7F3, null_frag, v128q, v128q, 4>;
+  def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
 
   // Add compute carry.
-  def VACCB : BinaryVRRc<"vaccb", 0xE7F1, null_frag, v128b, v128b, 0>;
-  def VACCH : BinaryVRRc<"vacch", 0xE7F1, null_frag, v128h, v128h, 1>;
-  def VACCF : BinaryVRRc<"vaccf", 0xE7F1, null_frag, v128f, v128f, 2>;
-  def VACCG : BinaryVRRc<"vaccg", 0xE7F1, null_frag, v128g, v128g, 3>;
-  def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, null_frag, v128q, v128q, 4>;
+  def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
+  def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
+  def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
+  def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
+  def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
 
   // Add with carry.
-  def VACQ : TernaryVRRd<"vacq", 0xE7BB, null_frag, v128q, v128q, 4>;
+  def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
 
   // Add with carry compute carry.
-  def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, null_frag, v128q, v128q, 4>;
+  def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
 
   // And.
   def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
@@ -366,19 +369,19 @@ let Predicates = [FeatureVector] in {
   def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
 
   // Average.
-  def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, null_frag, v128b, v128b, 0>;
-  def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, null_frag, v128h, v128h, 1>;
-  def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, null_frag, v128f, v128f, 2>;
-  def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, null_frag, v128g, v128g, 3>;
+  def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
+  def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
+  def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
+  def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
 
   // Average logical.
-  def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, null_frag, v128b, v128b, 0>;
-  def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, null_frag, v128h, v128h, 1>;
-  def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, null_frag, v128f, v128f, 2>;
-  def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, null_frag, v128g, v128g, 3>;
+  def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
+  def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
+  def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
+  def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
 
   // Checksum.
-  def VCKSM : BinaryVRRc<"vcksm", 0xE766, null_frag, v128any, v128any>;
+  def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
 
   // Count leading zeros.
   def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>;
@@ -396,16 +399,16 @@ let Predicates = [FeatureVector] in {
   def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
 
   // Galois field multiply sum.
-  def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, null_frag, v128b, v128b, 0>;
-  def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, null_frag, v128h, v128h, 1>;
-  def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, null_frag, v128f, v128f, 2>;
-  def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, null_frag, v128g, v128g, 3>;
+  def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>;
+  def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>;
+  def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>;
+  def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>;
 
   // Galois field multiply sum and accumulate.
-  def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, null_frag, v128b, v128b, 0>;
-  def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, null_frag, v128h, v128h, 1>;
-  def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, null_frag, v128f, v128f, 2>;
-  def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, null_frag, v128g, v128g, 3>;
+  def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>;
+  def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>;
+  def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>;
+  def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>;
 
   // Load complement.
   def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>;
@@ -449,44 +452,44 @@ let Predicates = [FeatureVector] in {
   def VMALF  : TernaryVRRd<"vmalf",  0xE7AA, z_muladd, v128f, v128f, 2>;
 
   // Multiply and add high.
-  def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, null_frag, v128b, v128b, 0>;
-  def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, null_frag, v128h, v128h, 1>;
-  def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, null_frag, v128f, v128f, 2>;
+  def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
+  def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
+  def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
 
   // Multiply and add logical high.
-  def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, null_frag, v128b, v128b, 0>;
-  def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, null_frag, v128h, v128h, 1>;
-  def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, null_frag, v128f, v128f, 2>;
+  def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
+  def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
+  def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
 
   // Multiply and add even.
-  def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, null_frag, v128h, v128b, 0>;
-  def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, null_frag, v128f, v128h, 1>;
-  def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, null_frag, v128g, v128f, 2>;
+  def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
+  def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
+  def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
 
   // Multiply and add logical even.
-  def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, null_frag, v128h, v128b, 0>;
-  def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, null_frag, v128f, v128h, 1>;
-  def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, null_frag, v128g, v128f, 2>;
+  def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
+  def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
+  def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
 
   // Multiply and add odd.
-  def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, null_frag, v128h, v128b, 0>;
-  def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, null_frag, v128f, v128h, 1>;
-  def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, null_frag, v128g, v128f, 2>;
+  def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
+  def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
+  def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
 
   // Multiply and add logical odd.
-  def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, null_frag, v128h, v128b, 0>;
-  def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, null_frag, v128f, v128h, 1>;
-  def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, null_frag, v128g, v128f, 2>;
+  def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
+  def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
+  def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
 
   // Multiply high.
-  def VMHB : BinaryVRRc<"vmhb", 0xE7A3, null_frag, v128b, v128b, 0>;
-  def VMHH : BinaryVRRc<"vmhh", 0xE7A3, null_frag, v128h, v128h, 1>;
-  def VMHF : BinaryVRRc<"vmhf", 0xE7A3, null_frag, v128f, v128f, 2>;
+  def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
+  def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
+  def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
 
   // Multiply logical high.
-  def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, null_frag, v128b, v128b, 0>;
-  def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, null_frag, v128h, v128h, 1>;
-  def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, null_frag, v128f, v128f, 2>;
+  def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
+  def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
+  def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
 
   // Multiply low.
   def VMLB  : BinaryVRRc<"vmlb",  0xE7A2, mul, v128b, v128b, 0>;
@@ -494,24 +497,24 @@ let Predicates = [FeatureVector] in {
   def VMLF  : BinaryVRRc<"vmlf",  0xE7A2, mul, v128f, v128f, 2>;
 
   // Multiply even.
-  def VMEB : BinaryVRRc<"vmeb", 0xE7A6, null_frag, v128h, v128b, 0>;
-  def VMEH : BinaryVRRc<"vmeh", 0xE7A6, null_frag, v128f, v128h, 1>;
-  def VMEF : BinaryVRRc<"vmef", 0xE7A6, null_frag, v128g, v128f, 2>;
+  def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
+  def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
+  def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
 
   // Multiply logical even.
-  def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, null_frag, v128h, v128b, 0>;
-  def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, null_frag, v128f, v128h, 1>;
-  def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, null_frag, v128g, v128f, 2>;
+  def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
+  def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
+  def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
 
   // Multiply odd.
-  def VMOB : BinaryVRRc<"vmob", 0xE7A7, null_frag, v128h, v128b, 0>;
-  def VMOH : BinaryVRRc<"vmoh", 0xE7A7, null_frag, v128f, v128h, 1>;
-  def VMOF : BinaryVRRc<"vmof", 0xE7A7, null_frag, v128g, v128f, 2>;
+  def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
+  def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
+  def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
 
   // Multiply logical odd.
-  def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, null_frag, v128h, v128b, 0>;
-  def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, null_frag, v128f, v128h, 1>;
-  def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, null_frag, v128g, v128f, 2>;
+  def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
+  def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
+  def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
 
   // Nor.
   def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
@@ -524,22 +527,26 @@ let Predicates = [FeatureVector] in {
   def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
 
   // Element rotate left logical (with vector shift amount).
-  def VERLLVB : BinaryVRRc<"verllvb", 0xE773, null_frag, v128b, v128b, 0>;
-  def VERLLVH : BinaryVRRc<"verllvh", 0xE773, null_frag, v128h, v128h, 1>;
-  def VERLLVF : BinaryVRRc<"verllvf", 0xE773, null_frag, v128f, v128f, 2>;
-  def VERLLVG : BinaryVRRc<"verllvg", 0xE773, null_frag, v128g, v128g, 3>;
+  def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
+                           v128b, v128b, 0>;
+  def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
+                           v128h, v128h, 1>;
+  def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
+                           v128f, v128f, 2>;
+  def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
+                           v128g, v128g, 3>;
 
   // Element rotate left logical (with scalar shift amount).
-  def VERLLB : BinaryVRSa<"verllb", 0xE733, null_frag, v128b, v128b, 0>;
-  def VERLLH : BinaryVRSa<"verllh", 0xE733, null_frag, v128h, v128h, 1>;
-  def VERLLF : BinaryVRSa<"verllf", 0xE733, null_frag, v128f, v128f, 2>;
-  def VERLLG : BinaryVRSa<"verllg", 0xE733, null_frag, v128g, v128g, 3>;
+  def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
+  def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
+  def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
+  def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
 
   // Element rotate and insert under mask.
-  def VERIMB : QuaternaryVRId<"verimb", 0xE772, null_frag, v128b, v128b, 0>;
-  def VERIMH : QuaternaryVRId<"verimh", 0xE772, null_frag, v128h, v128h, 1>;
-  def VERIMF : QuaternaryVRId<"verimf", 0xE772, null_frag, v128f, v128f, 2>;
-  def VERIMG : QuaternaryVRId<"verimg", 0xE772, null_frag, v128g, v128g, 3>;
+  def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>;
+  def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>;
+  def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>;
+  def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>;
 
   // Element shift left (with vector shift amount).
   def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>;
@@ -578,45 +585,48 @@ let Predicates = [FeatureVector] in {
   def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>;
 
   // Shift left.
-  def VSL : BinaryVRRc<"vsl", 0xE774, null_frag, v128b, v128b>;
+  def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>;
 
   // Shift left by byte.
-  def VSLB : BinaryVRRc<"vslb", 0xE775, null_frag, v128b, v128b>;
+  def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>;
 
   // Shift left double by byte.
   def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
+  def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
+            (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
 
   // Shift right arithmetic.
-  def VSRA : BinaryVRRc<"vsra", 0xE77E, null_frag, v128b, v128b>;
+  def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
 
   // Shift right arithmetic by byte.
-  def VSRAB : BinaryVRRc<"vsrab", 0xE77F, null_frag, v128b, v128b>;
+  def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>;
 
   // Shift right logical.
-  def VSRL : BinaryVRRc<"vsrl", 0xE77C, null_frag, v128b, v128b>;
+  def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>;
 
   // Shift right logical by byte.
-  def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, null_frag, v128b, v128b>;
+  def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
 
   // Subtract.
   def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
   def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
   def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
   def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>;
-  def VSQ : BinaryVRRc<"vsq", 0xE7F7, null_frag, v128q, v128q, 4>;
+  def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>;
 
   // Subtract compute borrow indication.
-  def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, null_frag, v128b, v128b, 0>;
-  def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, null_frag, v128h, v128h, 1>;
-  def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, null_frag, v128f, v128f, 2>;
-  def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, null_frag, v128g, v128g, 3>;
-  def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, null_frag, v128q, v128q, 4>;
+  def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>;
+  def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>;
+  def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>;
+  def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>;
+  def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>;
 
   // Subtract with borrow indication.
-  def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, null_frag, v128q, v128q, 4>;
+  def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>;
 
   // Subtract with borrow compute borrow indication.
-  def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, null_frag, v128q, v128q, 4>;
+  def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq,
+                            v128q, v128q, 4>;
 
   // Sum across doubleword.
   def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>;
@@ -742,38 +752,38 @@ let Predicates = [FeatureVector] in {
   }
 
   // Compare equal.
-  defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, null_frag,
+  defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes,
                                v128b, v128b, 0>;
-  defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, null_frag,
+  defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes,
                                v128h, v128h, 1>;
-  defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, null_frag,
+  defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes,
                                v128f, v128f, 2>;
-  defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, null_frag,
+  defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes,
                                v128g, v128g, 3>;
 
   // Compare high.
-  defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, null_frag,
+  defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs,
                               v128b, v128b, 0>;
-  defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, null_frag,
+  defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs,
                               v128h, v128h, 1>;
-  defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, null_frag,
+  defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs,
                               v128f, v128f, 2>;
-  defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, null_frag,
+  defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs,
                               v128g, v128g, 3>;
 
   // Compare high logical.
-  defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, null_frag,
+  defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls,
                                v128b, v128b, 0>;
-  defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, null_frag,
+  defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls,
                                v128h, v128h, 1>;
-  defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, null_frag,
+  defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls,
                                v128f, v128f, 2>;
-  defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, null_frag,
+  defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls,
                                v128g, v128g, 3>;
 
   // Test under mask.
   let Defs = [CC] in
-    def VTM : CompareVRRa<"vtm", 0xE7D8, null_frag, v128any, 0>;
+    def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>;
 }
 
 //===----------------------------------------------------------------------===//
@@ -823,7 +833,7 @@ let Predicates = [FeatureVector] in {
   def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
 
   // Load FP integer.
-  def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, null_frag, v128db, v128db, 3, 0>;
+  def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
   def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
   defm : VectorRounding<VFIDB, v128db>;
   defm : VectorRounding<WFIDB, v64db>;
@@ -872,7 +882,7 @@ let Predicates = [FeatureVector] in {
 
   // Test data class immediate.
   let Defs = [CC] in {
-    def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, null_frag, v128g, v128db, 3, 0>;
+    def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
     def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
   }
 }
@@ -891,19 +901,19 @@ let Predicates = [FeatureVector] in {
     def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
 
   // Compare equal.
-  defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, null_frag,
+  defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
                                 v128g, v128db, 3, 0>;
   defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
                                 v64g, v64db, 3, 8>;
 
   // Compare high.
-  defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, null_frag,
+  defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
                                 v128g, v128db, 3, 0>;
   defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
                                 v64g, v64db, 3, 8>;
 
   // Compare high or equal.
-  defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, null_frag,
+  defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
                                  v128g, v128db, 3, 0>;
   defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
                                  v64g, v64db, 3, 8>;
@@ -1026,62 +1036,62 @@ let AddedComplexity = 4 in {
 //===----------------------------------------------------------------------===//
 
 let Predicates = [FeatureVector] in {
-  defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, null_frag, null_frag,
+  defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc,
                                 v128b, v128b, 0, 0>;
-  defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, null_frag, null_frag,
+  defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc,
                                 v128h, v128h, 1, 0>;
-  defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, null_frag, null_frag,
+  defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc,
                                 v128f, v128f, 2, 0>;
-  defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, null_frag, null_frag,
+  defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc,
                                  v128b, v128b, 0, 2>;
-  defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, null_frag, null_frag,
+  defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc,
                                  v128h, v128h, 1, 2>;
-  defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, null_frag, null_frag,
+  defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc,
                                  v128f, v128f, 2, 2>;
 
-  defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, null_frag, null_frag,
+  defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc,
                                v128b, v128b, 0, 0, 1>;
-  defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, null_frag, null_frag,
+  defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc,
                                v128h, v128h, 1, 0, 1>;
-  defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, null_frag, null_frag,
+  defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc,
                                v128f, v128f, 2, 0, 1>;
-  defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, null_frag, null_frag,
+  defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc,
                                 v128b, v128b, 0, 2, 3>;
-  defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, null_frag, null_frag,
+  defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc,
                                 v128h, v128h, 1, 2, 3>;
-  defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, null_frag, null_frag,
+  defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc,
                                 v128f, v128f, 2, 2, 3>;
 
-  defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, null_frag, null_frag,
+  defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc,
                                 v128b, v128b, 0, 0, 1>;
-  defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, null_frag, null_frag,
+  defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc,
                                 v128h, v128h, 1, 0, 1>;
-  defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, null_frag, null_frag,
+  defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc,
                                 v128f, v128f, 2, 0, 1>;
-  defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, null_frag, null_frag,
-                                 v128b, v128b, 0, 2, 3>;
-  defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, null_frag, null_frag,
-                                 v128h, v128h, 1, 2, 3>;
-  defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, null_frag, null_frag,
-                                 v128f, v128f, 2, 2, 3>;
+  defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb,
+                                 z_vfenez_cc, v128b, v128b, 0, 2, 3>;
+  defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh,
+                                 z_vfenez_cc, v128h, v128h, 1, 2, 3>;
+  defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf,
+                                 z_vfenez_cc, v128f, v128f, 2, 2, 3>;
 
-  defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, null_frag, null_frag,
+  defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc,
                                v128b, v128b, 0>;
-  defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, null_frag, null_frag,
+  defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc,
                                v128h, v128h, 1>;
-  defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, null_frag, null_frag,
+  defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc,
                                v128f, v128f, 2>;
 
-  defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, null_frag, null_frag,
-                                    v128b, v128b, 0, 0>;
-  defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, null_frag, null_frag,
-                                    v128h, v128h, 1, 0>;
-  defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, null_frag, null_frag,
-                                    v128f, v128f, 2, 0>;
-  defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, null_frag, null_frag,
-                                     v128b, v128b, 0, 2>;
-  defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, null_frag, null_frag,
-                                     v128h, v128h, 1, 2>;
-  defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, null_frag, null_frag,
-                                     v128f, v128f, 2, 2>;
+  defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
+                                    z_vstrc_cc, v128b, v128b, 0, 0>;
+  defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch,
+                                    z_vstrc_cc, v128h, v128h, 1, 0>;
+  defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf,
+                                    z_vstrc_cc, v128f, v128f, 2, 0>;
+  defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb,
+                                     z_vstrcz_cc, v128b, v128b, 0, 2>;
+  defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh,
+                                     z_vstrcz_cc, v128h, v128h, 1, 2>;
+  defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
+                                     z_vstrcz_cc, v128f, v128f, 2, 2>;
 }

Modified: llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZOperators.td?rev=236527&r1=236526&r2=236527&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZOperators.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZOperators.td Tue May  5 14:31:09 2015
@@ -94,6 +94,9 @@ def SDT_ZReplicate          : SDTypeProf
 def SDT_ZVecUnaryConv       : SDTypeProfile<1, 1,
                                             [SDTCisVec<0>,
                                              SDTCisVec<1>]>;
+def SDT_ZVecUnary           : SDTypeProfile<1, 1,
+                                            [SDTCisVec<0>,
+                                             SDTCisSameAs<0, 1>]>;
 def SDT_ZVecBinary          : SDTypeProfile<1, 2,
                                             [SDTCisVec<0>,
                                              SDTCisSameAs<0, 1>,
@@ -106,6 +109,10 @@ def SDT_ZVecBinaryConv      : SDTypeProf
                                             [SDTCisVec<0>,
                                              SDTCisVec<1>,
                                              SDTCisSameAs<1, 2>]>;
+def SDT_ZVecBinaryConvInt   : SDTypeProfile<1, 2,
+                                            [SDTCisVec<0>,
+                                             SDTCisVec<1>,
+                                             SDTCisVT<2, i32>]>;
 def SDT_ZRotateMask         : SDTypeProfile<1, 2,
                                             [SDTCisVec<0>,
                                              SDTCisVT<1, i32>,
@@ -124,6 +131,12 @@ def SDT_ZVecTernaryInt      : SDTypeProf
                                              SDTCisSameAs<0, 1>,
                                              SDTCisSameAs<0, 2>,
                                              SDTCisVT<3, i32>]>;
+def SDT_ZVecQuaternaryInt   : SDTypeProfile<1, 4,
+                                            [SDTCisVec<0>,
+                                             SDTCisSameAs<0, 1>,
+                                             SDTCisSameAs<0, 2>,
+                                             SDTCisSameAs<0, 3>,
+                                             SDTCisVT<4, i32>]>;
 
 //===----------------------------------------------------------------------===//
 // Node definitions
@@ -193,6 +206,10 @@ def z_permute_dwords    : SDNode<"System
                                  SDT_ZVecTernaryInt>;
 def z_permute           : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
 def z_pack              : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
+def z_packs_cc          : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv,
+                                 [SDNPOutGlue]>;
+def z_packls_cc         : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv,
+                                 [SDNPOutGlue]>;
 def z_unpack_high       : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
 def z_unpackl_high      : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
 def z_unpack_low        : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
@@ -207,11 +224,44 @@ def z_vsum              : SDNode<"System
 def z_vicmpe            : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
 def z_vicmph            : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
 def z_vicmphl           : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
+def z_vicmpes           : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary,
+                                 [SDNPOutGlue]>;
+def z_vicmphs           : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary,
+                                 [SDNPOutGlue]>;
+def z_vicmphls          : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary,
+                                 [SDNPOutGlue]>;
 def z_vfcmpe            : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
 def z_vfcmph            : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
 def z_vfcmphe           : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
+def z_vfcmpes           : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv,
+                                 [SDNPOutGlue]>;
+def z_vfcmphs           : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv,
+                                 [SDNPOutGlue]>;
+def z_vfcmphes          : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv,
+                                 [SDNPOutGlue]>;
 def z_vextend           : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
 def z_vround            : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
+def z_vtm               : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>;
+def z_vfae_cc           : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt,
+                                 [SDNPOutGlue]>;
+def z_vfaez_cc          : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt,
+                                 [SDNPOutGlue]>;
+def z_vfee_cc           : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary,
+                                 [SDNPOutGlue]>;
+def z_vfeez_cc          : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary,
+                                 [SDNPOutGlue]>;
+def z_vfene_cc          : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary,
+                                 [SDNPOutGlue]>;
+def z_vfenez_cc         : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary,
+                                 [SDNPOutGlue]>;
+def z_vistr_cc          : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary,
+                                 [SDNPOutGlue]>;
+def z_vstrc_cc          : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt,
+                                 [SDNPOutGlue]>;
+def z_vstrcz_cc         : SDNode<"SystemZISD::VSTRCZ_CC",
+                                 SDT_ZVecQuaternaryInt, [SDNPOutGlue]>;
+def z_vftci             : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt,
+                                 [SDNPOutGlue]>;
 
 class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
   : SDNode<"SystemZISD::"##name, profile,

Added: llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics.ll?rev=236527&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics.ll Tue May  5 14:31:09 2015
@@ -0,0 +1,3335 @@
+; Test vector intrinsics.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare i32 @llvm.s390.lcbb(i8 *, i32)
+declare <16 x i8> @llvm.s390.vlbb(i8 *, i32)
+declare <16 x i8> @llvm.s390.vll(i32, i8 *)
+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
+declare void @llvm.s390.vstl(<16 x i8>, i32, i8 *)
+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
+declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>)
+declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>)
+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
+declare <8 x i16> @llvm.s390.vupllb(<16 x i8>)
+declare <4 x i32> @llvm.s390.vupllh(<8 x i16>)
+declare <2 x i64> @llvm.s390.vupllf(<4 x i32>)
+declare <16 x i8> @llvm.s390.vaccb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vacch(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vaccf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vaccg(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vaq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vacq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vaccq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vacccq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vavgb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vavgh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vavgf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vavgg(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vavglb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vavglh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vavglf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vavglg(<2 x i64>, <2 x i64>)
+declare <4 x i32> @llvm.s390.vcksm(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vgfmb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vgfmh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vgfmf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vgfmg(<2 x i64>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vgfmab(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vgfmah(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vgfmaf(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vgfmag(<2 x i64>, <2 x i64>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vmahb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmahh(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmahf(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vmalhb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmalhh(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmalhf(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmaeb(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaeh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmaef(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vmaleb(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaleh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmalef(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vmaob(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaoh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmaof(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <8 x i16> @llvm.s390.vmalob(<16 x i8>, <16 x i8>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmaloh(<8 x i16>, <8 x i16>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vmalof(<4 x i32>, <4 x i32>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vmhb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmhh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmhf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vmlhb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vmlhh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vmlhf(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmeb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmeh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmef(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmleb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmleh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmlef(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmob(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmoh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmof(<4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.s390.vmlob(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vmloh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vmlof(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.verllvb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.verllvh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.verllvf(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.verllvg(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.verllb(<16 x i8>, i32)
+declare <8 x i16> @llvm.s390.verllh(<8 x i16>, i32)
+declare <4 x i32> @llvm.s390.verllf(<4 x i32>, i32)
+declare <2 x i64> @llvm.s390.verllg(<2 x i64>, i32)
+declare <16 x i8> @llvm.s390.verimb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.verimh(<8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.verimf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare <2 x i64> @llvm.s390.verimg(<2 x i64>, <2 x i64>, <2 x i64>, i32)
+declare <16 x i8> @llvm.s390.vsl(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vslb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsra(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsrab(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsrl(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsrlb(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
+declare <16 x i8> @llvm.s390.vscbib(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vscbih(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vscbif(<4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.s390.vscbig(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vsq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsbiq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vscbiq(<16 x i8>, <16 x i8>)
+declare <16 x i8> @llvm.s390.vsbcbiq(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vsumb(<16 x i8>, <16 x i8>)
+declare <4 x i32> @llvm.s390.vsumh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vsumgh(<8 x i16>, <8 x i16>)
+declare <2 x i64> @llvm.s390.vsumgf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vsumqf(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vsumqg(<2 x i64>, <2 x i64>)
+declare i32 @llvm.s390.vtm(<16 x i8>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32>, <4 x i32>)
+declare {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32>, <4 x i32>)
+declare {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64>, <2 x i64>)
+declare {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32>, <4 x i32>)
+declare {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64>, <2 x i64>)
+declare <16 x i8> @llvm.s390.vfaeb(<16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vfaeh(<8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vfaef(<4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8>, <16 x i8>, i32)
+declare {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16>, <8 x i16>, i32)
+declare {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32>, <4 x i32>, i32)
+declare <16 x i8> @llvm.s390.vfaezb(<16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vfaezh(<8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vfaezf(<4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8>, <16 x i8>, i32)
+declare {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16>, <8 x i16>, i32)
+declare {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32>, <4 x i32>, i32)
+declare <16 x i8> @llvm.s390.vfeeb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfeeh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfeef(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vfeezb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfeezh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfeezf(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vfeneb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfeneh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfenef(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vfenezb(<16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.s390.vfenezh(<8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.s390.vfenezf(<4 x i32>, <4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8>, <16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16>, <8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32>, <4 x i32>)
+declare <16 x i8> @llvm.s390.vistrb(<16 x i8>)
+declare <8 x i16> @llvm.s390.vistrh(<8 x i16>)
+declare <4 x i32> @llvm.s390.vistrf(<4 x i32>)
+declare {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8>)
+declare {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16>)
+declare {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32>)
+declare <16 x i8> @llvm.s390.vstrcb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vstrch(<8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vstrcf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8>, <16 x i8>, <16 x i8>,
+                                            i32)
+declare {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16>, <8 x i16>, <8 x i16>,
+                                            i32)
+declare {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32>, <4 x i32>, <4 x i32>,
+                                            i32)
+declare <16 x i8> @llvm.s390.vstrczb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
+declare <8 x i16> @llvm.s390.vstrczh(<8 x i16>, <8 x i16>, <8 x i16>, i32)
+declare <4 x i32> @llvm.s390.vstrczf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
+declare {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8>, <16 x i8>, <16 x i8>,
+                                             i32)
+declare {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16>, <8 x i16>, <8 x i16>,
+                                             i32)
+declare {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32>, <4 x i32>, <4 x i32>,
+                                             i32)
+declare {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double>, <2 x double>)
+declare {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double>, <2 x double>)
+declare {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double>, <2 x double>)
+declare {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double>, i32)
+declare <2 x double> @llvm.s390.vfidb(<2 x double>, i32, i32)
+
+; LCBB with the lowest M3 operand.
+define i32 @test_lcbb1(i8 *%ptr) {
+; CHECK-LABEL: test_lcbb1:
+; CHECK: lcbb %r2, 0(%r2), 0
+; CHECK: br %r14
+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 0)
+  ret i32 %res
+}
+
+; LCBB with the highest M3 operand.
+define i32 @test_lcbb2(i8 *%ptr) {
+; CHECK-LABEL: test_lcbb2:
+; CHECK: lcbb %r2, 0(%r2), 15
+; CHECK: br %r14
+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 15)
+  ret i32 %res
+}
+
+; LCBB with a displacement and index.
+define i32 @test_lcbb3(i8 *%base, i64 %index) {
+; CHECK-LABEL: test_lcbb3:
+; CHECK: lcbb %r2, 4095({{%r2,%r3|%r3,%r2}}), 4
+; CHECK: br %r14
+  %add = add i64 %index, 4095
+  %ptr = getelementptr i8, i8 *%base, i64 %add
+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 4)
+  ret i32 %res
+}
+
+; LCBB with an out-of-range displacement.
+define i32 @test_lcbb4(i8 *%base) {
+; CHECK-LABEL: test_lcbb4:
+; CHECK: lcbb %r2, 0({{%r[1-5]}}), 5
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 5)
+  ret i32 %res
+}
+
+; VLBB with the lowest M3 operand.
+define <16 x i8> @test_vlbb1(i8 *%ptr) {
+; CHECK-LABEL: test_vlbb1:
+; CHECK: vlbb %v24, 0(%r2), 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 0)
+  ret <16 x i8> %res
+}
+
+; VLBB with the highest M3 operand.
+define <16 x i8> @test_vlbb2(i8 *%ptr) {
+; CHECK-LABEL: test_vlbb2:
+; CHECK: vlbb %v24, 0(%r2), 15
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 15)
+  ret <16 x i8> %res
+}
+
+; VLBB with a displacement and index.
+define <16 x i8> @test_vlbb3(i8 *%base, i64 %index) {
+; CHECK-LABEL: test_vlbb3:
+; CHECK: vlbb %v24, 4095({{%r2,%r3|%r3,%r2}}), 4
+; CHECK: br %r14
+  %add = add i64 %index, 4095
+  %ptr = getelementptr i8, i8 *%base, i64 %add
+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 4)
+  ret <16 x i8> %res
+}
+
+; VLBB with an out-of-range displacement.
+define <16 x i8> @test_vlbb4(i8 *%base) {
+; CHECK-LABEL: test_vlbb4:
+; CHECK: vlbb %v24, 0({{%r[1-5]}}), 5
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 5)
+  ret <16 x i8> %res
+}
+
+; VLL with the lowest in-range displacement.
+define <16 x i8> @test_vll1(i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vll1:
+; CHECK: vll %v24, %r3, 0(%r2)
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VLL with the highest in-range displacement.
+define <16 x i8> @test_vll2(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vll2:
+; CHECK: vll %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4095
+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VLL with an out-of-range displacement.
+define <16 x i8> @test_vll3(i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vll3:
+; CHECK: vll %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; Check that VLL doesn't allow an index.
+define <16 x i8> @test_vll4(i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vll4:
+; CHECK: vll %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 %index
+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
+  ret <16 x i8> %res
+}
+
+; VPDI taking element 0 from each half.
+define <2 x i64> @test_vpdi1(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpdi1:
+; CHECK: vpdi %v24, %v24, %v26, 0
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 0)
+  ret <2 x i64> %res
+}
+
+; VPDI taking element 1 from each half.
+define <2 x i64> @test_vpdi2(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpdi2:
+; CHECK: vpdi %v24, %v24, %v26, 10
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 10)
+  ret <2 x i64> %res
+}
+
+; VPERM.
+define <16 x i8> @test_vperm(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vperm:
+; CHECK: vperm %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vperm(<16 x i8> %a, <16 x i8> %b,
+                                         <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VPKSH.
+define <16 x i8> @test_vpksh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vpksh:
+; CHECK: vpksh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vpksh(<8 x i16> %a, <8 x i16> %b)
+  ret <16 x i8> %res
+}
+
+; VPKSF.
+define <8 x i16> @test_vpksf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vpksf:
+; CHECK: vpksf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vpksf(<4 x i32> %a, <4 x i32> %b)
+  ret <8 x i16> %res
+}
+
+; VPKSG.
+define <4 x i32> @test_vpksg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpksg:
+; CHECK: vpksg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vpksg(<2 x i64> %a, <2 x i64> %b)
+  ret <4 x i32> %res
+}
+
+; VPKSHS with no processing of the result.
+define <16 x i8> @test_vpkshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpkshs:
+; CHECK: vpkshs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VPKSHS, storing to %ptr if all values were saturated.
+define <16 x i8> @test_vpkshs_all_store(<8 x i16> %a, <8 x i16> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vpkshs_all_store:
+; CHECK: vpkshs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp uge i32 %cc, 3
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <16 x i8> %res
+}
+
+; VPKSFS with no processing of the result.
+define <8 x i16> @test_vpksfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpksfs:
+; CHECK: vpksfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VPKSFS, storing to %ptr if any values were saturated.
+define <8 x i16> @test_vpksfs_any_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vpksfs_any_store:
+; CHECK: vpksfs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp ugt i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <8 x i16> %res
+}
+
+; VPKSGS with no processing of the result.
+define <4 x i32> @test_vpksgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpksgs:
+; CHECK: vpksgs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; VPKSGS, storing to %ptr if no elements were saturated.
+define <4 x i32> @test_vpksgs_none_store(<2 x i64> %a, <2 x i64> %b,
+                                         i32 *%ptr) {
+; CHECK-LABEL: test_vpksgs_none_store:
+; CHECK: vpksgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp sle i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <4 x i32> %res
+}
+
+; VPKLSH.
+define <16 x i8> @test_vpklsh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vpklsh:
+; CHECK: vpklsh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %a, <8 x i16> %b)
+  ret <16 x i8> %res
+}
+
+; VPKLSF.
+define <8 x i16> @test_vpklsf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vpklsf:
+; CHECK: vpklsf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %a, <4 x i32> %b)
+  ret <8 x i16> %res
+}
+
+; VPKLSG.
+define <4 x i32> @test_vpklsg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vpklsg:
+; CHECK: vpklsg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %a, <2 x i64> %b)
+  ret <4 x i32> %res
+}
+
+; VPKLSHS with no processing of the result.
+define <16 x i8> @test_vpklshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpklshs:
+; CHECK: vpklshs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VPKLSHS, storing to %ptr if all values were saturated.
+define <16 x i8> @test_vpklshs_all_store(<8 x i16> %a, <8 x i16> %b,
+                                         i32 *%ptr) {
+; CHECK-LABEL: test_vpklshs_all_store:
+; CHECK: vpklshs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp eq i32 %cc, 3
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <16 x i8> %res
+}
+
+; VPKLSFS with no processing of the result.
+define <8 x i16> @test_vpklsfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpklsfs:
+; CHECK: vpklsfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VPKLSFS, storing to %ptr if any values were saturated.
+define <8 x i16> @test_vpklsfs_any_store(<4 x i32> %a, <4 x i32> %b,
+                                         i32 *%ptr) {
+; CHECK-LABEL: test_vpklsfs_any_store:
+; CHECK: vpklsfs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp ne i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <8 x i16> %res
+}
+
+; VPKLSGS with no processing of the result.
+define <4 x i32> @test_vpklsgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vpklsgs:
+; CHECK: vpklsgs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; VPKLSGS, storing to %ptr if no elements were saturated.
+define <4 x i32> @test_vpklsgs_none_store(<2 x i64> %a, <2 x i64> %b,
+                                          i32 *%ptr) {
+; CHECK-LABEL: test_vpklsgs_none_store:
+; CHECK: vpklsgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp eq i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <4 x i32> %res
+}
+
+; VSTL with the lowest in-range displacement.
+define void @test_vstl1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
+; CHECK-LABEL: test_vstl1:
+; CHECK: vstl %v24, %r3, 0(%r2)
+; CHECK: br %r14
+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; VSTL with the highest in-range displacement.
+define void @test_vstl2(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstl2:
+; CHECK: vstl %v24, %r3, 4095(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4095
+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; VSTL with an out-of-range displacement.
+define void @test_vstl3(<16 x i8> %vec, i8 *%base, i32 %length) {
+; CHECK-LABEL: test_vstl3:
+; CHECK: vstl %v24, %r3, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 4096
+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; Check that VSTL doesn't allow an index.
+define void @test_vstl4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
+; CHECK-LABEL: test_vstl4:
+; CHECK: vstl %v24, %r4, 0({{%r[1-5]}})
+; CHECK: br %r14
+  %ptr = getelementptr i8, i8 *%base, i64 %index
+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
+  ret void
+}
+
+; VUPHB.
+define <8 x i16> @test_vuphb(<16 x i8> %a) {
+; CHECK-LABEL: test_vuphb:
+; CHECK: vuphb %v24, %v24
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vuphb(<16 x i8> %a)
+  ret <8 x i16> %res
+}
+
+; VUPHH.
+define <4 x i32> @test_vuphh(<8 x i16> %a) {
+; CHECK-LABEL: test_vuphh:
+; CHECK: vuphh %v24, %v24
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vuphh(<8 x i16> %a)
+  ret <4 x i32> %res
+}
+
+; VUPHF.
+define <2 x i64> @test_vuphf(<4 x i32> %a) {
+; CHECK-LABEL: test_vuphf:
+; CHECK: vuphf %v24, %v24
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vuphf(<4 x i32> %a)
+  ret <2 x i64> %res
+}
+
+; VUPLHB.
+define <8 x i16> @test_vuplhb(<16 x i8> %a) {
+; CHECK-LABEL: test_vuplhb:
+; CHECK: vuplhb %v24, %v24
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> %a)
+  ret <8 x i16> %res
+}
+
+; VUPLHH.
+define <4 x i32> @test_vuplhh(<8 x i16> %a) {
+; CHECK-LABEL: test_vuplhh:
+; CHECK: vuplhh %v24, %v24
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> %a)
+  ret <4 x i32> %res
+}
+
+; VUPLHF.
+define <2 x i64> @test_vuplhf(<4 x i32> %a) {
+; CHECK-LABEL: test_vuplhf:
+; CHECK: vuplhf %v24, %v24
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> %a)
+  ret <2 x i64> %res
+}
+
+; VUPLB.
+define <8 x i16> @test_vuplb(<16 x i8> %a) {
+; CHECK-LABEL: test_vuplb:
+; CHECK: vuplb %v24, %v24
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vuplb(<16 x i8> %a)
+  ret <8 x i16> %res
+}
+
+; VUPLHW.
+define <4 x i32> @test_vuplhw(<8 x i16> %a) {
+; CHECK-LABEL: test_vuplhw:
+; CHECK: vuplhw %v24, %v24
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %a)
+  ret <4 x i32> %res
+}
+
+; VUPLF.
+define <2 x i64> @test_vuplf(<4 x i32> %a) {
+; CHECK-LABEL: test_vuplf:
+; CHECK: vuplf %v24, %v24
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vuplf(<4 x i32> %a)
+  ret <2 x i64> %res
+}
+
+; VUPLLB.
+define <8 x i16> @test_vupllb(<16 x i8> %a) {
+; CHECK-LABEL: test_vupllb:
+; CHECK: vupllb %v24, %v24
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vupllb(<16 x i8> %a)
+  ret <8 x i16> %res
+}
+
+; VUPLLH.
+define <4 x i32> @test_vupllh(<8 x i16> %a) {
+; CHECK-LABEL: test_vupllh:
+; CHECK: vupllh %v24, %v24
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vupllh(<8 x i16> %a)
+  ret <4 x i32> %res
+}
+
+; VUPLLF.
+define <2 x i64> @test_vupllf(<4 x i32> %a) {
+; CHECK-LABEL: test_vupllf:
+; CHECK: vupllf %v24, %v24
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vupllf(<4 x i32> %a)
+  ret <2 x i64> %res
+}
+
+; VACCB.
+define <16 x i8> @test_vaccb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaccb:
+; CHECK: vaccb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vaccb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VACCH.
+define <8 x i16> @test_vacch(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vacch:
+; CHECK: vacch %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vacch(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VACCF.
+define <4 x i32> @test_vaccf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vaccf:
+; CHECK: vaccf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vaccf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VACCG.
+define <2 x i64> @test_vaccg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vaccg:
+; CHECK: vaccg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vaccg(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %res
+}
+
+; VAQ.
+define <16 x i8> @test_vaq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaq:
+; CHECK: vaq %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vaq(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VACQ.
+define <16 x i8> @test_vacq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vacq:
+; CHECK: vacq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vacq(<16 x i8> %a, <16 x i8> %b,
+                                        <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VACCQ.
+define <16 x i8> @test_vaccq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vaccq:
+; CHECK: vaccq %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vaccq(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VACCCQ.
+define <16 x i8> @test_vacccq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vacccq:
+; CHECK: vacccq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vacccq(<16 x i8> %a, <16 x i8> %b,
+                                          <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VAVGB.
+define <16 x i8> @test_vavgb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vavgb:
+; CHECK: vavgb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vavgb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VAVGH.
+define <8 x i16> @test_vavgh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vavgh:
+; CHECK: vavgh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vavgh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VAVGF.
+define <4 x i32> @test_vavgf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vavgf:
+; CHECK: vavgf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vavgf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VAVGG.
+define <2 x i64> @test_vavgg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vavgg:
+; CHECK: vavgg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vavgg(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %res
+}
+
+; VAVGLB.
+define <16 x i8> @test_vavglb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vavglb:
+; CHECK: vavglb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vavglb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VAVGLH.
+define <8 x i16> @test_vavglh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vavglh:
+; CHECK: vavglh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vavglh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VAVGLF.
+define <4 x i32> @test_vavglf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vavglf:
+; CHECK: vavglf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vavglf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VAVGLG.
+define <2 x i64> @test_vavglg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vavglg:
+; CHECK: vavglg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vavglg(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %res
+}
+
+; VCKSM.
+define <4 x i32> @test_vcksm(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vcksm:
+; CHECK: vcksm %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vcksm(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VGFMB.
+define <8 x i16> @test_vgfmb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vgfmb:
+; CHECK: vgfmb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %a, <16 x i8> %b)
+  ret <8 x i16> %res
+}
+
+; VGFMH.
+define <4 x i32> @test_vgfmh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vgfmh:
+; CHECK: vgfmh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %a, <8 x i16> %b)
+  ret <4 x i32> %res
+}
+
+; VGFMF.
+define <2 x i64> @test_vgfmf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vgfmf:
+; CHECK: vgfmf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %a, <4 x i32> %b)
+  ret <2 x i64> %res
+}
+
+; VGFMG.
+define <16 x i8> @test_vgfmg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vgfmg:
+; CHECK: vgfmg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %a, <2 x i64> %b)
+  ret <16 x i8> %res
+}
+
+; VGFMAB.
+define <8 x i16> @test_vgfmab(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vgfmab:
+; CHECK: vgfmab %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %a, <16 x i8> %b,
+                                          <8 x i16> %c)
+  ret <8 x i16> %res
+}
+
+; VGFMAH.
+define <4 x i32> @test_vgfmah(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vgfmah:
+; CHECK: vgfmah %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %a, <8 x i16> %b,
+                                          <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+; VGFMAF.
+define <2 x i64> @test_vgfmaf(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vgfmaf:
+; CHECK: vgfmaf %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %a, <4 x i32> %b,
+                                          <2 x i64> %c)
+  ret <2 x i64> %res
+}
+
+; VGFMAG.
+define <16 x i8> @test_vgfmag(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vgfmag:
+; CHECK: vgfmag %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %a, <2 x i64> %b,
+                                          <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VMAHB.
+define <16 x i8> @test_vmahb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmahb:
+; CHECK: vmahb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vmahb(<16 x i8> %a, <16 x i8> %b,
+                                         <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VMAHH.
+define <8 x i16> @test_vmahh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmahh:
+; CHECK: vmahh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmahh(<8 x i16> %a, <8 x i16> %b,
+                                         <8 x i16> %c)
+  ret <8 x i16> %res
+}
+
+; VMAHF.
+define <4 x i32> @test_vmahf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmahf:
+; CHECK: vmahf %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmahf(<4 x i32> %a, <4 x i32> %b,
+                                         <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+; VMALHB.
+define <16 x i8> @test_vmalhb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vmalhb:
+; CHECK: vmalhb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vmalhb(<16 x i8> %a, <16 x i8> %b,
+                                          <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VMALHH.
+define <8 x i16> @test_vmalhh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmalhh:
+; CHECK: vmalhh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmalhh(<8 x i16> %a, <8 x i16> %b,
+                                          <8 x i16> %c)
+  ret <8 x i16> %res
+}
+
+; VMALHF.
+define <4 x i32> @test_vmalhf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmalhf:
+; CHECK: vmalhf %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmalhf(<4 x i32> %a, <4 x i32> %b,
+                                          <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+; VMAEB.
+define <8 x i16> @test_vmaeb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmaeb:
+; CHECK: vmaeb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmaeb(<16 x i8> %a, <16 x i8> %b,
+                                         <8 x i16> %c)
+  ret <8 x i16> %res
+}
+
+; VMAEH.
+define <4 x i32> @test_vmaeh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaeh:
+; CHECK: vmaeh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmaeh(<8 x i16> %a, <8 x i16> %b,
+                                         <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+; VMAEF.
+define <2 x i64> @test_vmaef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmaef:
+; CHECK: vmaef %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmaef(<4 x i32> %a, <4 x i32> %b,
+                                         <2 x i64> %c)
+  ret <2 x i64> %res
+}
+
+; VMALEB.
+define <8 x i16> @test_vmaleb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmaleb:
+; CHECK: vmaleb %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmaleb(<16 x i8> %a, <16 x i8> %b,
+                                          <8 x i16> %c)
+  ret <8 x i16> %res
+}
+
+; VMALEH.
+define <4 x i32> @test_vmaleh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaleh:
+; CHECK: vmaleh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmaleh(<8 x i16> %a, <8 x i16> %b,
+                                          <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+; VMALEF.
+define <2 x i64> @test_vmalef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmalef:
+; CHECK: vmalef %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmalef(<4 x i32> %a, <4 x i32> %b,
+                                          <2 x i64> %c)
+  ret <2 x i64> %res
+}
+
+; VMAOB.
+define <8 x i16> @test_vmaob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmaob:
+; CHECK: vmaob %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmaob(<16 x i8> %a, <16 x i8> %b,
+                                         <8 x i16> %c)
+  ret <8 x i16> %res
+}
+
+; VMAOH.
+define <4 x i32> @test_vmaoh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaoh:
+; CHECK: vmaoh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmaoh(<8 x i16> %a, <8 x i16> %b,
+                                         <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+; VMAOF.
+define <2 x i64> @test_vmaof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmaof:
+; CHECK: vmaof %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmaof(<4 x i32> %a, <4 x i32> %b,
+                                         <2 x i64> %c)
+  ret <2 x i64> %res
+}
+
+; VMALOB.
+define <8 x i16> @test_vmalob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vmalob:
+; CHECK: vmalob %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmalob(<16 x i8> %a, <16 x i8> %b,
+                                          <8 x i16> %c)
+  ret <8 x i16> %res
+}
+
+; VMALOH.
+define <4 x i32> @test_vmaloh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vmaloh:
+; CHECK: vmaloh %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmaloh(<8 x i16> %a, <8 x i16> %b,
+                                          <4 x i32> %c)
+  ret <4 x i32> %res
+}
+
+; VMALOF.
+define <2 x i64> @test_vmalof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmalof:
+; CHECK: vmalof %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmalof(<4 x i32> %a, <4 x i32> %b,
+                                          <2 x i64> %c)
+  ret <2 x i64> %res
+}
+
+; VMHB.
+define <16 x i8> @test_vmhb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmhb:
+; CHECK: vmhb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vmhb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VMHH.
+define <8 x i16> @test_vmhh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmhh:
+; CHECK: vmhh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmhh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VMHF.
+define <4 x i32> @test_vmhf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmhf:
+; CHECK: vmhf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmhf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VMLHB.
+define <16 x i8> @test_vmlhb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmlhb:
+; CHECK: vmlhb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vmlhb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VMLHH.
+define <8 x i16> @test_vmlhh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmlhh:
+; CHECK: vmlhh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmlhh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VMLHF.
+define <4 x i32> @test_vmlhf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmlhf:
+; CHECK: vmlhf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmlhf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VMEB.
+define <8 x i16> @test_vmeb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmeb:
+; CHECK: vmeb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmeb(<16 x i8> %a, <16 x i8> %b)
+  ret <8 x i16> %res
+}
+
+; VMEH.
+define <4 x i32> @test_vmeh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmeh:
+; CHECK: vmeh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmeh(<8 x i16> %a, <8 x i16> %b)
+  ret <4 x i32> %res
+}
+
+; VMEF.
+define <2 x i64> @test_vmef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmef:
+; CHECK: vmef %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmef(<4 x i32> %a, <4 x i32> %b)
+  ret <2 x i64> %res
+}
+
+; VMLEB.
+define <8 x i16> @test_vmleb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmleb:
+; CHECK: vmleb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmleb(<16 x i8> %a, <16 x i8> %b)
+  ret <8 x i16> %res
+}
+
+; VMLEH.
+define <4 x i32> @test_vmleh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmleh:
+; CHECK: vmleh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmleh(<8 x i16> %a, <8 x i16> %b)
+  ret <4 x i32> %res
+}
+
+; VMLEF.
+define <2 x i64> @test_vmlef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmlef:
+; CHECK: vmlef %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmlef(<4 x i32> %a, <4 x i32> %b)
+  ret <2 x i64> %res
+}
+
+; VMOB.
+define <8 x i16> @test_vmob(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmob:
+; CHECK: vmob %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmob(<16 x i8> %a, <16 x i8> %b)
+  ret <8 x i16> %res
+}
+
+; VMOH.
+define <4 x i32> @test_vmoh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmoh:
+; CHECK: vmoh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmoh(<8 x i16> %a, <8 x i16> %b)
+  ret <4 x i32> %res
+}
+
+; VMOF.
+define <2 x i64> @test_vmof(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmof:
+; CHECK: vmof %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmof(<4 x i32> %a, <4 x i32> %b)
+  ret <2 x i64> %res
+}
+
+; VMLOB.
+define <8 x i16> @test_vmlob(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vmlob:
+; CHECK: vmlob %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vmlob(<16 x i8> %a, <16 x i8> %b)
+  ret <8 x i16> %res
+}
+
+; VMLOH.
+define <4 x i32> @test_vmloh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vmloh:
+; CHECK: vmloh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vmloh(<8 x i16> %a, <8 x i16> %b)
+  ret <4 x i32> %res
+}
+
+; VMLOF.
+define <2 x i64> @test_vmlof(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vmlof:
+; CHECK: vmlof %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vmlof(<4 x i32> %a, <4 x i32> %b)
+  ret <2 x i64> %res
+}
+
+; VERLLVB.
+define <16 x i8> @test_verllvb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_verllvb:
+; CHECK: verllvb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.verllvb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VERLLVH.
+define <8 x i16> @test_verllvh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_verllvh:
+; CHECK: verllvh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.verllvh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VERLLVF.
+define <4 x i32> @test_verllvf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_verllvf:
+; CHECK: verllvf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.verllvf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VERLLVG.
+define <2 x i64> @test_verllvg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_verllvg:
+; CHECK: verllvg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.verllvg(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %res
+}
+
+; VERLLB.
+define <16 x i8> @test_verllb(<16 x i8> %a, i32 %b) {
+; CHECK-LABEL: test_verllb:
+; CHECK: verllb %v24, %v24, 0(%r2)
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 %b)
+  ret <16 x i8> %res
+}
+
+; VERLLH.
+define <8 x i16> @test_verllh(<8 x i16> %a, i32 %b) {
+; CHECK-LABEL: test_verllh:
+; CHECK: verllh %v24, %v24, 0(%r2)
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.verllh(<8 x i16> %a, i32 %b)
+  ret <8 x i16> %res
+}
+
+; VERLLF.
+define <4 x i32> @test_verllf(<4 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_verllf:
+; CHECK: verllf %v24, %v24, 0(%r2)
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.verllf(<4 x i32> %a, i32 %b)
+  ret <4 x i32> %res
+}
+
+; VERLLG.
+define <2 x i64> @test_verllg(<2 x i64> %a, i32 %b) {
+; CHECK-LABEL: test_verllg:
+; CHECK: verllg %v24, %v24, 0(%r2)
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.verllg(<2 x i64> %a, i32 %b)
+  ret <2 x i64> %res
+}
+
+; VERLLB with the smallest count.
+define <16 x i8> @test_verllb_1(<16 x i8> %a) {
+; CHECK-LABEL: test_verllb_1:
+; CHECK: verllb %v24, %v24, 1
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 1)
+  ret <16 x i8> %res
+}
+
+; VERLLB with the largest count that can still be encoded as an immediate.
+define <16 x i8> @test_verllb_4095(<16 x i8> %a) {
+; CHECK-LABEL: test_verllb_4095:
+; CHECK: verllb %v24, %v24, 4095
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4095)
+  ret <16 x i8> %res
+}
+
+; VERLLB with the largest count + 1, which must be loaded into a register.
+define <16 x i8> @test_verllb_4096(<16 x i8> %a) {
+; CHECK-LABEL: test_verllb_4096:
+; CHECK: lhi [[REG:%r[1-5]]], 4096
+; CHECK: verllb %v24, %v24, 0([[REG]])
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4096)
+  ret <16 x i8> %res
+}
+
+; VERIMB.
+define <16 x i8> @test_verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_verimb:
+; CHECK: verimb %v24, %v26, %v28, 1
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1)
+  ret <16 x i8> %res
+}
+
+; VERIMH.
+define <8 x i16> @test_verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_verimh:
+; CHECK: verimh %v24, %v26, %v28, 1
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, i32 1)
+  ret <8 x i16> %res
+}
+
+; VERIMF.
+define <4 x i32> @test_verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_verimf:
+; CHECK: verimf %v24, %v26, %v28, 1
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 1)
+  ret <4 x i32> %res
+}
+
+; VERIMG.
+define <2 x i64> @test_verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_verimg:
+; CHECK: verimg %v24, %v26, %v28, 1
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 1)
+  ret <2 x i64> %res
+}
+
+; VERIMB with a different mask.
+define <16 x i8> @test_verimb_254(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_verimb_254:
+; CHECK: verimb %v24, %v26, %v28, 254
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 254)
+  ret <16 x i8> %res
+}
+
+; VSL.
+define <16 x i8> @test_vsl(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsl:
+; CHECK: vsl %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsl(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSLB.
+define <16 x i8> @test_vslb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vslb:
+; CHECK: vslb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vslb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSRA.
+define <16 x i8> @test_vsra(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsra:
+; CHECK: vsra %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsra(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSRAB.
+define <16 x i8> @test_vsrab(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrab:
+; CHECK: vsrab %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsrab(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSRL.
+define <16 x i8> @test_vsrl(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrl:
+; CHECK: vsrl %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsrl(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSRLB.
+define <16 x i8> @test_vsrlb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrlb:
+; CHECK: vsrlb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSLDB with the minimum useful value.
+define <16 x i8> @test_vsldb_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsldb_1:
+; CHECK: vsldb %v24, %v24, %v26, 1
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 1)
+  ret <16 x i8> %res
+}
+
+; VSLDB with the maximum value.
+define <16 x i8> @test_vsldb_15(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsldb_15:
+; CHECK: vsldb %v24, %v24, %v26, 15
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 15)
+  ret <16 x i8> %res
+}
+
+; VSCBIB.
+define <16 x i8> @test_vscbib(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vscbib:
+; CHECK: vscbib %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vscbib(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSCBIH.
+define <8 x i16> @test_vscbih(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vscbih:
+; CHECK: vscbih %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vscbih(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VSCBIF.
+define <4 x i32> @test_vscbif(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vscbif:
+; CHECK: vscbif %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vscbif(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VSCBIG.
+define <2 x i64> @test_vscbig(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vscbig:
+; CHECK: vscbig %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vscbig(<2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %res
+}
+
+; VSQ.
+define <16 x i8> @test_vsq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsq:
+; CHECK: vsq %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsq(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSBIQ.
+define <16 x i8> @test_vsbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vsbiq:
+; CHECK: vsbiq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %a, <16 x i8> %b,
+                                         <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VSCBIQ.
+define <16 x i8> @test_vscbiq(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vscbiq:
+; CHECK: vscbiq %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VSBCBIQ.
+define <16 x i8> @test_vsbcbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vsbcbiq:
+; CHECK: vsbcbiq %v24, %v24, %v26, %v28
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %a, <16 x i8> %b,
+                                           <16 x i8> %c)
+  ret <16 x i8> %res
+}
+
+; VSUMB.
+define <4 x i32> @test_vsumb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsumb:
+; CHECK: vsumb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vsumb(<16 x i8> %a, <16 x i8> %b)
+  ret <4 x i32> %res
+}
+
+; VSUMH.
+define <4 x i32> @test_vsumh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsumh:
+; CHECK: vsumh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vsumh(<8 x i16> %a, <8 x i16> %b)
+  ret <4 x i32> %res
+}
+
+; VSUMGH.
+define <2 x i64> @test_vsumgh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vsumgh:
+; CHECK: vsumgh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %a, <8 x i16> %b)
+  ret <2 x i64> %res
+}
+
+; VSUMGF.
+define <2 x i64> @test_vsumgf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsumgf:
+; CHECK: vsumgf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %a, <4 x i32> %b)
+  ret <2 x i64> %res
+}
+
+; VSUMQF.
+define <16 x i8> @test_vsumqf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vsumqf:
+; CHECK: vsumqf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %a, <4 x i32> %b)
+  ret <16 x i8> %res
+}
+
+; VSUMQG.
+define <16 x i8> @test_vsumqg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vsumqg:
+; CHECK: vsumqg %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %a, <2 x i64> %b)
+  ret <16 x i8> %res
+}
+
+; VTM with no processing of the result.
+define i32 @test_vtm(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vtm:
+; CHECK: vtm %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b)
+  ret i32 %res
+}
+
+; VTM, storing to %ptr if all bits are set.
+define void @test_vtm_all_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vtm_all_store:
+; CHECK-NOT: %r
+; CHECK: vtm %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b)
+  %cmp = icmp sge i32 %res, 3
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret void
+}
+
+; VCEQBS with no processing of the result.
+define i32 @test_vceqbs(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vceqbs:
+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCEQBS, returning 1 if any elements are equal (CC != 3).
+define i32 @test_vceqbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vceqbs_any_bool:
+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp ne i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCEQBS, storing to %ptr if any elements are equal.
+define <16 x i8> @test_vceqbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vceqbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vceqbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp ule i32 %cc, 2
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <16 x i8> %res
+}
+
+; VCEQHS with no processing of the result.
+define i32 @test_vceqhs(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vceqhs:
+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCEQHS, returning 1 if not all elements are equal.
+define i32 @test_vceqhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vceqhs_notall_bool:
+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp sge i32 %res, 1
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCEQHS, storing to %ptr if not all elements are equal.
+define <8 x i16> @test_vceqhs_notall_store(<8 x i16> %a, <8 x i16> %b,
+                                           i32 *%ptr) {
+; CHECK-LABEL: test_vceqhs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vceqhs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp ugt i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <8 x i16> %res
+}
+
+; vceqfs returns {result vector, CC}.  Per the cases below, CC 3 means no
+; elements compared equal; the tests check that extracting and comparing the
+; CC lowers to ipm/srl, ipm/risblg, or a fused conditional branch.
+; VCEQFS with no processing of the result.
+define i32 @test_vceqfs(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vceqfs:
+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCEQFS, returning 1 if no elements are equal.
+define i32 @test_vceqfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vceqfs_none_bool:
+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp eq i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCEQFS, storing to %ptr if no elements are equal.
+define <4 x i32> @test_vceqfs_none_store(<4 x i32> %a, <4 x i32> %b,
+                                         i32 *%ptr) {
+; CHECK-LABEL: test_vceqfs_none_store:
+; CHECK-NOT: %r
+; CHECK: vceqfs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp uge i32 %cc, 3
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <4 x i32> %res
+}
+
+; vceqgs returns {result vector, CC}.  CC 0 means all elements compared
+; equal; comparisons of the CC against constants should be folded into the
+; ipm/afi/srl sequence or the conditional branch shown in the CHECK lines.
+; VCEQGS with no processing of the result.
+define i32 @test_vceqgs(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vceqgs:
+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCEQGS returning 1 if all elements are equal (CC == 0).
+define i32 @test_vceqgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vceqgs_all_bool:
+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp ult i32 %res, 1
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCEQGS, storing to %ptr if all elements are equal.
+define <2 x i64> @test_vceqgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vceqgs_all_store:
+; CHECK-NOT: %r
+; CHECK: vceqgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 0
+  %cc = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp sle i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <2 x i64> %res
+}
+
+; vchbs returns {result vector, CC}.  Per the cases below, CC 3 means no
+; element of %a was higher than the corresponding element of %b, so
+; "any higher" is CC != 3.
+; VCHBS with no processing of the result.
+define i32 @test_vchbs(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchbs:
+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHBS, returning 1 if any elements are higher (CC != 3).
+define i32 @test_vchbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchbs_any_bool:
+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp ne i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHBS, storing to %ptr if any elements are higher.
+define <16 x i8> @test_vchbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vchbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp ule i32 %cc, 2
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <16 x i8> %res
+}
+
+; vchhs returns {result vector, CC}.  CC 0 means every element of %a was
+; higher, so "not all higher" is CC >= 1 (signed predicate used here; other
+; families below use the unsigned form, and both must be recognized).
+; VCHHS with no processing of the result.
+define i32 @test_vchhs(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchhs:
+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHHS, returning 1 if not all elements are higher.
+define i32 @test_vchhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchhs_notall_bool:
+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp sge i32 %res, 1
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHHS, storing to %ptr if not all elements are higher.
+define <8 x i16> @test_vchhs_notall_store(<8 x i16> %a, <8 x i16> %b,
+                                          i32 *%ptr) {
+; CHECK-LABEL: test_vchhs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vchhs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp ugt i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <8 x i16> %res
+}
+
+; vchfs returns {result vector, CC}.  CC 3 means no elements were higher;
+; the equality test against 3 should lower to the risblg form checked below.
+; VCHFS with no processing of the result.
+define i32 @test_vchfs(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchfs:
+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHFS, returning 1 if no elements are higher.
+define i32 @test_vchfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchfs_none_bool:
+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp eq i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHFS, storing to %ptr if no elements are higher.
+define <4 x i32> @test_vchfs_none_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchfs_none_store:
+; CHECK-NOT: %r
+; CHECK: vchfs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp uge i32 %cc, 3
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <4 x i32> %res
+}
+
+; vchgs returns {result vector, CC}.  CC 0 means all elements were higher;
+; the CC comparison should fold into the ipm/afi/srl sequence or branch.
+; VCHGS with no processing of the result.
+define i32 @test_vchgs(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchgs:
+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHGS returning 1 if all elements are higher (CC == 0).
+define i32 @test_vchgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchgs_all_bool:
+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp ult i32 %res, 1
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHGS, storing to %ptr if all elements are higher.
+define <2 x i64> @test_vchgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchgs_all_store:
+; CHECK-NOT: %r
+; CHECK: vchgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 0
+  %cc = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp sle i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <2 x i64> %res
+}
+
+; vchlbs returns {result vector, CC}.  CC 3 means no element of %a was
+; higher than the corresponding element of %b, so "any higher" is CC != 3.
+; Note the signed sle predicate in the store test, versus ule in the vchbs
+; equivalent: both spellings of "CC <= 2" must be recognized.
+; VCHLBS with no processing of the result.
+define i32 @test_vchlbs(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchlbs:
+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHLBS, returning 1 if any elements are higher (CC != 3).
+define i32 @test_vchlbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vchlbs_any_bool:
+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp ne i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHLBS, storing to %ptr if any elements are higher.
+define <16 x i8> @test_vchlbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchlbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vchlbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  %cmp = icmp sle i32 %cc, 2
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <16 x i8> %res
+}
+
+; vchlhs returns {result vector, CC}.  CC 0 means every element of %a was
+; higher, so "not all higher" is CC >= 1 (unsigned predicate here, signed
+; in the vchhs equivalent — both forms must be recognized).
+; VCHLHS with no processing of the result.
+define i32 @test_vchlhs(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchlhs:
+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHLHS, returning 1 if not all elements are higher.
+define i32 @test_vchlhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vchlhs_notall_bool:
+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp uge i32 %res, 1
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHLHS, storing to %ptr if not all elements are higher.
+define <8 x i16> @test_vchlhs_notall_store(<8 x i16> %a, <8 x i16> %b,
+                                           i32 *%ptr) {
+; CHECK-LABEL: test_vchlhs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vchlhs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  %cmp = icmp sgt i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <8 x i16> %res
+}
+
+; vchlfs returns {result vector, CC}.  CC 3 means no elements were higher;
+; the store test spells "CC == 3" as the signed sge predicate.
+; VCHLFS with no processing of the result.
+define i32 @test_vchlfs(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchlfs:
+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHLFS, returning 1 if no elements are higher.
+define i32 @test_vchlfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vchlfs_none_bool:
+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp eq i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHLFS, storing to %ptr if no elements are higher.
+define <4 x i32> @test_vchlfs_none_store(<4 x i32> %a, <4 x i32> %b,
+                                         i32 *%ptr) {
+; CHECK-LABEL: test_vchlfs_none_store:
+; CHECK-NOT: %r
+; CHECK: vchlfs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  %cmp = icmp sge i32 %cc, 3
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <4 x i32> %res
+}
+
+; vchlgs returns {result vector, CC}.  CC 0 means all elements were higher;
+; the store test spells "CC == 0" as the unsigned ule predicate.
+; VCHLGS with no processing of the result.
+define i32 @test_vchlgs(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchlgs:
+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  ret i32 %res
+}
+
+; VCHLGS returning 1 if all elements are higher (CC == 0).
+define i32 @test_vchlgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vchlgs_all_bool:
+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp slt i32 %res, 1
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VCHLGS, storing to %ptr if all elements are higher.
+define <2 x i64> @test_vchlgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
+; CHECK-LABEL: test_vchlgs_all_store:
+; CHECK-NOT: %r
+; CHECK: vchlgs %v24, %v24, %v26
+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 0
+  %cc = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp ule i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <2 x i64> %res
+}
+
+; The vfae* intrinsics take an i32 flags operand supplying the instruction's
+; modifier field: per the cases below, 4 = RT and 8 = IN (so 0/4/8/12 cover
+; all combinations), and the CS bit (1) is ignored for these non-CC forms.
+; The *s variants additionally return the CC, stored here via ipm/srl.
+; VFAEB with !IN !RT.
+define <16 x i8> @test_vfaeb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_0:
+; CHECK: vfaeb %v24, %v24, %v26, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 0)
+  ret <16 x i8> %res
+}
+
+; VFAEB with !IN RT.
+define <16 x i8> @test_vfaeb_4(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_4:
+; CHECK: vfaeb %v24, %v24, %v26, 4
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 4)
+  ret <16 x i8> %res
+}
+
+; VFAEB with IN !RT.
+define <16 x i8> @test_vfaeb_8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_8:
+; CHECK: vfaeb %v24, %v24, %v26, 8
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 8)
+  ret <16 x i8> %res
+}
+
+; VFAEB with IN RT.
+define <16 x i8> @test_vfaeb_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_12:
+; CHECK: vfaeb %v24, %v24, %v26, 12
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 12)
+  ret <16 x i8> %res
+}
+
+; VFAEB with CS -- should be ignored.
+define <16 x i8> @test_vfaeb_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaeb_1:
+; CHECK: vfaeb %v24, %v24, %v26, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 1)
+  ret <16 x i8> %res
+}
+
+; VFAEH.
+define <8 x i16> @test_vfaeh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfaeh:
+; CHECK: vfaeh %v24, %v24, %v26, 4
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %a, <8 x i16> %b, i32 4)
+  ret <8 x i16> %res
+}
+
+; VFAEF.
+define <4 x i32> @test_vfaef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfaef:
+; CHECK: vfaef %v24, %v24, %v26, 8
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vfaef(<4 x i32> %a, <4 x i32> %b, i32 8)
+  ret <4 x i32> %res
+}
+
+; VFAEBS.
+define <16 x i8> @test_vfaebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaebs:
+; CHECK: vfaebs %v24, %v24, %v26, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8> %a, <16 x i8> %b,
+                                                  i32 0)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VFAEHS.
+define <8 x i16> @test_vfaehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaehs:
+; CHECK: vfaehs %v24, %v24, %v26, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16> %a, <8 x i16> %b,
+                                                  i32 4)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VFAEFS.
+define <4 x i32> @test_vfaefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaefs:
+; CHECK: vfaefs %v24, %v24, %v26, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32> %a, <4 x i32> %b,
+                                                  i32 8)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; The vfaez* intrinsics mirror vfae* (same i32 flags operand: 4 = RT,
+; 8 = IN, CS bit ignored) and select the zero-search instruction forms.
+; VFAEZB with !IN !RT.
+define <16 x i8> @test_vfaezb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_0:
+; CHECK: vfaezb %v24, %v24, %v26, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 0)
+  ret <16 x i8> %res
+}
+
+; VFAEZB with !IN RT.
+define <16 x i8> @test_vfaezb_4(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_4:
+; CHECK: vfaezb %v24, %v24, %v26, 4
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 4)
+  ret <16 x i8> %res
+}
+
+; VFAEZB with IN !RT.
+define <16 x i8> @test_vfaezb_8(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_8:
+; CHECK: vfaezb %v24, %v24, %v26, 8
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 8)
+  ret <16 x i8> %res
+}
+
+; VFAEZB with IN RT.
+define <16 x i8> @test_vfaezb_12(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_12:
+; CHECK: vfaezb %v24, %v24, %v26, 12
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 12)
+  ret <16 x i8> %res
+}
+
+; VFAEZB with CS -- should be ignored.
+define <16 x i8> @test_vfaezb_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfaezb_1:
+; CHECK: vfaezb %v24, %v24, %v26, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 1)
+  ret <16 x i8> %res
+}
+
+; VFAEZH.
+define <8 x i16> @test_vfaezh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfaezh:
+; CHECK: vfaezh %v24, %v24, %v26, 4
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %a, <8 x i16> %b, i32 4)
+  ret <8 x i16> %res
+}
+
+; VFAEZF.
+define <4 x i32> @test_vfaezf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfaezf:
+; CHECK: vfaezf %v24, %v24, %v26, 8
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %a, <4 x i32> %b, i32 8)
+  ret <4 x i32> %res
+}
+
+; VFAEZBS.
+define <16 x i8> @test_vfaezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaezbs:
+; CHECK: vfaezbs %v24, %v24, %v26, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8> %a, <16 x i8> %b,
+                                                   i32 0)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VFAEZHS.
+define <8 x i16> @test_vfaezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaezhs:
+; CHECK: vfaezhs %v24, %v24, %v26, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16> %a, <8 x i16> %b,
+                                                   i32 4)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VFAEZFS.
+define <4 x i32> @test_vfaezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfaezfs:
+; CHECK: vfaezfs %v24, %v24, %v26, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32> %a, <4 x i32> %b,
+                                                   i32 8)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; The vfee* intrinsics take no flags operand; the *s variants additionally
+; return the CC, which each test extracts and stores via the ipm/srl
+; sequence the CHECK lines expect.
+; VFEEB.
+define <16 x i8> @test_vfeeb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfeeb_0:
+; CHECK: vfeeb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VFEEH.
+define <8 x i16> @test_vfeeh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfeeh:
+; CHECK: vfeeh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VFEEF.
+define <4 x i32> @test_vfeef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfeef:
+; CHECK: vfeef %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vfeef(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VFEEBS.
+define <16 x i8> @test_vfeebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeebs:
+; CHECK: vfeebs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VFEEHS.
+define <8 x i16> @test_vfeehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeehs:
+; CHECK: vfeehs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VFEEFS.
+define <4 x i32> @test_vfeefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeefs:
+; CHECK: vfeefs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; The vfeez* intrinsics are the zero-search forms of vfee*; again no flags
+; operand, with the *s variants returning the CC as a second result.
+; VFEEZB.
+define <16 x i8> @test_vfeezb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfeezb:
+; CHECK: vfeezb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VFEEZH.
+define <8 x i16> @test_vfeezh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfeezh:
+; CHECK: vfeezh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VFEEZF.
+define <4 x i32> @test_vfeezf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfeezf:
+; CHECK: vfeezf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VFEEZBS.
+define <16 x i8> @test_vfeezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeezbs:
+; CHECK: vfeezbs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VFEEZHS.
+define <8 x i16> @test_vfeezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeezhs:
+; CHECK: vfeezhs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VFEEZFS.
+define <4 x i32> @test_vfeezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfeezfs:
+; CHECK: vfeezfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; The vfene* intrinsics take no flags operand; the *s variants additionally
+; return the CC, extracted and stored via ipm/srl in each test.
+; VFENEB.
+define <16 x i8> @test_vfeneb_0(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfeneb_0:
+; CHECK: vfeneb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VFENEH.
+define <8 x i16> @test_vfeneh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfeneh:
+; CHECK: vfeneh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VFENEF.
+define <4 x i32> @test_vfenef(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfenef:
+; CHECK: vfenef %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vfenef(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VFENEBS.
+define <16 x i8> @test_vfenebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenebs:
+; CHECK: vfenebs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VFENEHS.
+define <8 x i16> @test_vfenehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenehs:
+; CHECK: vfenehs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VFENEFS.
+define <4 x i32> @test_vfenefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenefs:
+; CHECK: vfenefs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; The vfenez* intrinsics are the zero-search forms of vfene*; the *s
+; variants return the CC as a second result, stored here via ipm/srl.
+; VFENEZB.
+define <16 x i8> @test_vfenezb(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vfenezb:
+; CHECK: vfenezb %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %res
+}
+
+; VFENEZH.
+define <8 x i16> @test_vfenezh(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: test_vfenezh:
+; CHECK: vfenezh %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %res
+}
+
+; VFENEZF.
+define <4 x i32> @test_vfenezf(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: test_vfenezf:
+; CHECK: vfenezf %v24, %v24, %v26
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %res
+}
+
+; VFENEZBS.
+define <16 x i8> @test_vfenezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenezbs:
+; CHECK: vfenezbs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8> %a, <16 x i8> %b)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VFENEZHS.
+define <8 x i16> @test_vfenezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenezhs:
+; CHECK: vfenezhs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16> %a, <8 x i16> %b)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VFENEZFS.
+define <4 x i32> @test_vfenezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
+; CHECK-LABEL: test_vfenezfs:
+; CHECK: vfenezfs %v24, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32> %a, <4 x i32> %b)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; The vistr* intrinsics are unary (one vector operand); the *s variants
+; additionally return the CC, which each test extracts and stores via the
+; ipm/srl sequence the CHECK lines expect.
+; VISTRB.
+define <16 x i8> @test_vistrb(<16 x i8> %a) {
+; CHECK-LABEL: test_vistrb:
+; CHECK: vistrb %v24, %v24
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vistrb(<16 x i8> %a)
+  ret <16 x i8> %res
+}
+
+; VISTRH.
+define <8 x i16> @test_vistrh(<8 x i16> %a) {
+; CHECK-LABEL: test_vistrh:
+; CHECK: vistrh %v24, %v24
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vistrh(<8 x i16> %a)
+  ret <8 x i16> %res
+}
+
+; VISTRF.
+define <4 x i32> @test_vistrf(<4 x i32> %a) {
+; CHECK-LABEL: test_vistrf:
+; CHECK: vistrf %v24, %v24
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vistrf(<4 x i32> %a)
+  ret <4 x i32> %res
+}
+
+; VISTRBS.
+define <16 x i8> @test_vistrbs(<16 x i8> %a, i32 *%ccptr) {
+; CHECK-LABEL: test_vistrbs:
+; CHECK: vistrbs %v24, %v24
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8> %a)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VISTRHS.
+define <8 x i16> @test_vistrhs(<8 x i16> %a, i32 *%ccptr) {
+; CHECK-LABEL: test_vistrhs:
+; CHECK: vistrhs %v24, %v24
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16> %a)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VISTRFS.
+define <4 x i32> @test_vistrfs(<4 x i32> %a, i32 *%ccptr) {
+; CHECK-LABEL: test_vistrfs:
+; CHECK: vistrfs %v24, %v24
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32> %a)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; VSTRCB with !IN !RT.
+define <16 x i8> @test_vstrcb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_0:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+                                          <16 x i8> %c, i32 0)
+  ret <16 x i8> %res
+}
+
+; VSTRCB with !IN RT.
+define <16 x i8> @test_vstrcb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_4:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+                                          <16 x i8> %c, i32 4)
+  ret <16 x i8> %res
+}
+
+; VSTRCB with IN !RT.
+define <16 x i8> @test_vstrcb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_8:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+                                          <16 x i8> %c, i32 8)
+  ret <16 x i8> %res
+}
+
+; VSTRCB with IN RT.
+define <16 x i8> @test_vstrcb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_12:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 12
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+                                          <16 x i8> %c, i32 12)
+  ret <16 x i8> %res
+}
+
+; VSTRCB with CS -- should be ignored.
+define <16 x i8> @test_vstrcb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrcb_1:
+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
+                                          <16 x i8> %c, i32 1)
+  ret <16 x i8> %res
+}
+
+; VSTRCH.
+define <8 x i16> @test_vstrch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vstrch:
+; CHECK: vstrch %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vstrch(<8 x i16> %a, <8 x i16> %b,
+                                          <8 x i16> %c, i32 4)
+  ret <8 x i16> %res
+}
+
+; VSTRCF.
+define <4 x i32> @test_vstrcf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vstrcf:
+; CHECK: vstrcf %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %a, <4 x i32> %b,
+                                          <4 x i32> %c, i32 8)
+  ret <4 x i32> %res
+}
+
+; VSTRCBS.
+define <16 x i8> @test_vstrcbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
+                               i32 *%ccptr) {
+; CHECK-LABEL: test_vstrcbs:
+; CHECK: vstrcbs %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8> %a, <16 x i8> %b,
+                                                   <16 x i8> %c, i32 0)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VSTRCHS.
+define <8 x i16> @test_vstrchs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,
+                               i32 *%ccptr) {
+; CHECK-LABEL: test_vstrchs:
+; CHECK: vstrchs %v24, %v24, %v26, %v28, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16> %a, <8 x i16> %b,
+                                                   <8 x i16> %c, i32 4)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VSTRCFS.
+define <4 x i32> @test_vstrcfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
+                               i32 *%ccptr) {
+; CHECK-LABEL: test_vstrcfs:
+; CHECK: vstrcfs %v24, %v24, %v26, %v28, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32> %a, <4 x i32> %b,
+                                                   <4 x i32> %c, i32 8)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; VSTRCZB with !IN !RT.
+define <16 x i8> @test_vstrczb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_0:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+                                           <16 x i8> %c, i32 0)
+  ret <16 x i8> %res
+}
+
+; VSTRCZB with !IN RT.
+define <16 x i8> @test_vstrczb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_4:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+                                           <16 x i8> %c, i32 4)
+  ret <16 x i8> %res
+}
+
+; VSTRCZB with IN !RT.
+define <16 x i8> @test_vstrczb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_8:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+                                           <16 x i8> %c, i32 8)
+  ret <16 x i8> %res
+}
+
+; VSTRCZB with IN RT.
+define <16 x i8> @test_vstrczb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_12:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 12
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+                                           <16 x i8> %c, i32 12)
+  ret <16 x i8> %res
+}
+
+; VSTRCZB with CS -- should be ignored.
+define <16 x i8> @test_vstrczb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_vstrczb_1:
+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0
+; CHECK: br %r14
+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
+                                           <16 x i8> %c, i32 1)
+  ret <16 x i8> %res
+}
+
+; VSTRCZH.
+define <8 x i16> @test_vstrczh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
+; CHECK-LABEL: test_vstrczh:
+; CHECK: vstrczh %v24, %v24, %v26, %v28, 4
+; CHECK: br %r14
+  %res = call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %a, <8 x i16> %b,
+                                           <8 x i16> %c, i32 4)
+  ret <8 x i16> %res
+}
+
+; VSTRCZF.
+define <4 x i32> @test_vstrczf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: test_vstrczf:
+; CHECK: vstrczf %v24, %v24, %v26, %v28, 8
+; CHECK: br %r14
+  %res = call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %a, <4 x i32> %b,
+                                           <4 x i32> %c, i32 8)
+  ret <4 x i32> %res
+}
+
+; VSTRCZBS.
+define <16 x i8> @test_vstrczbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
+                                i32 *%ccptr) {
+; CHECK-LABEL: test_vstrczbs:
+; CHECK: vstrczbs %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8> %a, <16 x i8> %b,
+                                                    <16 x i8> %c, i32 0)
+  %res = extractvalue {<16 x i8>, i32} %call, 0
+  %cc = extractvalue {<16 x i8>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <16 x i8> %res
+}
+
+; VSTRCZHS.
+define <8 x i16> @test_vstrczhs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,
+                                i32 *%ccptr) {
+; CHECK-LABEL: test_vstrczhs:
+; CHECK: vstrczhs %v24, %v24, %v26, %v28, 4
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16> %a, <8 x i16> %b,
+                                                    <8 x i16> %c, i32 4)
+  %res = extractvalue {<8 x i16>, i32} %call, 0
+  %cc = extractvalue {<8 x i16>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <8 x i16> %res
+}
+
+; VSTRCZFS.
+define <4 x i32> @test_vstrczfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
+                                i32 *%ccptr) {
+; CHECK-LABEL: test_vstrczfs:
+; CHECK: vstrczfs %v24, %v24, %v26, %v28, 8
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+  %call = call {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32> %a, <4 x i32> %b,
+                                                    <4 x i32> %c, i32 8)
+  %res = extractvalue {<4 x i32>, i32} %call, 0
+  %cc = extractvalue {<4 x i32>, i32} %call, 1
+  store i32 %cc, i32 *%ccptr
+  ret <4 x i32> %res
+}
+
+; VFCEDBS with no processing of the result.
+define i32 @test_vfcedbs(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfcedbs:
+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
+                                                   <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  ret i32 %res
+}
+
+; VFCEDBS, returning 1 if any elements are equal (CC != 3).
+define i32 @test_vfcedbs_any_bool(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfcedbs_any_bool:
+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: afi %r2, -536870912
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
+                                                   <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp ne i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VFCEDBS, storing to %ptr if any elements are equal.
+define <2 x i64> @test_vfcedbs_any_store(<2 x double> %a, <2 x double> %b,
+                                         i32 *%ptr) {
+; CHECK-LABEL: test_vfcedbs_any_store:
+; CHECK-NOT: %r
+; CHECK: vfcedbs %v24, %v24, %v26
+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
+                                                   <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 0
+  %cc = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp ule i32 %cc, 2
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <2 x i64> %res
+}
+
+; VFCHDBS with no processing of the result.
+define i32 @test_vfchdbs(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchdbs:
+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
+                                                   <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  ret i32 %res
+}
+
+; VFCHDBS, returning 1 if not all elements are higher.
+define i32 @test_vfchdbs_notall_bool(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchdbs_notall_bool:
+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 36
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
+                                                   <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp sge i32 %res, 1
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VFCHDBS, storing to %ptr if not all elements are higher.
+define <2 x i64> @test_vfchdbs_notall_store(<2 x double> %a, <2 x double> %b,
+                                            i32 *%ptr) {
+; CHECK-LABEL: test_vfchdbs_notall_store:
+; CHECK-NOT: %r
+; CHECK: vfchdbs %v24, %v24, %v26
+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
+                                                   <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 0
+  %cc = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp ugt i32 %cc, 0
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <2 x i64> %res
+}
+
+; VFCHEDBS with no processing of the result.
+define i32 @test_vfchedbs(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchedbs:
+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
+                                                    <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  ret i32 %res
+}
+
+; VFCHEDBS, returning 1 if neither element is higher or equal.
+define i32 @test_vfchedbs_none_bool(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: test_vfchedbs_none_bool:
+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: risblg %r2, [[REG]], 31, 159, 35
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
+                                                    <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp eq i32 %res, 3
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VFCHEDBS, storing to %ptr if neither element is higher or equal.
+define <2 x i64> @test_vfchedbs_none_store(<2 x double> %a, <2 x double> %b,
+                                           i32 *%ptr) {
+; CHECK-LABEL: test_vfchedbs_none_store:
+; CHECK-NOT: %r
+; CHECK: vfchedbs %v24, %v24, %v26
+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
+; CHECK: mvhi 0(%r2), 0
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
+                                                    <2 x double> %b)
+  %res = extractvalue {<2 x i64>, i32} %call, 0
+  %cc = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp uge i32 %cc, 3
+  br i1 %cmp, label %store, label %exit
+
+store:
+  store i32 0, i32 *%ptr
+  br label %exit
+
+exit:
+  ret <2 x i64> %res
+}
+
+; VFTCIDB with the lowest useful class selector and no processing of the result.
+define i32 @test_vftcidb(<2 x double> %a) {
+; CHECK-LABEL: test_vftcidb:
+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 1
+; CHECK: ipm %r2
+; CHECK: srl %r2, 28
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 1)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  ret i32 %res
+}
+
+; VFTCIDB with the highest useful class selector, returning 1 if all elements
+; have the right class (CC == 0).
+define i32 @test_vftcidb_all_bool(<2 x double> %a) {
+; CHECK-LABEL: test_vftcidb_all_bool:
+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 4094
+; CHECK: afi %r2, -268435456
+; CHECK: srl %r2, 31
+; CHECK: br %r14
+  %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 4094)
+  %res = extractvalue {<2 x i64>, i32} %call, 1
+  %cmp = icmp eq i32 %res, 0
+  %ext = zext i1 %cmp to i32
+  ret i32 %ext
+}
+
+; VFIDB with a rounding mode not usable via standard intrinsics.
+define <2 x double> @test_vfidb_0_4(<2 x double> %a) {
+; CHECK-LABEL: test_vfidb_0_4:
+; CHECK: vfidb %v24, %v24, 0, 4
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 0, i32 4)
+  ret <2 x double> %res
+}
+
+; VFIDB with IEEE-inexact exception suppressed.
+define <2 x double> @test_vfidb_4_0(<2 x double> %a) {
+; CHECK-LABEL: test_vfidb_4_0:
+; CHECK: vfidb %v24, %v24, 4, 0
+; CHECK: br %r14
+  %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 4, i32 0)
+  ret <2 x double> %res
+}
+





More information about the llvm-commits mailing list