[llvm] 8424bf2 - [SystemZ] Add support for new cpu architecture - arch15
Ulrich Weigand via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 20 10:30:51 PST 2025
Author: Ulrich Weigand
Date: 2025-01-20T19:30:21+01:00
New Revision: 8424bf207efd89eacf2fe893b67be98d535e1db6
URL: https://github.com/llvm/llvm-project/commit/8424bf207efd89eacf2fe893b67be98d535e1db6
DIFF: https://github.com/llvm/llvm-project/commit/8424bf207efd89eacf2fe893b67be98d535e1db6.diff
LOG: [SystemZ] Add support for new cpu architecture - arch15
This patch adds support for the next-generation arch15
CPU architecture to the SystemZ backend.
This includes:
- Basic support for the new processor and its features.
- Detection of arch15 as host processor.
- Assembler/disassembler support for new instructions.
- Exploitation of new instructions for code generation.
- New vector (signed|unsigned|bool) __int128 data types.
- New LLVM intrinsics for certain new instructions.
- Support for low-level builtins mapped to new LLVM intrinsics.
- New high-level intrinsics in vecintrin.h (a usage sketch follows the note below).
- Indicate support by defining __VEC__ == 10305.
Note: No currently available Z system supports the arch15
architecture. Once new systems become available, the
official system name will be added as a supported -march name.
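As an illustration only (not part of this commit), the minimal sketch below
shows how the new vector __int128 types, a few of the new vecintrin.h
intrinsics, and one of the low-level builtins might be used. It assumes a
clang built with this patch, compiling for s390x with -march=arch15
-mzvector; the values, output, and overall flow are purely illustrative.

  /* Minimal sketch, assuming a clang built with this patch and invoked as
     e.g.: clang -target s390x-linux-gnu -march=arch15 -mzvector demo.c */
  #include <vecintrin.h>
  #include <stdio.h>

  int main(void) {
    /* vec_splats now also accepts (un)signed __int128 scalars. */
    __vector unsigned __int128 a = vec_splats((unsigned __int128)42);

    /* vec_revb gained __int128 overloads (via __builtin_s390_vlbrq);
       applying it twice restores the original value. */
    __vector unsigned __int128 b = vec_revb(vec_revb(a));

    /* New 128-bit element comparison (via __builtin_s390_vceqqs). */
    if (vec_all_eq(a, b))
      puts("byte-reversal round trip preserved the value");

    /* vec_unpackh now widens the high doubleword into a 128-bit element
       (via __builtin_s390_vuphg). */
    __vector signed long long dw = (__vector signed long long){-1, 7};
    __vector signed __int128 wide = vec_unpackh(dw);
    (void)wide;

    /* Low-level builtin: count leading zeros of an unsigned __int128. */
    unsigned __int128 lz = __builtin_s390_vclzq((unsigned __int128)1);
    printf("clz(1) = %llu\n", (unsigned long long)lz);  /* expect 127 */

    return 0;
  }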
Added:
clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c
clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c
clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c
llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll
llvm/test/CodeGen/SystemZ/int-abs-03.ll
llvm/test/CodeGen/SystemZ/int-cmp-64.ll
llvm/test/CodeGen/SystemZ/int-conv-15.ll
llvm/test/CodeGen/SystemZ/int-div-08.ll
llvm/test/CodeGen/SystemZ/int-max-02.ll
llvm/test/CodeGen/SystemZ/int-min-02.ll
llvm/test/CodeGen/SystemZ/int-mul-14.ll
llvm/test/CodeGen/SystemZ/int-mul-15.ll
llvm/test/CodeGen/SystemZ/int-mul-16.ll
llvm/test/CodeGen/SystemZ/int-neg-04.ll
llvm/test/CodeGen/SystemZ/llxa-01.ll
llvm/test/CodeGen/SystemZ/llxa-02.ll
llvm/test/CodeGen/SystemZ/llxa-03.ll
llvm/test/CodeGen/SystemZ/llxa-04.ll
llvm/test/CodeGen/SystemZ/llxa-05.ll
llvm/test/CodeGen/SystemZ/lxa-01.ll
llvm/test/CodeGen/SystemZ/lxa-02.ll
llvm/test/CodeGen/SystemZ/lxa-03.ll
llvm/test/CodeGen/SystemZ/lxa-04.ll
llvm/test/CodeGen/SystemZ/lxa-05.ll
llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll
llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll
llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll
llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll
llvm/test/CodeGen/SystemZ/vec-cmp-09.ll
llvm/test/CodeGen/SystemZ/vec-div-03.ll
llvm/test/CodeGen/SystemZ/vec-eval.ll
llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll
llvm/test/CodeGen/SystemZ/vec-mul-06.ll
llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt
llvm/test/MC/SystemZ/insn-bad-arch15.s
llvm/test/MC/SystemZ/insn-good-arch15.s
Modified:
clang/include/clang/Basic/BuiltinsSystemZ.def
clang/lib/Basic/Targets/SystemZ.cpp
clang/lib/Basic/Targets/SystemZ.h
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/vecintrin.h
clang/lib/Sema/DeclSpec.cpp
clang/lib/Sema/SemaSystemZ.cpp
clang/test/CodeGen/SystemZ/builtins-systemz-zvector-error.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-error.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector3-error.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector3.c
clang/test/CodeGen/SystemZ/systemz-abi-vector.c
clang/test/CodeGen/SystemZ/systemz-abi.c
clang/test/CodeGen/SystemZ/zvector.c
clang/test/Driver/systemz-march.c
clang/test/Misc/target-invalid-cpu-note/systemz.c
clang/test/Preprocessor/predefined-arch-macros.c
clang/test/Sema/zvector.c
llvm/include/llvm/IR/IntrinsicsSystemZ.td
llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp
llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h
llvm/lib/Target/SystemZ/SystemZFeatures.td
llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/lib/Target/SystemZ/SystemZISelLowering.h
llvm/lib/Target/SystemZ/SystemZInstrFormats.td
llvm/lib/Target/SystemZ/SystemZInstrInfo.td
llvm/lib/Target/SystemZ/SystemZInstrVector.td
llvm/lib/Target/SystemZ/SystemZOperands.td
llvm/lib/Target/SystemZ/SystemZOperators.td
llvm/lib/Target/SystemZ/SystemZProcessors.td
llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
llvm/lib/TargetParser/Host.cpp
llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll
llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
llvm/test/CodeGen/SystemZ/args-12.ll
llvm/test/CodeGen/SystemZ/args-13.ll
llvm/test/CodeGen/SystemZ/int-add-19.ll
llvm/test/CodeGen/SystemZ/int-sub-12.ll
llvm/test/MC/SystemZ/insn-bad-z16.s
llvm/unittests/TargetParser/Host.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsSystemZ.def b/clang/include/clang/Basic/BuiltinsSystemZ.def
index c564dd9e486bc6..ba94c1a130f956 100644
--- a/clang/include/clang/Basic/BuiltinsSystemZ.def
+++ b/clang/include/clang/Basic/BuiltinsSystemZ.def
@@ -286,6 +286,7 @@ TARGET_BUILTIN(__builtin_s390_vstrszf, "V16UcV4UiV4UiV16Uci*", "nc", "vector-enh
TARGET_BUILTIN(__builtin_s390_vlbrh, "V8UsV8Us", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vlbrf, "V4UiV4Ui", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_vlbrg, "V2ULLiV2ULLi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_vlbrq, "ULLLiULLLi", "nc", "vector")
// NNP-assist facility intrinsics.
TARGET_BUILTIN(__builtin_s390_vclfnhs, "V4fV8UsIi", "nc", "nnp-assist")
@@ -294,5 +295,44 @@ TARGET_BUILTIN(__builtin_s390_vcrnfs, "V8UsV4fV4fIi", "nc", "nnp-assist")
TARGET_BUILTIN(__builtin_s390_vcfn, "V8UsV8UsIi", "nc", "nnp-assist")
TARGET_BUILTIN(__builtin_s390_vcnf, "V8UsV8UsIi", "nc", "nnp-assist")
+// Miscellaneous instruction extensions facility 4 intrinsics.
+TARGET_BUILTIN(__builtin_s390_bdepg, "ULiULiULi", "nc", "miscellaneous-extensions-4")
+TARGET_BUILTIN(__builtin_s390_bextg, "ULiULiULi", "nc", "miscellaneous-extensions-4")
+
+// Vector-enhancements facility 3 intrinsics.
+TARGET_BUILTIN(__builtin_s390_vgemb, "V16UcV8Us", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vgemh, "V8UsV16Uc", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vgemf, "V4UiV16Uc", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vgemg, "V2ULLiV16Uc", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vgemq, "ULLLiV16Uc", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vuplg, "SLLLiV2SLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vupllg, "ULLLiV2ULLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vuphg, "SLLLiV2SLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vuplhg, "ULLLiV2ULLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vavgq, "SLLLiSLLLiSLLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vavglq, "ULLLiULLLiULLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_veval, "V16UcV16UcV16UcV16UcIi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmahg, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmahq, "SLLLiSLLLiSLLLiSLLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmalhg, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmalhq, "ULLLiULLLiULLLiULLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmaeg, "SLLLiV2SLLiV2SLLiSLLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmaleg, "ULLLiV2ULLiV2ULLiULLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmaog, "SLLLiV2SLLiV2SLLiSLLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmalog, "ULLLiV2ULLiV2ULLiULLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmhg, "V2SLLiV2SLLiV2SLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmhq, "SLLLiSLLLiSLLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmlhg, "V2ULLiV2ULLiV2ULLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmlhq, "ULLLiULLLiULLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmeg, "SLLLiV2SLLiV2SLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmleg, "ULLLiV2ULLiV2ULLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmog, "SLLLiV2SLLiV2SLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vmlog, "ULLLiV2ULLiV2ULLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vceqqs, "SLLLiULLLiULLLii*", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vchqs, "SLLLiSLLLiSLLLii*", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vchlqs, "SLLLiULLLiULLLii*", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vclzq, "ULLLiULLLi", "nc", "vector-enhancements-3")
+TARGET_BUILTIN(__builtin_s390_vctzq, "ULLLiULLLi", "nc", "vector-enhancements-3")
+
#undef BUILTIN
#undef TARGET_BUILTIN
diff --git a/clang/lib/Basic/Targets/SystemZ.cpp b/clang/lib/Basic/Targets/SystemZ.cpp
index 06f08db2eadd47..c836d110d26d5f 100644
--- a/clang/lib/Basic/Targets/SystemZ.cpp
+++ b/clang/lib/Basic/Targets/SystemZ.cpp
@@ -105,6 +105,7 @@ static constexpr ISANameRevision ISARevisions[] = {
{{"arch12"}, 12}, {{"z14"}, 12},
{{"arch13"}, 13}, {{"z15"}, 13},
{{"arch14"}, 14}, {{"z16"}, 14},
+ {{"arch15"}, 15},
};
int SystemZTargetInfo::getISARevision(StringRef Name) const {
@@ -133,6 +134,7 @@ bool SystemZTargetInfo::hasFeature(StringRef Feature) const {
.Case("arch12", ISARevision >= 12)
.Case("arch13", ISARevision >= 13)
.Case("arch14", ISARevision >= 14)
+ .Case("arch15", ISARevision >= 15)
.Case("htm", HasTransactionalExecution)
.Case("vx", HasVector)
.Default(false);
@@ -167,7 +169,7 @@ void SystemZTargetInfo::getTargetDefines(const LangOptions &Opts,
if (HasVector)
Builder.defineMacro("__VX__");
if (Opts.ZVector)
- Builder.defineMacro("__VEC__", "10304");
+ Builder.defineMacro("__VEC__", "10305");
}
ArrayRef<Builtin::Info> SystemZTargetInfo::getTargetBuiltins() const {
diff --git a/clang/lib/Basic/Targets/SystemZ.h b/clang/lib/Basic/Targets/SystemZ.h
index e6405f174f660f..d05948586c467b 100644
--- a/clang/lib/Basic/Targets/SystemZ.h
+++ b/clang/lib/Basic/Targets/SystemZ.h
@@ -186,6 +186,10 @@ class LLVM_LIBRARY_VISIBILITY SystemZTargetInfo : public TargetInfo {
Features["vector-enhancements-2"] = true;
if (ISARevision >= 14)
Features["nnp-assist"] = true;
+ if (ISARevision >= 15) {
+ Features["miscellaneous-extensions-4"] = true;
+ Features["vector-enhancements-3"] = true;
+ }
return TargetInfo::initFeatureMap(Features, Diags, CPU, FeaturesVec);
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b80833fd91884d..aeabed8a385ad0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -20601,7 +20601,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
case SystemZ::BI__builtin_s390_vclzb:
case SystemZ::BI__builtin_s390_vclzh:
case SystemZ::BI__builtin_s390_vclzf:
- case SystemZ::BI__builtin_s390_vclzg: {
+ case SystemZ::BI__builtin_s390_vclzg:
+ case SystemZ::BI__builtin_s390_vclzq: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
@@ -20612,7 +20613,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
case SystemZ::BI__builtin_s390_vctzb:
case SystemZ::BI__builtin_s390_vctzh:
case SystemZ::BI__builtin_s390_vctzf:
- case SystemZ::BI__builtin_s390_vctzg: {
+ case SystemZ::BI__builtin_s390_vctzg:
+ case SystemZ::BI__builtin_s390_vctzq: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
@@ -20856,7 +20858,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
case SystemZ::BI__builtin_s390_vlbrh:
case SystemZ::BI__builtin_s390_vlbrf:
- case SystemZ::BI__builtin_s390_vlbrg: {
+ case SystemZ::BI__builtin_s390_vlbrg:
+ case SystemZ::BI__builtin_s390_vlbrq: {
llvm::Type *ResultType = ConvertType(E->getType());
Value *X = EmitScalarExpr(E->getArg(0));
Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
@@ -20881,16 +20884,19 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
INTRINSIC_WITH_CC(s390_vceqhs);
INTRINSIC_WITH_CC(s390_vceqfs);
INTRINSIC_WITH_CC(s390_vceqgs);
+ INTRINSIC_WITH_CC(s390_vceqqs);
INTRINSIC_WITH_CC(s390_vchbs);
INTRINSIC_WITH_CC(s390_vchhs);
INTRINSIC_WITH_CC(s390_vchfs);
INTRINSIC_WITH_CC(s390_vchgs);
+ INTRINSIC_WITH_CC(s390_vchqs);
INTRINSIC_WITH_CC(s390_vchlbs);
INTRINSIC_WITH_CC(s390_vchlhs);
INTRINSIC_WITH_CC(s390_vchlfs);
INTRINSIC_WITH_CC(s390_vchlgs);
+ INTRINSIC_WITH_CC(s390_vchlqs);
INTRINSIC_WITH_CC(s390_vfaebs);
INTRINSIC_WITH_CC(s390_vfaehs);
diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
index c842edd6756f2d..a14c39f9f73132 100644
--- a/clang/lib/Headers/vecintrin.h
+++ b/clang/lib/Headers/vecintrin.h
@@ -468,6 +468,27 @@ vec_perm(__vector __bool long long __a, __vector __bool long long __b,
(__vector unsigned char)__a, (__vector unsigned char)__b, __c);
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_perm(__vector signed __int128 __a, __vector signed __int128 __b,
+ __vector unsigned char __c) {
+ return (__vector signed __int128)__builtin_s390_vperm(
+ (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_perm(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned char __c) {
+ return (__vector unsigned __int128)__builtin_s390_vperm(
+ (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_perm(__vector __bool __int128 __a, __vector __bool __int128 __b,
+ __vector unsigned char __c) {
+ return (__vector __bool __int128)__builtin_s390_vperm(
+ (__vector unsigned char)__a, (__vector unsigned char)__b, __c);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_perm(__vector float __a, __vector float __b,
@@ -514,9 +535,19 @@ vec_permi(__vector double __a, __vector double __b, int __c)
(__vector unsigned long long)(Y), \
(((Z) & 2) << 1) | ((Z) & 1)))
+/*-- vec_bperm --------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_ai __vector unsigned long long
+vec_bperm(__vector unsigned __int128 __a, __vector unsigned char __b) {
+ return __builtin_s390_vbperm((__vector unsigned char)__a, __b);
+}
+#endif
+
/*-- vec_bperm_u128 ---------------------------------------------------------*/
#if __ARCH__ >= 12
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned long long
vec_bperm_u128(__vector unsigned char __a, __vector unsigned char __b) {
return __builtin_s390_vbperm(__a, __b);
@@ -558,6 +589,18 @@ vec_revb(__vector unsigned long long __vec) {
return __builtin_s390_vlbrg(__vec);
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_revb(__vector signed __int128 __vec) {
+ return (__vector signed __int128)
+ __builtin_s390_vlbrq((unsigned __int128)__vec);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_revb(__vector unsigned __int128 __vec) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vlbrq((unsigned __int128)__vec);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_revb(__vector float __vec) {
@@ -820,6 +863,46 @@ vec_sel(__vector unsigned long long __a, __vector unsigned long long __b,
(~(__vector unsigned long long)__c & __a));
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_sel(__vector signed __int128 __a, __vector signed __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (((__vector signed __int128)__c & __b) |
+ (~(__vector signed __int128)__c & __a));
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_sel(__vector signed __int128 __a, __vector signed __int128 __b,
+ __vector __bool __int128 __c) {
+ return (((__vector signed __int128)__c & __b) |
+ (~(__vector signed __int128)__c & __a));
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_sel(__vector __bool __int128 __a, __vector __bool __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (((__vector __bool __int128)__c & __b) |
+ (~(__vector __bool __int128)__c & __a));
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_sel(__vector __bool __int128 __a, __vector __bool __int128 __b,
+ __vector __bool __int128 __c) {
+ return (__c & __b) | (~__c & __a);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_sel(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (__c & __b) | (~__c & __a);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_sel(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector __bool __int128 __c) {
+ return (((__vector unsigned __int128)__c & __b) |
+ (~(__vector unsigned __int128)__c & __a));
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_sel(__vector float __a, __vector float __b, __vector unsigned int __c) {
@@ -1078,6 +1161,22 @@ vec_xl(long __offset, const unsigned long long *__ptr) {
return V;
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_xl(long __offset, const signed __int128 *__ptr) {
+ __vector signed __int128 V;
+ __builtin_memcpy(&V, ((const char *)__ptr + __offset),
+ sizeof(__vector signed __int128));
+ return V;
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_xl(long __offset, const unsigned __int128 *__ptr) {
+ __vector unsigned __int128 V;
+ __builtin_memcpy(&V, ((const char *)__ptr + __offset),
+ sizeof(__vector unsigned __int128));
+ return V;
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_xl(long __offset, const float *__ptr) {
@@ -1294,6 +1393,22 @@ vec_xst(__vector unsigned long long __vec, long __offset,
sizeof(__vector unsigned long long));
}
+static inline __ATTRS_o_ai void
+vec_xst(__vector signed __int128 __vec, long __offset,
+ signed __int128 *__ptr) {
+ __vector signed __int128 V = __vec;
+ __builtin_memcpy(((char *)__ptr + __offset), &V,
+ sizeof(__vector signed __int128));
+}
+
+static inline __ATTRS_o_ai void
+vec_xst(__vector unsigned __int128 __vec, long __offset,
+ unsigned __int128 *__ptr) {
+ __vector unsigned __int128 V = __vec;
+ __builtin_memcpy(((char *)__ptr + __offset), &V,
+ sizeof(__vector unsigned __int128));
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai void
vec_xst(__vector float __vec, long __offset, float *__ptr) {
@@ -1465,6 +1580,14 @@ extern __ATTRS_o __vector unsigned long long
vec_load_bndry(const unsigned long long *__ptr, unsigned short __len)
__constant_pow2_range(__len, 64, 4096);
+extern __ATTRS_o __vector signed __int128
+vec_load_bndry(const signed __int128 *__ptr, unsigned short __len)
+ __constant_pow2_range(__len, 64, 4096);
+
+extern __ATTRS_o __vector unsigned __int128
+vec_load_bndry(const unsigned __int128 *__ptr, unsigned short __len)
+ __constant_pow2_range(__len, 64, 4096);
+
#if __ARCH__ >= 12
extern __ATTRS_o __vector float
vec_load_bndry(const float *__ptr, unsigned short __len)
@@ -1496,43 +1619,51 @@ vec_load_len(const unsigned char *__ptr, unsigned int __len) {
return (__vector unsigned char)__builtin_s390_vll(__len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed short
vec_load_len(const signed short *__ptr, unsigned int __len) {
return (__vector signed short)__builtin_s390_vll(__len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned short
vec_load_len(const unsigned short *__ptr, unsigned int __len) {
return (__vector unsigned short)__builtin_s390_vll(__len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed int
vec_load_len(const signed int *__ptr, unsigned int __len) {
return (__vector signed int)__builtin_s390_vll(__len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned int
vec_load_len(const unsigned int *__ptr, unsigned int __len) {
return (__vector unsigned int)__builtin_s390_vll(__len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_load_len(const signed long long *__ptr, unsigned int __len) {
return (__vector signed long long)__builtin_s390_vll(__len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long
vec_load_len(const unsigned long long *__ptr, unsigned int __len) {
return (__vector unsigned long long)__builtin_s390_vll(__len, __ptr);
}
#if __ARCH__ >= 12
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector float
vec_load_len(const float *__ptr, unsigned int __len) {
return (__vector float)__builtin_s390_vll(__len, __ptr);
}
#endif
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector double
vec_load_len(const double *__ptr, unsigned int __len) {
return (__vector double)__builtin_s390_vll(__len, __ptr);
@@ -1541,7 +1672,12 @@ vec_load_len(const double *__ptr, unsigned int __len) {
/*-- vec_load_len_r ---------------------------------------------------------*/
#if __ARCH__ >= 12
-static inline __ATTRS_ai __vector unsigned char
+static inline __ATTRS_o_ai __vector signed char
+vec_load_len_r(const signed char *__ptr, unsigned int __len) {
+ return (__vector signed char)__builtin_s390_vlrlr(__len, __ptr);
+}
+
+static inline __ATTRS_o_ai __vector unsigned char
vec_load_len_r(const unsigned char *__ptr, unsigned int __len) {
return (__vector unsigned char)__builtin_s390_vlrlr(__len, __ptr);
}
@@ -1561,36 +1697,42 @@ vec_store_len(__vector unsigned char __vec, unsigned char *__ptr,
__builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector signed short __vec, signed short *__ptr,
unsigned int __len) {
__builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector unsigned short __vec, unsigned short *__ptr,
unsigned int __len) {
__builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector signed int __vec, signed int *__ptr,
unsigned int __len) {
__builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector unsigned int __vec, unsigned int *__ptr,
unsigned int __len) {
__builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector signed long long __vec, signed long long *__ptr,
unsigned int __len) {
__builtin_s390_vstl((__vector signed char)__vec, __len, __ptr);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector unsigned long long __vec, unsigned long long *__ptr,
unsigned int __len) {
@@ -1598,6 +1740,7 @@ vec_store_len(__vector unsigned long long __vec, unsigned long long *__ptr,
}
#if __ARCH__ >= 12
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector float __vec, float *__ptr,
unsigned int __len) {
@@ -1605,6 +1748,7 @@ vec_store_len(__vector float __vec, float *__ptr,
}
#endif
+// This prototype is deprecated.
static inline __ATTRS_o_ai void
vec_store_len(__vector double __vec, double *__ptr,
unsigned int __len) {
@@ -1614,7 +1758,13 @@ vec_store_len(__vector double __vec, double *__ptr,
/*-- vec_store_len_r --------------------------------------------------------*/
#if __ARCH__ >= 12
-static inline __ATTRS_ai void
+static inline __ATTRS_o_ai void
+vec_store_len_r(__vector signed char __vec, signed char *__ptr,
+ unsigned int __len) {
+ __builtin_s390_vstrlr(__vec, __len, __ptr);
+}
+
+static inline __ATTRS_o_ai void
vec_store_len_r(__vector unsigned char __vec, unsigned char *__ptr,
unsigned int __len) {
__builtin_s390_vstrlr((__vector signed char)__vec, __len, __ptr);
@@ -1711,6 +1861,35 @@ vec_genmasks_64(unsigned char __first, unsigned char __last)
return (__vector unsigned long long)__value;
}
+/*-- vec_gen_element_masks_* ------------------------------------------------*/
+
+#if __ARCH__ >= 15
+static inline __ATTRS_ai __vector unsigned char
+vec_gen_element_masks_8(__vector unsigned short __mask) {
+ return __builtin_s390_vgemb(__mask);
+}
+
+static inline __ATTRS_ai __vector unsigned short
+vec_gen_element_masks_16(__vector unsigned char __mask) {
+ return __builtin_s390_vgemh(__mask);
+}
+
+static inline __ATTRS_ai __vector unsigned int
+vec_gen_element_masks_32(__vector unsigned char __mask) {
+ return __builtin_s390_vgemf(__mask);
+}
+
+static inline __ATTRS_ai __vector unsigned long long
+vec_gen_element_masks_64(__vector unsigned char __mask) {
+ return __builtin_s390_vgemg(__mask);
+}
+
+static inline __ATTRS_ai __vector unsigned __int128
+vec_gen_element_masks_128(__vector unsigned char __mask) {
+ return (__vector unsigned __int128)__builtin_s390_vgemq(__mask);
+}
+#endif
+
/*-- vec_splat --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed char
@@ -1894,6 +2073,16 @@ vec_splats(unsigned long long __scalar) {
return (__vector unsigned long long)__scalar;
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_splats(signed __int128 __scalar) {
+ return (__vector signed __int128)__scalar;
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_splats(unsigned __int128 __scalar) {
+ return (__vector unsigned __int128)__scalar;
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_splats(float __scalar) {
@@ -2166,6 +2355,27 @@ vec_pack(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector unsigned int)(__ac[1], __ac[3], __bc[1], __bc[3]);
}
+static inline __ATTRS_o_ai __vector signed long long
+vec_pack(__vector signed __int128 __a, __vector signed __int128 __b) {
+ __vector signed long long __ac = (__vector signed long long)__a;
+ __vector signed long long __bc = (__vector signed long long)__b;
+ return (__vector signed long long)(__ac[1], __bc[1]);
+}
+
+static inline __ATTRS_o_ai __vector __bool long long
+vec_pack(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ __vector __bool long long __ac = (__vector __bool long long)__a;
+ __vector __bool long long __bc = (__vector __bool long long)__b;
+ return (__vector __bool long long)(__ac[1], __bc[1]);
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_pack(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ __vector unsigned long long __ac = (__vector unsigned long long)__a;
+ __vector unsigned long long __bc = (__vector unsigned long long)__b;
+ return (__vector unsigned long long)(__ac[1], __bc[1]);
+}
+
/*-- vec_packs --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed char
@@ -2344,6 +2554,24 @@ vec_unpackh(__vector unsigned int __a) {
return __builtin_s390_vuplhf(__a);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed __int128
+vec_unpackh(__vector signed long long __a) {
+ return (__vector signed __int128)__builtin_s390_vuphg(__a);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_unpackh(__vector __bool long long __a) {
+ return ((__vector __bool __int128)
+ __builtin_s390_vuphg((__vector signed long long)__a));
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_unpackh(__vector unsigned long long __a) {
+ return (__vector unsigned __int128)__builtin_s390_vuplhg(__a);
+}
+#endif
+
/*-- vec_unpackl ------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed short
@@ -2394,6 +2622,24 @@ vec_unpackl(__vector unsigned int __a) {
return __builtin_s390_vupllf(__a);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed __int128
+vec_unpackl(__vector signed long long __a) {
+ return (__vector signed __int128)__builtin_s390_vuplg(__a);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_unpackl(__vector __bool long long __a) {
+ return ((__vector __bool __int128)
+ __builtin_s390_vuplg((__vector signed long long)__a));
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_unpackl(__vector unsigned long long __a) {
+ return (__vector unsigned __int128)__builtin_s390_vupllg(__a);
+}
+#endif
+
/*-- vec_cmpeq --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector __bool char
@@ -2456,6 +2702,21 @@ vec_cmpeq(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector __bool long long)(__a == __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmpeq(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return (__vector __bool __int128)(__a == __b);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmpeq(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return (__vector __bool __int128)(__a == __b);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmpeq(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector __bool __int128)(__a == __b);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector __bool int
vec_cmpeq(__vector float __a, __vector float __b) {
@@ -2510,6 +2771,16 @@ vec_cmpge(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector __bool long long)(__a >= __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmpge(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return (__vector __bool __int128)(__a >= __b);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmpge(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector __bool __int128)(__a >= __b);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector __bool int
vec_cmpge(__vector float __a, __vector float __b) {
@@ -2564,6 +2835,16 @@ vec_cmpgt(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector __bool long long)(__a > __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmpgt(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return (__vector __bool __int128)(__a > __b);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmpgt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector __bool __int128)(__a > __b);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector __bool int
vec_cmpgt(__vector float __a, __vector float __b) {
@@ -2618,6 +2899,16 @@ vec_cmple(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector __bool long long)(__a <= __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmple(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return (__vector __bool __int128)(__a <= __b);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmple(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector __bool __int128)(__a <= __b);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector __bool int
vec_cmple(__vector float __a, __vector float __b) {
@@ -2672,6 +2963,16 @@ vec_cmplt(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector __bool long long)(__a < __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmplt(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return (__vector __bool __int128)(__a < __b);
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_cmplt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector __bool __int128)(__a < __b);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector __bool int
vec_cmplt(__vector float __a, __vector float __b) {
@@ -2914,6 +3215,29 @@ vec_all_eq(__vector __bool long long __a, __vector __bool long long __b) {
return __cc == 0;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_all_eq(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 0;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_eq(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 0;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_eq(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 0;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_all_eq(__vector float __a, __vector float __b) {
@@ -3161,6 +3485,29 @@ vec_all_ne(__vector __bool long long __a, __vector __bool long long __b) {
return __cc == 3;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_all_ne(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 3;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_ne(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 3;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_ne(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 3;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_all_ne(__vector float __a, __vector float __b) {
@@ -3399,6 +3746,22 @@ vec_all_ge(__vector __bool long long __a, __vector __bool long long __b) {
return __cc == 3;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_all_ge(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc);
+ return __cc == 3;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_ge(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc);
+ return __cc == 3;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_all_ge(__vector float __a, __vector float __b) {
@@ -3637,6 +4000,22 @@ vec_all_gt(__vector __bool long long __a, __vector __bool long long __b) {
return __cc == 0;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_all_gt(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc);
+ return __cc == 0;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_gt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 0;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_all_gt(__vector float __a, __vector float __b) {
@@ -3875,6 +4254,22 @@ vec_all_le(__vector __bool long long __a, __vector __bool long long __b) {
return __cc == 3;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_all_le(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc);
+ return __cc == 3;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_le(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc == 3;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_all_le(__vector float __a, __vector float __b) {
@@ -4113,6 +4508,22 @@ vec_all_lt(__vector __bool long long __a, __vector __bool long long __b) {
return __cc == 0;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_all_lt(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc);
+ return __cc == 0;
+}
+
+static inline __ATTRS_o_ai int
+vec_all_lt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc);
+ return __cc == 0;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_all_lt(__vector float __a, __vector float __b) {
@@ -4467,6 +4878,29 @@ vec_any_eq(__vector __bool long long __a, __vector __bool long long __b) {
return __cc <= 1;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_any_eq(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc <= 1;
+}
+
+static inline __ATTRS_o_ai int
+vec_any_eq(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc <= 1;
+}
+
+static inline __ATTRS_o_ai int
+vec_any_eq(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ int __cc;
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc <= 1;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_any_eq(__vector float __a, __vector float __b) {
@@ -4713,28 +5147,51 @@ vec_any_ne(__vector __bool long long __a, __vector __bool long long __b) {
return __cc != 0;
}
-#if __ARCH__ >= 12
+#if __ARCH__ >= 15
static inline __ATTRS_o_ai int
-vec_any_ne(__vector float __a, __vector float __b) {
+vec_any_ne(__vector signed __int128 __a, __vector signed __int128 __b) {
int __cc;
- __builtin_s390_vfcesbs(__a, __b, &__cc);
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
return __cc != 0;
}
-#endif
static inline __ATTRS_o_ai int
-vec_any_ne(__vector double __a, __vector double __b) {
+vec_any_ne(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
int __cc;
- __builtin_s390_vfcedbs(__a, __b, &__cc);
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
return __cc != 0;
}
-/*-- vec_any_ge -------------------------------------------------------------*/
-
static inline __ATTRS_o_ai int
-vec_any_ge(__vector signed char __a, __vector signed char __b) {
+vec_any_ne(__vector __bool __int128 __a, __vector __bool __int128 __b) {
int __cc;
- __builtin_s390_vchbs(__b, __a, &__cc);
+ __builtin_s390_vceqqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc != 0;
+}
+#endif
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_ne(__vector float __a, __vector float __b) {
+ int __cc;
+ __builtin_s390_vfcesbs(__a, __b, &__cc);
+ return __cc != 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
+vec_any_ne(__vector double __a, __vector double __b) {
+ int __cc;
+ __builtin_s390_vfcedbs(__a, __b, &__cc);
+ return __cc != 0;
+}
+
+/*-- vec_any_ge -------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai int
+vec_any_ge(__vector signed char __a, __vector signed char __b) {
+ int __cc;
+ __builtin_s390_vchbs(__b, __a, &__cc);
return __cc != 0;
}
@@ -4951,6 +5408,22 @@ vec_any_ge(__vector __bool long long __a, __vector __bool long long __b) {
return __cc != 0;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_any_ge(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc);
+ return __cc != 0;
+}
+
+static inline __ATTRS_o_ai int
+vec_any_ge(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc);
+ return __cc != 0;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_any_ge(__vector float __a, __vector float __b) {
@@ -5189,6 +5662,22 @@ vec_any_gt(__vector __bool long long __a, __vector __bool long long __b) {
return __cc <= 1;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_any_gt(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc);
+ return __cc <= 1;
+}
+
+static inline __ATTRS_o_ai int
+vec_any_gt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc <= 1;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_any_gt(__vector float __a, __vector float __b) {
@@ -5427,6 +5916,22 @@ vec_any_le(__vector __bool long long __a, __vector __bool long long __b) {
return __cc != 0;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_any_le(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__a, (signed __int128)__b, &__cc);
+ return __cc != 0;
+}
+
+static inline __ATTRS_o_ai int
+vec_any_le(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__a, (unsigned __int128)__b, &__cc);
+ return __cc != 0;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_any_le(__vector float __a, __vector float __b) {
@@ -5665,6 +6170,22 @@ vec_any_lt(__vector __bool long long __a, __vector __bool long long __b) {
return __cc <= 1;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai int
+vec_any_lt(__vector signed __int128 __a, __vector signed __int128 __b) {
+ int __cc;
+ __builtin_s390_vchqs((signed __int128)__b, (signed __int128)__a, &__cc);
+ return __cc <= 1;
+}
+
+static inline __ATTRS_o_ai int
+vec_any_lt(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ int __cc;
+ __builtin_s390_vchlqs((unsigned __int128)__b, (unsigned __int128)__a, &__cc);
+ return __cc <= 1;
+}
+#endif
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_any_lt(__vector float __a, __vector float __b) {
@@ -5753,40 +6274,419 @@ vec_any_nlt(__vector double __a, __vector double __b) {
return __cc != 0;
}
-/*-- vec_any_nan ------------------------------------------------------------*/
+/*-- vec_any_nan ------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_nan(__vector float __a) {
+ int __cc;
+ __builtin_s390_vftcisb(__a, 15, &__cc);
+ return __cc != 3;
+}
+#endif
+
+static inline __ATTRS_o_ai int
+vec_any_nan(__vector double __a) {
+ int __cc;
+ __builtin_s390_vftcidb(__a, 15, &__cc);
+ return __cc != 3;
+}
+
+/*-- vec_any_numeric --------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai int
+vec_any_numeric(__vector float __a) {
+ int __cc;
+ __builtin_s390_vftcisb(__a, 15, &__cc);
+ return __cc != 0;
+}
+#endif
+
+static inline __ATTRS_o_ai int
+vec_any_numeric(__vector double __a) {
+ int __cc;
+ __builtin_s390_vftcidb(__a, 15, &__cc);
+ return __cc != 0;
+}
+
+/*-- vec_blend --------------------------------------------------------------*/
+
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed char
+vec_blend(__vector signed char __a, __vector signed char __b,
+ __vector signed char __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed char)0));
+}
+
+static inline __ATTRS_o_ai __vector __bool char
+vec_blend(__vector __bool char __a, __vector __bool char __b,
+ __vector signed char __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed char)0));
+}
+
+static inline __ATTRS_o_ai __vector unsigned char
+vec_blend(__vector unsigned char __a, __vector unsigned char __b,
+ __vector signed char __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed char)0));
+}
+
+static inline __ATTRS_o_ai __vector signed short
+vec_blend(__vector signed short __a, __vector signed short __b,
+ __vector signed short __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed short)0));
+}
+
+static inline __ATTRS_o_ai __vector __bool short
+vec_blend(__vector __bool short __a, __vector __bool short __b,
+ __vector signed short __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed short)0));
+}
+
+static inline __ATTRS_o_ai __vector unsigned short
+vec_blend(__vector unsigned short __a, __vector unsigned short __b,
+ __vector signed short __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed short)0));
+}
+
+static inline __ATTRS_o_ai __vector signed int
+vec_blend(__vector signed int __a, __vector signed int __b,
+ __vector signed int __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0));
+}
+
+static inline __ATTRS_o_ai __vector __bool int
+vec_blend(__vector __bool int __a, __vector __bool int __b,
+ __vector signed int __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0));
+}
+
+static inline __ATTRS_o_ai __vector unsigned int
+vec_blend(__vector unsigned int __a, __vector unsigned int __b,
+ __vector signed int __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0));
+}
+
+static inline __ATTRS_o_ai __vector signed long long
+vec_blend(__vector signed long long __a, __vector signed long long __b,
+ __vector signed long long __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0));
+}
+
+static inline __ATTRS_o_ai __vector __bool long long
+vec_blend(__vector __bool long long __a, __vector __bool long long __b,
+ __vector signed long long __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0));
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_blend(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector signed long long __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0));
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_blend(__vector signed __int128 __a, __vector signed __int128 __b,
+ __vector signed __int128 __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed __int128)0));
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_blend(__vector __bool __int128 __a, __vector __bool __int128 __b,
+ __vector signed __int128 __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed __int128)0));
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_blend(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector signed __int128 __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed __int128)0));
+}
+
+static inline __ATTRS_o_ai __vector float
+vec_blend(__vector float __a, __vector float __b,
+ __vector signed int __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed int)0));
+}
+
+static inline __ATTRS_o_ai __vector double
+vec_blend(__vector double __a, __vector double __b,
+ __vector signed long long __c) {
+ return vec_sel(__a, __b, vec_cmplt(__c, (__vector signed long long)0));
+}
+#endif
+
+/*-- vec_and ---------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai __vector __bool char
+vec_and(__vector __bool char __a, __vector __bool char __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector signed char
+vec_and(__vector signed char __a, __vector signed char __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned char
+vec_and(__vector unsigned char __a, __vector unsigned char __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool short
+vec_and(__vector __bool short __a, __vector __bool short __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector signed short
+vec_and(__vector signed short __a, __vector signed short __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned short
+vec_and(__vector unsigned short __a, __vector unsigned short __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool int
+vec_and(__vector __bool int __a, __vector __bool int __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector signed int
+vec_and(__vector signed int __a, __vector signed int __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned int
+vec_and(__vector unsigned int __a, __vector unsigned int __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool long long
+vec_and(__vector __bool long long __a, __vector __bool long long __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector signed long long
+vec_and(__vector signed long long __a, __vector signed long long __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_and(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_and(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_and(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return __a & __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_and(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return __a & __b;
+}
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai __vector float
+vec_and(__vector float __a, __vector float __b) {
+ return (__vector float)((__vector unsigned int)__a &
+ (__vector unsigned int)__b);
+}
+#endif
+
+static inline __ATTRS_o_ai __vector double
+vec_and(__vector double __a, __vector double __b) {
+ return (__vector double)((__vector unsigned long long)__a &
+ (__vector unsigned long long)__b);
+}
+
+/*-- vec_or ----------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai __vector __bool char
+vec_or(__vector __bool char __a, __vector __bool char __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector signed char
+vec_or(__vector signed char __a, __vector signed char __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned char
+vec_or(__vector unsigned char __a, __vector unsigned char __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool short
+vec_or(__vector __bool short __a, __vector __bool short __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector signed short
+vec_or(__vector signed short __a, __vector signed short __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned short
+vec_or(__vector unsigned short __a, __vector unsigned short __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool int
+vec_or(__vector __bool int __a, __vector __bool int __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector signed int
+vec_or(__vector signed int __a, __vector signed int __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned int
+vec_or(__vector unsigned int __a, __vector unsigned int __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool long long
+vec_or(__vector __bool long long __a, __vector __bool long long __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector signed long long
+vec_or(__vector signed long long __a, __vector signed long long __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_or(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_or(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_or(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return __a | __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_or(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return __a | __b;
+}
+
+#if __ARCH__ >= 12
+static inline __ATTRS_o_ai __vector float
+vec_or(__vector float __a, __vector float __b) {
+ return (__vector float)((__vector unsigned int)__a |
+ (__vector unsigned int)__b);
+}
+#endif
+
+static inline __ATTRS_o_ai __vector double
+vec_or(__vector double __a, __vector double __b) {
+ return (__vector double)((__vector unsigned long long)__a |
+ (__vector unsigned long long)__b);
+}
+
+/*-- vec_xor ----------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai __vector __bool char
+vec_xor(__vector __bool char __a, __vector __bool char __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector signed char
+vec_xor(__vector signed char __a, __vector signed char __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned char
+vec_xor(__vector unsigned char __a, __vector unsigned char __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool short
+vec_xor(__vector __bool short __a, __vector __bool short __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector signed short
+vec_xor(__vector signed short __a, __vector signed short __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned short
+vec_xor(__vector unsigned short __a, __vector unsigned short __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool int
+vec_xor(__vector __bool int __a, __vector __bool int __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector signed int
+vec_xor(__vector signed int __a, __vector signed int __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned int
+vec_xor(__vector unsigned int __a, __vector unsigned int __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector __bool long long
+vec_xor(__vector __bool long long __a, __vector __bool long long __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector signed long long
+vec_xor(__vector signed long long __a, __vector signed long long __b) {
+ return __a ^ __b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_xor(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return __a ^ __b;
+}
-#if __ARCH__ >= 12
-static inline __ATTRS_o_ai int
-vec_any_nan(__vector float __a) {
- int __cc;
- __builtin_s390_vftcisb(__a, 15, &__cc);
- return __cc != 3;
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_xor(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return __a ^ __b;
}
-#endif
-static inline __ATTRS_o_ai int
-vec_any_nan(__vector double __a) {
- int __cc;
- __builtin_s390_vftcidb(__a, 15, &__cc);
- return __cc != 3;
+static inline __ATTRS_o_ai __vector signed __int128
+vec_xor(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return __a ^ __b;
}
-/*-- vec_any_numeric --------------------------------------------------------*/
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_xor(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return __a ^ __b;
+}
#if __ARCH__ >= 12
-static inline __ATTRS_o_ai int
-vec_any_numeric(__vector float __a) {
- int __cc;
- __builtin_s390_vftcisb(__a, 15, &__cc);
- return __cc != 0;
+static inline __ATTRS_o_ai __vector float
+vec_xor(__vector float __a, __vector float __b) {
+ return (__vector float)((__vector unsigned int)__a ^
+ (__vector unsigned int)__b);
}
#endif
-static inline __ATTRS_o_ai int
-vec_any_numeric(__vector double __a) {
- int __cc;
- __builtin_s390_vftcidb(__a, 15, &__cc);
- return __cc != 0;
+static inline __ATTRS_o_ai __vector double
+vec_xor(__vector double __a, __vector double __b) {
+ return (__vector double)((__vector unsigned long long)__a ^
+ (__vector unsigned long long)__b);
}
/*-- vec_andc ---------------------------------------------------------------*/
@@ -5947,6 +6847,21 @@ vec_andc(__vector unsigned long long __a, __vector __bool long long __b) {
return __a & ~__b;
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_andc(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return __a & ~__b;
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_andc(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return __a & ~__b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_andc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return __a & ~__b;
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_andc(__vector float __a, __vector float __b) {
@@ -6133,6 +7048,21 @@ vec_nor(__vector unsigned long long __a, __vector __bool long long __b) {
return ~(__a | __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_nor(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return ~(__a | __b);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_nor(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return ~(__a | __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_nor(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return ~(__a | __b);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_nor(__vector float __a, __vector float __b) {
@@ -6224,6 +7154,21 @@ vec_orc(__vector unsigned long long __a, __vector unsigned long long __b) {
return __a | ~__b;
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_orc(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_orc(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return __a | ~__b;
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_orc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return __a | ~__b;
+}
+
static inline __ATTRS_o_ai __vector float
vec_orc(__vector float __a, __vector float __b) {
return (__vector float)((__vector unsigned int)__a |
@@ -6300,6 +7245,21 @@ vec_nand(__vector unsigned long long __a, __vector unsigned long long __b) {
return ~(__a & __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_nand(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_nand(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return ~(__a & __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_nand(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return ~(__a & __b);
+}
+
static inline __ATTRS_o_ai __vector float
vec_nand(__vector float __a, __vector float __b) {
return (__vector float)~((__vector unsigned int)__a &
@@ -6376,6 +7336,21 @@ vec_eqv(__vector unsigned long long __a, __vector unsigned long long __b) {
return ~(__a ^ __b);
}
+static inline __ATTRS_o_ai __vector __bool __int128
+vec_eqv(__vector __bool __int128 __a, __vector __bool __int128 __b) {
+ return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_eqv(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return ~(__a ^ __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_eqv(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return ~(__a ^ __b);
+}
+
static inline __ATTRS_o_ai __vector float
vec_eqv(__vector float __a, __vector float __b) {
return (__vector float)~((__vector unsigned int)__a ^
@@ -6389,6 +7364,91 @@ vec_eqv(__vector double __a, __vector double __b) {
}
#endif
+/*-- vec_evaluate -----------------------------------------------------------*/
+
+#if __ARCH__ >= 15
+extern __ATTRS_o __vector signed char
+vec_evaluate(__vector signed char __a, __vector signed char __b,
+ __vector signed char __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector unsigned char
+vec_evaluate(__vector unsigned char __a, __vector unsigned char __b,
+ __vector unsigned char __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector __bool char
+vec_evaluate(__vector __bool char __a, __vector __bool char __b,
+ __vector __bool char __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector signed short
+vec_evaluate(__vector signed short __a, __vector signed short __b,
+ __vector signed short __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector unsigned short
+vec_evaluate(__vector unsigned short __a, __vector unsigned short __b,
+ __vector unsigned short __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector __bool short
+vec_evaluate(__vector __bool short __a, __vector __bool short __b,
+ __vector __bool short __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector signed int
+vec_evaluate(__vector signed int __a, __vector signed int __b,
+ __vector signed int __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector unsigned int
+vec_evaluate(__vector unsigned int __a, __vector unsigned int __b,
+ __vector unsigned int __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector __bool int
+vec_evaluate(__vector __bool int __a, __vector __bool int __b,
+ __vector __bool int __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector signed long long
+vec_evaluate(__vector signed long long __a, __vector signed long long __b,
+ __vector signed long long __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector unsigned long long
+vec_evaluate(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned long long __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector __bool long long
+vec_evaluate(__vector __bool long long __a, __vector __bool long long __b,
+ __vector __bool long long __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector signed __int128
+vec_evaluate(__vector signed __int128 __a, __vector signed __int128 __b,
+ __vector signed __int128 __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector unsigned __int128
+vec_evaluate(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c, unsigned char __d)
+ __constant(__d);
+
+extern __ATTRS_o __vector __bool __int128
+vec_evaluate(__vector __bool __int128 __a, __vector __bool __int128 __b,
+ __vector __bool __int128 __c, unsigned char __d)
+ __constant(__d);
+
+#define vec_evaluate(A, B, C, D) \
+ ((__typeof__((vec_evaluate)((A), (B), (C), (D)))) \
+ __builtin_s390_veval((__vector unsigned char)(A), \
+ (__vector unsigned char)(B), \
+ (__vector unsigned char)(C), (D)))
+#endif
+
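A minimal usage sketch of the new vec_evaluate intrinsic, based only on the declarations and macro above (illustrative, not part of the patch). The helper name is made up, and reading the immediate as an 8-bit truth table over the three operands is an assumption by analogy with other ternary bit-logic instructions; the only requirements visible in this hunk are matching vector operand types and a compile-time-constant last argument.

/* Illustrative sketch -- not part of the patch.  Assumes -march=arch15
   -mzvector.  0xE8 yields the bitwise majority of a, b and c *if* the
   immediate is an 8-bit truth table indexed by the corresponding bits of
   the three operands; that interpretation is an assumption, not something
   stated in this hunk. */
#include <vecintrin.h>

static __vector unsigned int majority(__vector unsigned int a,
                                      __vector unsigned int b,
                                      __vector unsigned int c) {
  return vec_evaluate(a, b, c, 0xE8);
}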
/*-- vec_cntlz --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector unsigned char
@@ -6431,6 +7491,20 @@ vec_cntlz(__vector unsigned long long __a) {
return __builtin_s390_vclzg(__a);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_cntlz(__vector signed __int128 __a) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vclzq((unsigned __int128)__a);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_cntlz(__vector unsigned __int128 __a) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vclzq((unsigned __int128)__a);
+}
+#endif
+
/*-- vec_cnttz --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector unsigned char
@@ -6473,6 +7547,20 @@ vec_cnttz(__vector unsigned long long __a) {
return __builtin_s390_vctzg(__a);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_cnttz(__vector signed __int128 __a) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vctzq((unsigned __int128)__a);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_cnttz(__vector unsigned __int128 __a) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vctzq((unsigned __int128)__a);
+}
+#endif
+
/*-- vec_popcnt -------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector unsigned char
@@ -6904,8 +7992,21 @@ vec_sll(__vector unsigned long long __a, __vector unsigned int __b) {
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_sll(__vector signed __int128 __a, __vector unsigned char __b) {
+ return (__vector signed __int128)__builtin_s390_vsl(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_sll(__vector unsigned __int128 __a, __vector unsigned char __b) {
+ return (__vector unsigned __int128)__builtin_s390_vsl(
+ (__vector unsigned char)__a, __b);
+}
+
/*-- vec_slb ----------------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed char
vec_slb(__vector signed char __a, __vector signed char __b) {
return (__vector signed char)__builtin_s390_vslb(
@@ -6918,6 +8019,7 @@ vec_slb(__vector signed char __a, __vector unsigned char __b) {
(__vector unsigned char)__a, __b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned char
vec_slb(__vector unsigned char __a, __vector signed char __b) {
return __builtin_s390_vslb(__a, (__vector unsigned char)__b);
@@ -6928,110 +8030,187 @@ vec_slb(__vector unsigned char __a, __vector unsigned char __b) {
return __builtin_s390_vslb(__a, __b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed short
vec_slb(__vector signed short __a, __vector signed short __b) {
return (__vector signed short)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed short
vec_slb(__vector signed short __a, __vector unsigned short __b) {
return (__vector signed short)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed short
+vec_slb(__vector signed short __a, __vector unsigned char __b) {
+ return (__vector signed short)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned short
vec_slb(__vector unsigned short __a, __vector signed short __b) {
return (__vector unsigned short)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned short
vec_slb(__vector unsigned short __a, __vector unsigned short __b) {
return (__vector unsigned short)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned short
+vec_slb(__vector unsigned short __a, __vector unsigned char __b) {
+ return (__vector unsigned short)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed int
vec_slb(__vector signed int __a, __vector signed int __b) {
return (__vector signed int)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed int
vec_slb(__vector signed int __a, __vector unsigned int __b) {
return (__vector signed int)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed int
+vec_slb(__vector signed int __a, __vector unsigned char __b) {
+ return (__vector signed int)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned int
vec_slb(__vector unsigned int __a, __vector signed int __b) {
return (__vector unsigned int)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned int
vec_slb(__vector unsigned int __a, __vector unsigned int __b) {
return (__vector unsigned int)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned int
+vec_slb(__vector unsigned int __a, __vector unsigned char __b) {
+ return (__vector unsigned int)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_slb(__vector signed long long __a, __vector signed long long __b) {
return (__vector signed long long)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_slb(__vector signed long long __a, __vector unsigned long long __b) {
return (__vector signed long long)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed long long
+vec_slb(__vector signed long long __a, __vector unsigned char __b) {
+ return (__vector signed long long)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long
vec_slb(__vector unsigned long long __a, __vector signed long long __b) {
return (__vector unsigned long long)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long
vec_slb(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector unsigned long long)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_slb(__vector unsigned long long __a, __vector unsigned char __b) {
+ return (__vector unsigned long long)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_slb(__vector signed __int128 __a, __vector unsigned char __b) {
+ return (__vector signed __int128)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_slb(__vector unsigned __int128 __a, __vector unsigned char __b) {
+ return (__vector unsigned __int128)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
#if __ARCH__ >= 12
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector float
vec_slb(__vector float __a, __vector signed int __b) {
return (__vector float)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector float
vec_slb(__vector float __a, __vector unsigned int __b) {
return (__vector float)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+
+static inline __ATTRS_o_ai __vector float
+vec_slb(__vector float __a, __vector unsigned char __b) {
+ return (__vector float)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
#endif
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector double
vec_slb(__vector double __a, __vector signed long long __b) {
return (__vector double)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector double
vec_slb(__vector double __a, __vector unsigned long long __b) {
return (__vector double)__builtin_s390_vslb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector double
+vec_slb(__vector double __a, __vector unsigned char __b) {
+ return (__vector double)__builtin_s390_vslb(
+ (__vector unsigned char)__a, __b);
+}
+
/*-- vec_sld ----------------------------------------------------------------*/
extern __ATTRS_o __vector signed char
vec_sld(__vector signed char __a, __vector signed char __b, int __c)
__constant_range(__c, 0, 15);
+// This prototype is deprecated.
extern __ATTRS_o __vector __bool char
vec_sld(__vector __bool char __a, __vector __bool char __b, int __c)
__constant_range(__c, 0, 15);
@@ -7044,6 +8223,7 @@ extern __ATTRS_o __vector signed short
vec_sld(__vector signed short __a, __vector signed short __b, int __c)
__constant_range(__c, 0, 15);
+// This prototype is deprecated.
extern __ATTRS_o __vector __bool short
vec_sld(__vector __bool short __a, __vector __bool short __b, int __c)
__constant_range(__c, 0, 15);
@@ -7056,6 +8236,7 @@ extern __ATTRS_o __vector signed int
vec_sld(__vector signed int __a, __vector signed int __b, int __c)
__constant_range(__c, 0, 15);
+// This prototype is deprecated.
extern __ATTRS_o __vector __bool int
vec_sld(__vector __bool int __a, __vector __bool int __b, int __c)
__constant_range(__c, 0, 15);
@@ -7068,6 +8249,7 @@ extern __ATTRS_o __vector signed long long
vec_sld(__vector signed long long __a, __vector signed long long __b, int __c)
__constant_range(__c, 0, 15);
+// This prototype is deprecated.
extern __ATTRS_o __vector __bool long long
vec_sld(__vector __bool long long __a, __vector __bool long long __b, int __c)
__constant_range(__c, 0, 15);
@@ -7077,6 +8259,15 @@ vec_sld(__vector unsigned long long __a, __vector unsigned long long __b,
int __c)
__constant_range(__c, 0, 15);
+extern __ATTRS_o __vector signed __int128
+vec_sld(__vector signed __int128 __a, __vector signed __int128 __b, int __c)
+ __constant_range(__c, 0, 15);
+
+extern __ATTRS_o __vector unsigned __int128
+vec_sld(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ int __c)
+ __constant_range(__c, 0, 15);
+
#if __ARCH__ >= 12
extern __ATTRS_o __vector float
vec_sld(__vector float __a, __vector float __b, int __c)
@@ -7126,6 +8317,15 @@ vec_sldw(__vector unsigned long long __a, __vector unsigned long long __b,
int __c)
__constant_range(__c, 0, 3);
+extern __ATTRS_o __vector signed __int128
+vec_sldw(__vector signed __int128 __a, __vector signed __int128 __b, int __c)
+ __constant_range(__c, 0, 3);
+
+extern __ATTRS_o __vector unsigned __int128
+vec_sldw(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ int __c)
+ __constant_range(__c, 0, 3);
+
// This prototype is deprecated.
extern __ATTRS_o __vector double
vec_sldw(__vector double __a, __vector double __b, int __c)
@@ -7172,6 +8372,15 @@ vec_sldb(__vector unsigned long long __a, __vector unsigned long long __b,
int __c)
__constant_range(__c, 0, 7);
+extern __ATTRS_o __vector signed __int128
+vec_sldb(__vector signed __int128 __a, __vector signed __int128 __b, int __c)
+ __constant_range(__c, 0, 7);
+
+extern __ATTRS_o __vector unsigned __int128
+vec_sldb(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ int __c)
+ __constant_range(__c, 0, 7);
+
extern __ATTRS_o __vector float
vec_sldb(__vector float __a, __vector float __b, int __c)
__constant_range(__c, 0, 7);
@@ -7429,8 +8638,21 @@ vec_sral(__vector unsigned long long __a, __vector unsigned int __b) {
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_sral(__vector signed __int128 __a, __vector unsigned char __b) {
+ return (__vector signed __int128)__builtin_s390_vsra(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_sral(__vector unsigned __int128 __a, __vector unsigned char __b) {
+ return (__vector unsigned __int128)__builtin_s390_vsra(
+ (__vector unsigned char)__a, __b);
+}
+
/*-- vec_srab ---------------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed char
vec_srab(__vector signed char __a, __vector signed char __b) {
return (__vector signed char)__builtin_s390_vsrab(
@@ -7443,6 +8665,7 @@ vec_srab(__vector signed char __a, __vector unsigned char __b) {
(__vector unsigned char)__a, __b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned char
vec_srab(__vector unsigned char __a, __vector signed char __b) {
return __builtin_s390_vsrab(__a, (__vector unsigned char)__b);
@@ -7453,104 +8676,180 @@ vec_srab(__vector unsigned char __a, __vector unsigned char __b) {
return __builtin_s390_vsrab(__a, __b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed short
vec_srab(__vector signed short __a, __vector signed short __b) {
return (__vector signed short)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed short
vec_srab(__vector signed short __a, __vector unsigned short __b) {
return (__vector signed short)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed short
+vec_srab(__vector signed short __a, __vector unsigned char __b) {
+ return (__vector signed short)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned short
vec_srab(__vector unsigned short __a, __vector signed short __b) {
return (__vector unsigned short)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned short
vec_srab(__vector unsigned short __a, __vector unsigned short __b) {
return (__vector unsigned short)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srab(__vector unsigned short __a, __vector unsigned char __b) {
+ return (__vector unsigned short)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed int
vec_srab(__vector signed int __a, __vector signed int __b) {
return (__vector signed int)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed int
vec_srab(__vector signed int __a, __vector unsigned int __b) {
return (__vector signed int)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed int
+vec_srab(__vector signed int __a, __vector unsigned char __b) {
+ return (__vector signed int)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned int
vec_srab(__vector unsigned int __a, __vector signed int __b) {
return (__vector unsigned int)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned int
vec_srab(__vector unsigned int __a, __vector unsigned int __b) {
return (__vector unsigned int)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srab(__vector unsigned int __a, __vector unsigned char __b) {
+ return (__vector unsigned int)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_srab(__vector signed long long __a, __vector signed long long __b) {
return (__vector signed long long)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_srab(__vector signed long long __a, __vector unsigned long long __b) {
return (__vector signed long long)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed long long
+vec_srab(__vector signed long long __a, __vector unsigned char __b) {
+ return (__vector signed long long)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long
vec_srab(__vector unsigned long long __a, __vector signed long long __b) {
return (__vector unsigned long long)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
-static inline __ATTRS_o_ai __vector unsigned long long
-vec_srab(__vector unsigned long long __a, __vector unsigned long long __b) {
- return (__vector unsigned long long)__builtin_s390_vsrab(
- (__vector unsigned char)__a, (__vector unsigned char)__b);
+// This prototype is deprecated.
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srab(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return (__vector unsigned long long)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, (__vector unsigned char)__b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srab(__vector unsigned long long __a, __vector unsigned char __b) {
+ return (__vector unsigned long long)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_srab(__vector signed __int128 __a, __vector unsigned char __b) {
+ return (__vector signed __int128)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_srab(__vector unsigned __int128 __a, __vector unsigned char __b) {
+ return (__vector unsigned __int128)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
}
#if __ARCH__ >= 12
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector float
vec_srab(__vector float __a, __vector signed int __b) {
return (__vector float)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector float
vec_srab(__vector float __a, __vector unsigned int __b) {
return (__vector float)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+
+static inline __ATTRS_o_ai __vector float
+vec_srab(__vector float __a, __vector unsigned char __b) {
+ return (__vector float)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
#endif
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector double
vec_srab(__vector double __a, __vector signed long long __b) {
return (__vector double)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector double
vec_srab(__vector double __a, __vector unsigned long long __b) {
return (__vector double)__builtin_s390_vsrab(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector double
+vec_srab(__vector double __a, __vector unsigned char __b) {
+ return (__vector double)__builtin_s390_vsrab(
+ (__vector unsigned char)__a, __b);
+}
+
/*-- vec_srl ----------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed char
@@ -7794,8 +9093,21 @@ vec_srl(__vector unsigned long long __a, __vector unsigned int __b) {
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_srl(__vector signed __int128 __a, __vector unsigned char __b) {
+ return (__vector signed __int128)__builtin_s390_vsrl(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_srl(__vector unsigned __int128 __a, __vector unsigned char __b) {
+ return (__vector unsigned __int128)__builtin_s390_vsrl(
+ (__vector unsigned char)__a, __b);
+}
+
/*-- vec_srb ----------------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed char
vec_srb(__vector signed char __a, __vector signed char __b) {
return (__vector signed char)__builtin_s390_vsrlb(
@@ -7808,6 +9120,7 @@ vec_srb(__vector signed char __a, __vector unsigned char __b) {
(__vector unsigned char)__a, __b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned char
vec_srb(__vector unsigned char __a, __vector signed char __b) {
return __builtin_s390_vsrlb(__a, (__vector unsigned char)__b);
@@ -7818,104 +9131,180 @@ vec_srb(__vector unsigned char __a, __vector unsigned char __b) {
return __builtin_s390_vsrlb(__a, __b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed short
vec_srb(__vector signed short __a, __vector signed short __b) {
return (__vector signed short)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed short
vec_srb(__vector signed short __a, __vector unsigned short __b) {
return (__vector signed short)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed short
+vec_srb(__vector signed short __a, __vector unsigned char __b) {
+ return (__vector signed short)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned short
vec_srb(__vector unsigned short __a, __vector signed short __b) {
return (__vector unsigned short)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned short
vec_srb(__vector unsigned short __a, __vector unsigned short __b) {
return (__vector unsigned short)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned short
+vec_srb(__vector unsigned short __a, __vector unsigned char __b) {
+ return (__vector unsigned short)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed int
vec_srb(__vector signed int __a, __vector signed int __b) {
return (__vector signed int)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed int
vec_srb(__vector signed int __a, __vector unsigned int __b) {
return (__vector signed int)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed int
+vec_srb(__vector signed int __a, __vector unsigned char __b) {
+ return (__vector signed int)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned int
vec_srb(__vector unsigned int __a, __vector signed int __b) {
return (__vector unsigned int)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned int
vec_srb(__vector unsigned int __a, __vector unsigned int __b) {
return (__vector unsigned int)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned int
+vec_srb(__vector unsigned int __a, __vector unsigned char __b) {
+ return (__vector unsigned int)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_srb(__vector signed long long __a, __vector signed long long __b) {
return (__vector signed long long)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector signed long long
vec_srb(__vector signed long long __a, __vector unsigned long long __b) {
return (__vector signed long long)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector signed long long
+vec_srb(__vector signed long long __a, __vector unsigned char __b) {
+ return (__vector signed long long)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long
vec_srb(__vector unsigned long long __a, __vector signed long long __b) {
return (__vector unsigned long long)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned long long
vec_srb(__vector unsigned long long __a, __vector unsigned long long __b) {
return (__vector unsigned long long)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_srb(__vector unsigned long long __a, __vector unsigned char __b) {
+ return (__vector unsigned long long)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_srb(__vector signed __int128 __a, __vector unsigned char __b) {
+ return (__vector signed __int128)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_srb(__vector unsigned __int128 __a, __vector unsigned char __b) {
+ return (__vector unsigned __int128)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
#if __ARCH__ >= 12
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector float
vec_srb(__vector float __a, __vector signed int __b) {
return (__vector float)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector float
vec_srb(__vector float __a, __vector unsigned int __b) {
return (__vector float)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+
+static inline __ATTRS_o_ai __vector float
+vec_srb(__vector float __a, __vector unsigned char __b) {
+ return (__vector float)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
#endif
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector double
vec_srb(__vector double __a, __vector signed long long __b) {
return (__vector double)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector double
vec_srb(__vector double __a, __vector unsigned long long __b) {
return (__vector double)__builtin_s390_vsrlb(
(__vector unsigned char)__a, (__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai __vector double
+vec_srb(__vector double __a, __vector unsigned char __b) {
+ return (__vector double)__builtin_s390_vsrlb(
+ (__vector unsigned char)__a, __b);
+}
+
/*-- vec_srdb ---------------------------------------------------------------*/
#if __ARCH__ >= 13
@@ -7953,6 +9342,15 @@ vec_srdb(__vector unsigned long long __a, __vector unsigned long long __b,
int __c)
__constant_range(__c, 0, 7);
+extern __ATTRS_o __vector signed __int128
+vec_srdb(__vector signed __int128 __a, __vector signed __int128 __b, int __c)
+ __constant_range(__c, 0, 7);
+
+extern __ATTRS_o __vector unsigned __int128
+vec_srdb(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ int __c)
+ __constant_range(__c, 0, 7);
+
extern __ATTRS_o __vector float
vec_srdb(__vector float __a, __vector float __b, int __c)
__constant_range(__c, 0, 7);
@@ -7989,6 +9387,11 @@ vec_abs(__vector signed long long __a) {
return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed long long)0));
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_abs(__vector signed __int128 __a) {
+ return vec_sel(__a, -__a, vec_cmplt(__a, (__vector signed __int128)0));
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_abs(__vector float __a) {
@@ -8169,6 +9572,16 @@ vec_max(__vector __bool long long __a, __vector unsigned long long __b) {
return vec_sel(__b, __ac, vec_cmpgt(__ac, __b));
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_max(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return vec_sel(__b, __a, vec_cmpgt(__a, __b));
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_max(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return vec_sel(__b, __a, vec_cmpgt(__a, __b));
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_max(__vector float __a, __vector float __b) {
@@ -8339,6 +9752,16 @@ vec_min(__vector __bool long long __a, __vector unsigned long long __b) {
return vec_sel(__ac, __b, vec_cmpgt(__ac, __b));
}
+static inline __ATTRS_o_ai __vector signed __int128
+vec_min(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return vec_sel(__a, __b, vec_cmpgt(__a, __b));
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_min(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return vec_sel(__a, __b, vec_cmpgt(__a, __b));
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai __vector float
vec_min(__vector float __a, __vector float __b) {
@@ -8357,10 +9780,10 @@ vec_min(__vector double __a, __vector double __b) {
/*-- vec_add_u128 -----------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_add_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
((__int128)__a + (__int128)__b);
}
@@ -8386,33 +9809,59 @@ vec_addc(__vector unsigned long long __a, __vector unsigned long long __b) {
return __builtin_s390_vaccg(__a, __b);
}
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_addc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
+}
+
/*-- vec_addc_u128 ----------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_addc_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vaccq((unsigned __int128)__a, (unsigned __int128)__b);
}
+/*-- vec_adde ---------------------------------------------------------------*/
+
+static inline __ATTRS_ai __vector unsigned __int128
+vec_adde(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
+}
+
/*-- vec_adde_u128 ----------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_adde_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vacq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
+/*-- vec_addec --------------------------------------------------------------*/
+
+static inline __ATTRS_ai __vector unsigned __int128
+vec_addec(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
+}
+
/*-- vec_addec_u128 ---------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_addec_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vacccq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
@@ -8439,6 +9888,14 @@ vec_avg(__vector signed long long __a, __vector signed long long __b) {
return __builtin_s390_vavgg(__a, __b);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed __int128
+vec_avg(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return (__vector signed __int128)
+ __builtin_s390_vavgq((signed __int128)__a, (signed __int128)__b);
+}
+#endif
+
static inline __ATTRS_o_ai __vector unsigned char
vec_avg(__vector unsigned char __a, __vector unsigned char __b) {
return __builtin_s390_vavglb(__a, __b);
@@ -8459,6 +9916,14 @@ vec_avg(__vector unsigned long long __a, __vector unsigned long long __b) {
return __builtin_s390_vavglg(__a, __b);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_avg(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vavglq((unsigned __int128)__a, (unsigned __int128)__b);
+}
+#endif
+
/*-- vec_checksum -----------------------------------------------------------*/
static inline __ATTRS_ai __vector unsigned int
@@ -8483,13 +9948,18 @@ vec_gfmsum(__vector unsigned int __a, __vector unsigned int __b) {
return __builtin_s390_vgfmf(__a, __b);
}
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_gfmsum(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return (__vector unsigned __int128)__builtin_s390_vgfmg(__a, __b);
+}
+
/*-- vec_gfmsum_128 ---------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned char
vec_gfmsum_128(__vector unsigned long long __a,
__vector unsigned long long __b) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vgfmg(__a, __b);
}
@@ -8513,14 +9983,21 @@ vec_gfmsum_accum(__vector unsigned int __a, __vector unsigned int __b,
return __builtin_s390_vgfmaf(__a, __b, __c);
}
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_gfmsum_accum(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
+}
+
/*-- vec_gfmsum_accum_128 ---------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned char
vec_gfmsum_accum_128(__vector unsigned long long __a,
__vector unsigned long long __b,
__vector unsigned char __c) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vgfmag(__a, __b, (unsigned __int128)__c);
}
@@ -8598,6 +10075,56 @@ vec_mladd(__vector unsigned int __a, __vector unsigned int __b,
return __a * __b + __c;
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed long long
+vec_mladd(__vector signed long long __a, __vector signed long long __b,
+ __vector signed long long __c) {
+ return __a * __b + __c;
+}
+
+static inline __ATTRS_o_ai __vector signed long long
+vec_mladd(__vector unsigned long long __a, __vector signed long long __b,
+ __vector signed long long __c) {
+ return (__vector signed long long)__a * __b + __c;
+}
+
+static inline __ATTRS_o_ai __vector signed long long
+vec_mladd(__vector signed long long __a, __vector unsigned long long __b,
+ __vector unsigned long long __c) {
+ return __a * (__vector signed long long)__b + (__vector signed long long)__c;
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_mladd(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned long long __c) {
+ return __a * __b + __c;
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_mladd(__vector signed __int128 __a, __vector signed __int128 __b,
+ __vector signed __int128 __c) {
+ return __a * __b + __c;
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_mladd(__vector unsigned __int128 __a, __vector signed __int128 __b,
+ __vector signed __int128 __c) {
+ return (__vector signed __int128)__a * __b + __c;
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_mladd(__vector signed __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return __a * (__vector signed __int128)__b + (__vector signed __int128)__c;
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_mladd(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return __a * __b + __c;
+}
+#endif
+
/*-- vec_mhadd --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed char
@@ -8636,6 +10163,34 @@ vec_mhadd(__vector unsigned int __a, __vector unsigned int __b,
return __builtin_s390_vmalhf(__a, __b, __c);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed long long
+vec_mhadd(__vector signed long long __a, __vector signed long long __b,
+ __vector signed long long __c) {
+ return __builtin_s390_vmahg(__a, __b, __c);
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_mhadd(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned long long __c) {
+ return __builtin_s390_vmalhg(__a, __b, __c);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_mhadd(__vector signed __int128 __a, __vector signed __int128 __b,
+ __vector signed __int128 __c) {
+ return (__vector signed __int128)
+ __builtin_s390_vmahq((signed __int128)__a, (signed __int128)__b, (signed __int128)__c);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_mhadd(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vmalhq((unsigned __int128)__a, (unsigned __int128)__b, (unsigned __int128)__c);
+}
+#endif
+
/*-- vec_meadd --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed short
@@ -8674,6 +10229,22 @@ vec_meadd(__vector unsigned int __a, __vector unsigned int __b,
return __builtin_s390_vmalef(__a, __b, __c);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed __int128
+vec_meadd(__vector signed long long __a, __vector signed long long __b,
+ __vector signed __int128 __c) {
+ return (__vector signed __int128)
+ __builtin_s390_vmaeg(__a, __b, (signed __int128)__c);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_meadd(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vmaleg(__a, __b, (unsigned __int128)__c);
+}
+#endif
+
/*-- vec_moadd --------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed short
@@ -8712,6 +10283,22 @@ vec_moadd(__vector unsigned int __a, __vector unsigned int __b,
return __builtin_s390_vmalof(__a, __b, __c);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed __int128
+vec_moadd(__vector signed long long __a, __vector signed long long __b,
+ __vector signed __int128 __c) {
+ return (__vector signed __int128)
+ __builtin_s390_vmaog(__a, __b, (signed __int128)__c);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_moadd(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vmalog(__a, __b, (unsigned __int128)__c);
+}
+#endif
+
/*-- vec_mulh ---------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed char
@@ -8744,6 +10331,30 @@ vec_mulh(__vector unsigned int __a, __vector unsigned int __b) {
return __builtin_s390_vmlhf(__a, __b);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed long long
+vec_mulh(__vector signed long long __a, __vector signed long long __b) {
+ return __builtin_s390_vmhg(__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned long long
+vec_mulh(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return __builtin_s390_vmlhg(__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector signed __int128
+vec_mulh(__vector signed __int128 __a, __vector signed __int128 __b) {
+ return (__vector signed __int128)
+ __builtin_s390_vmhq((signed __int128)__a, (signed __int128)__b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_mulh(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vmlhq((unsigned __int128)__a, (unsigned __int128)__b);
+}
+#endif
+
/*-- vec_mule ---------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed short
@@ -8776,6 +10387,18 @@ vec_mule(__vector unsigned int __a, __vector unsigned int __b) {
return __builtin_s390_vmlef(__a, __b);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed __int128
+vec_mule(__vector signed long long __a, __vector signed long long __b) {
+ return (__vector signed __int128)__builtin_s390_vmeg(__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_mule(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return (__vector unsigned __int128)__builtin_s390_vmleg(__a, __b);
+}
+#endif
+
/*-- vec_mulo ---------------------------------------------------------------*/
static inline __ATTRS_o_ai __vector signed short
@@ -8808,9 +10431,35 @@ vec_mulo(__vector unsigned int __a, __vector unsigned int __b) {
return __builtin_s390_vmlof(__a, __b);
}
+#if __ARCH__ >= 15
+static inline __ATTRS_o_ai __vector signed __int128
+vec_mulo(__vector signed long long __a, __vector signed long long __b) {
+ return (__vector signed __int128)__builtin_s390_vmog(__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_mulo(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return (__vector unsigned __int128)__builtin_s390_vmlog(__a, __b);
+}
+#endif
+
+/*-- vec_msum ---------------------------------------------------------------*/
+
+#if __ARCH__ >= 12
+extern __ATTRS_o __vector unsigned __int128
+vec_msum(__vector unsigned long long __a, __vector unsigned long long __b,
+ __vector unsigned __int128 __c, int __d)
+ __constant_range(__d, 0, 15);
+
+#define vec_msum(X, Y, Z, W) \
+ ((__typeof__((vec_msum)((X), (Y), (Z), (W)))) \
+ __builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
+#endif
+
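For reference, a hedged usage sketch of the new vec_msum overload, which returns the __int128 accumulator type directly instead of going through the deprecated vec_msum_u128 form below (illustrative, not part of the patch; the helper name is made up, and 0 for the last operand is just a placeholder within the documented 0..15 range).

/* Illustrative sketch -- not part of the patch.  Assumes -march=arch15
   -mzvector; the fourth operand must be a constant in the range 0..15
   per the __constant_range annotation above. */
#include <vecintrin.h>

static __vector unsigned __int128
msum_acc(__vector unsigned long long a, __vector unsigned long long b,
         __vector unsigned __int128 acc) {
  return vec_msum(a, b, acc, 0);
}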
/*-- vec_msum_u128 ----------------------------------------------------------*/
#if __ARCH__ >= 12
+// This prototype is deprecated.
extern __ATTRS_o __vector unsigned char
vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
__vector unsigned char __c, int __d)
@@ -8818,16 +10467,16 @@ vec_msum_u128(__vector unsigned long long __a, __vector unsigned long long __b,
#define vec_msum_u128(X, Y, Z, W) \
((__typeof__((vec_msum_u128)((X), (Y), (Z), (W)))) \
- (unsigned __int128 __attribute__((__vector_size__(16)))) \
+ (__vector unsigned __int128) \
__builtin_s390_vmslg((X), (Y), (unsigned __int128)(Z), (W)))
#endif
/*-- vec_sub_u128 -----------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_sub_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
((__int128)__a - (__int128)__b);
}
@@ -8853,33 +10502,59 @@ vec_subc(__vector unsigned long long __a, __vector unsigned long long __b) {
return __builtin_s390_vscbig(__a, __b);
}
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_subc(__vector unsigned __int128 __a, __vector unsigned __int128 __b) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
+}
+
/*-- vec_subc_u128 ----------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_subc_u128(__vector unsigned char __a, __vector unsigned char __b) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vscbiq((unsigned __int128)__a, (unsigned __int128)__b);
}
+/*-- vec_sube ---------------------------------------------------------------*/
+
+static inline __ATTRS_ai __vector unsigned __int128
+vec_sube(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
+}
+
/*-- vec_sube_u128 ----------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_sube_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vsbiq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
+/*-- vec_subec --------------------------------------------------------------*/
+
+static inline __ATTRS_ai __vector unsigned __int128
+vec_subec(__vector unsigned __int128 __a, __vector unsigned __int128 __b,
+ __vector unsigned __int128 __c) {
+ return (__vector unsigned __int128)
+ __builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b,
+ (unsigned __int128)__c);
+}
+
/*-- vec_subec_u128 ---------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_ai __vector unsigned char
vec_subec_u128(__vector unsigned char __a, __vector unsigned char __b,
__vector unsigned char __c) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vsbcbiq((unsigned __int128)__a, (unsigned __int128)__b,
(unsigned __int128)__c);
}
@@ -8896,19 +10571,31 @@ vec_sum2(__vector unsigned int __a, __vector unsigned int __b) {
return __builtin_s390_vsumgf(__a, __b);
}
+/*-- vec_sum ----------------------------------------------------------------*/
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_sum(__vector unsigned int __a, __vector unsigned int __b) {
+ return (__vector unsigned __int128)__builtin_s390_vsumqf(__a, __b);
+}
+
+static inline __ATTRS_o_ai __vector unsigned __int128
+vec_sum(__vector unsigned long long __a, __vector unsigned long long __b) {
+ return (__vector unsigned __int128)__builtin_s390_vsumqg(__a, __b);
+}
+
/*-- vec_sum_u128 -----------------------------------------------------------*/
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned char
vec_sum_u128(__vector unsigned int __a, __vector unsigned int __b) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vsumqf(__a, __b);
}
+// This prototype is deprecated.
static inline __ATTRS_o_ai __vector unsigned char
vec_sum_u128(__vector unsigned long long __a, __vector unsigned long long __b) {
- return (__vector unsigned char)
- (unsigned __int128 __attribute__((__vector_size__(16))))
+ return (__vector unsigned char)(__vector unsigned __int128)
__builtin_s390_vsumqg(__a, __b);
}
@@ -8974,6 +10661,19 @@ vec_test_mask(__vector unsigned long long __a,
(__vector unsigned char)__b);
}
+static inline __ATTRS_o_ai int
+vec_test_mask(__vector signed __int128 __a, __vector unsigned __int128 __b) {
+ return __builtin_s390_vtm((__vector unsigned char)__a,
+ (__vector unsigned char)__b);
+}
+
+static inline __ATTRS_o_ai int
+vec_test_mask(__vector unsigned __int128 __a,
+ __vector unsigned __int128 __b) {
+ return __builtin_s390_vtm((__vector unsigned char)__a,
+ (__vector unsigned char)__b);
+}
+
#if __ARCH__ >= 12
static inline __ATTRS_o_ai int
vec_test_mask(__vector float __a, __vector unsigned int __b) {
diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp
index 47644680b720bf..95e14ca0fa3b76 100644
--- a/clang/lib/Sema/DeclSpec.cpp
+++ b/clang/lib/Sema/DeclSpec.cpp
@@ -1201,9 +1201,10 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
!S.getLangOpts().ZVector)
S.Diag(TSWRange.getBegin(), diag::err_invalid_vector_long_long_decl_spec);
- // No vector __int128 prior to Power8.
+ // No vector __int128 prior to Power8 (or ZVector).
if ((TypeSpecType == TST_int128) &&
- !S.Context.getTargetInfo().hasFeature("power8-vector"))
+ !S.Context.getTargetInfo().hasFeature("power8-vector") &&
+ !S.getLangOpts().ZVector)
S.Diag(TSTLoc, diag::err_invalid_vector_int128_decl_spec);
// Complex vector types are not supported.
@@ -1225,9 +1226,10 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) {
<< (TypeAltiVecPixel ? "__pixel" :
getSpecifierName((TST)TypeSpecType, Policy));
}
- // vector bool __int128 requires Power10.
+ // vector bool __int128 requires Power10 (or ZVector).
if ((TypeSpecType == TST_int128) &&
- (!S.Context.getTargetInfo().hasFeature("power10-vector")))
+ (!S.Context.getTargetInfo().hasFeature("power10-vector") &&
+ !S.getLangOpts().ZVector))
S.Diag(TSTLoc, diag::err_invalid_vector_bool_int128_decl_spec);
// Only 'short' and 'long long' are valid with vector bool. (PIM 2.1)
diff --git a/clang/lib/Sema/SemaSystemZ.cpp b/clang/lib/Sema/SemaSystemZ.cpp
index 7e836adbee6595..535cb8243d7238 100644
--- a/clang/lib/Sema/SemaSystemZ.cpp
+++ b/clang/lib/Sema/SemaSystemZ.cpp
@@ -38,6 +38,7 @@ bool SemaSystemZ::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID,
switch (BuiltinID) {
default: return false;
case SystemZ::BI__builtin_s390_lcbb: i = 1; l = 0; u = 15; break;
+ case SystemZ::BI__builtin_s390_veval:
case SystemZ::BI__builtin_s390_verimb:
case SystemZ::BI__builtin_s390_verimh:
case SystemZ::BI__builtin_s390_verimf:
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c
new file mode 100644
index 00000000000000..5b4051c8d6f17f
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-bitop.c
@@ -0,0 +1,16 @@
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -Wall -Wno-unused -Werror -emit-llvm -x c++ %s -o - | FileCheck %s
+
+unsigned long test_bdepg(unsigned long a, unsigned long b) {
+// CHECK-LABEL: test_bdepg
+// CHECK: call i64 @llvm.s390.bdepg(i64 {{.*}}, i64 {{.*}})
+ return __builtin_s390_bdepg(a, b);
+}
+
+unsigned long test_bextg(unsigned long a, unsigned long b) {
+// CHECK-LABEL: test_bextg
+// CHECK: call i64 @llvm.s390.bextg(i64 {{.*}}, i64 {{.*}})
+ return __builtin_s390_bextg(a, b);
+}
+
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c
new file mode 100644
index 00000000000000..3943a15af9d2fb
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5-error.c
@@ -0,0 +1,34 @@
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-unknown-unknown \
+// RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s
+
+typedef __attribute__((vector_size(16))) signed char vec_schar;
+typedef __attribute__((vector_size(16))) signed short vec_sshort;
+typedef __attribute__((vector_size(16))) signed int vec_sint;
+typedef __attribute__((vector_size(16))) signed long long vec_slong;
+typedef __attribute__((vector_size(16))) unsigned char vec_uchar;
+typedef __attribute__((vector_size(16))) unsigned short vec_ushort;
+typedef __attribute__((vector_size(16))) unsigned int vec_uint;
+typedef __attribute__((vector_size(16))) unsigned long long vec_ulong;
+typedef __attribute__((vector_size(16))) double vec_double;
+
+volatile vec_schar vsc;
+volatile vec_sshort vss;
+volatile vec_sint vsi;
+volatile vec_slong vsl;
+volatile vec_uchar vuc;
+volatile vec_ushort vus;
+volatile vec_uint vui;
+volatile vec_ulong vul;
+volatile vec_double vd;
+
+volatile unsigned int len;
+const void * volatile cptr;
+int cc;
+
+void test_integer(void) {
+ __builtin_s390_veval(vuc, vuc, vuc, -1); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_s390_veval(vuc, vuc, vuc, 256); // expected-error-re {{argument value {{.*}} is outside the valid range}}
+ __builtin_s390_veval(vuc, vuc, vuc, len); // expected-error {{must be a constant integer}}
+}
+
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c
new file mode 100644
index 00000000000000..c3621819e71f98
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector5.c
@@ -0,0 +1,103 @@
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-ibm-linux -flax-vector-conversions=none \
+// RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
+
+typedef __attribute__((vector_size(16))) signed char vec_schar;
+typedef __attribute__((vector_size(16))) signed short vec_sshort;
+typedef __attribute__((vector_size(16))) signed int vec_sint;
+typedef __attribute__((vector_size(16))) signed long long vec_slong;
+typedef __attribute__((vector_size(16))) signed __int128 vec_sint128;
+typedef __attribute__((vector_size(16))) unsigned char vec_uchar;
+typedef __attribute__((vector_size(16))) unsigned short vec_ushort;
+typedef __attribute__((vector_size(16))) unsigned int vec_uint;
+typedef __attribute__((vector_size(16))) unsigned long long vec_ulong;
+typedef __attribute__((vector_size(16))) unsigned __int128 vec_uint128;
+typedef __attribute__((vector_size(16))) double vec_double;
+
+volatile vec_schar vsc;
+volatile vec_sshort vss;
+volatile vec_sint vsi;
+volatile vec_slong vsl;
+volatile vec_uchar vuc;
+volatile vec_ushort vus;
+volatile vec_uint vui;
+volatile vec_ulong vul;
+volatile signed __int128 si128;
+volatile unsigned __int128 ui128;
+
+int cc;
+
+void test_core(void) {
+ vuc = __builtin_s390_vgemb(vus);
+ // CHECK: call <16 x i8> @llvm.s390.vgemb(<8 x i16> %{{.*}})
+ vus = __builtin_s390_vgemh(vuc);
+ // CHECK: call <8 x i16> @llvm.s390.vgemh(<16 x i8> %{{.*}})
+ vui = __builtin_s390_vgemf(vuc);
+ // CHECK: call <4 x i32> @llvm.s390.vgemf(<16 x i8> %{{.*}})
+ vul = __builtin_s390_vgemg(vuc);
+ // CHECK: call <2 x i64> @llvm.s390.vgemg(<16 x i8> %{{.*}})
+ ui128 = __builtin_s390_vgemq(vuc);
+ // CHECK: call i128 @llvm.s390.vgemq(<16 x i8> %{{.*}})
+
+ si128 = __builtin_s390_vuphg(vsl);
+ // CHECK: call i128 @llvm.s390.vuphg(<2 x i64> %{{.*}})
+ si128 = __builtin_s390_vuplg(vsl);
+ // CHECK: call i128 @llvm.s390.vuplg(<2 x i64> %{{.*}})
+ ui128 = __builtin_s390_vuplhg(vul);
+ // CHECK: call i128 @llvm.s390.vuplhg(<2 x i64> %{{.*}})
+ ui128 = __builtin_s390_vupllg(vul);
+ // CHECK: call i128 @llvm.s390.vupllg(<2 x i64> %{{.*}})
+}
+
+void test_integer(void) {
+ si128 = __builtin_s390_vavgq(si128, si128);
+ // CHECK: call i128 @llvm.s390.vavgq(i128 %{{.*}}, i128 %{{.*}})
+ ui128 = __builtin_s390_vavglq(ui128, ui128);
+ // CHECK: call i128 @llvm.s390.vavglq(i128 %{{.*}}, i128 %{{.*}})
+
+ vuc = __builtin_s390_veval(vuc, vuc, vuc, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+
+ vsl = __builtin_s390_vmahg(vsl, vsl, vsl);
+ // CHECK: call <2 x i64> @llvm.s390.vmahg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ si128 = __builtin_s390_vmahq(si128, si128, si128);
+ // CHECK: call i128 @llvm.s390.vmahq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+ vul = __builtin_s390_vmalhg(vul, vul, vul);
+ // CHECK: call <2 x i64> @llvm.s390.vmalhg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ ui128 = __builtin_s390_vmalhq(ui128, ui128, ui128);
+ // CHECK: call i128 @llvm.s390.vmalhq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+
+ si128 = __builtin_s390_vmaeg(vsl, vsl, si128);
+ // CHECK: call i128 @llvm.s390.vmaeg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ ui128 = __builtin_s390_vmaleg(vul, vul, ui128);
+ // CHECK: call i128 @llvm.s390.vmaleg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ si128 = __builtin_s390_vmaog(vsl, vsl, si128);
+ // CHECK: call i128 @llvm.s390.vmaog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ ui128 = __builtin_s390_vmalog(vul, vul, ui128);
+ // CHECK: call i128 @llvm.s390.vmalog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+
+ vsl = __builtin_s390_vmhg(vsl, vsl);
+ // CHECK: call <2 x i64> @llvm.s390.vmhg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ si128 = __builtin_s390_vmhq(si128, si128);
+ // CHECK: call i128 @llvm.s390.vmhq(i128 %{{.*}}, i128 %{{.*}})
+ vul = __builtin_s390_vmlhg(vul, vul);
+ // CHECK: call <2 x i64> @llvm.s390.vmlhg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ ui128 = __builtin_s390_vmlhq(ui128, ui128);
+ // CHECK: call i128 @llvm.s390.vmlhq(i128 %{{.*}}, i128 %{{.*}})
+
+ si128 = __builtin_s390_vmeg(vsl, vsl);
+ // CHECK: call i128 @llvm.s390.vmeg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ ui128 = __builtin_s390_vmleg(vul, vul);
+ // CHECK: call i128 @llvm.s390.vmleg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ si128 = __builtin_s390_vmog(vsl, vsl);
+ // CHECK: call i128 @llvm.s390.vmog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ ui128 = __builtin_s390_vmlog(vul, vul);
+ // CHECK: call i128 @llvm.s390.vmlog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+
+ si128 = __builtin_s390_vceqqs(ui128, ui128, &cc);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ si128 = __builtin_s390_vchqs(si128, si128, &cc);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ si128 = __builtin_s390_vchlqs(ui128, ui128, &cc);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+}
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-error.c
index 77e90b5ad4b848..2ec1d960aa5bb1 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-error.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector-error.c
@@ -9,10 +9,12 @@ volatile vector signed char vsc;
volatile vector signed short vss;
volatile vector signed int vsi;
volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
volatile vector unsigned char vuc;
volatile vector unsigned short vus;
volatile vector unsigned int vui;
volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
volatile vector bool char vbc;
volatile vector bool short vbs;
volatile vector bool int vbi;
@@ -34,10 +36,12 @@ const signed char * volatile cptrsc;
const signed short * volatile cptrss;
const signed int * volatile cptrsi;
const signed long long * volatile cptrsl;
+const signed __int128 * volatile cptrslll;
const unsigned char * volatile cptruc;
const unsigned short * volatile cptrus;
const unsigned int * volatile cptrui;
const unsigned long long * volatile cptrul;
+const unsigned __int128 * volatile cptrulll;
const float * volatile cptrf;
const double * volatile cptrd;
@@ -233,27 +237,31 @@ void test_core(void) {
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 1}}
vsc = vec_load_bndry(cptrsc, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vsc = vec_load_bndry(cptrsc, 200); // expected-error {{no matching function}} expected-error {{argument value -1 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vsc = vec_load_bndry(cptrsc, 32); // expected-error {{no matching function}} expected-error {{argument value -1 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vsc = vec_load_bndry(cptrsc, 8192); // expected-error {{no matching function}} expected-error {{argument value -1 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vuc = vec_load_bndry(cptruc, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vss = vec_load_bndry(cptrss, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vus = vec_load_bndry(cptrus, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vsi = vec_load_bndry(cptrsi, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vui = vec_load_bndry(cptrui, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vsl = vec_load_bndry(cptrsl, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vul = vec_load_bndry(cptrul, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{must be a constant power of 2 from 64 to 4096}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
+ vslll = vec_load_bndry(cptrslll, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
+ vulll = vec_load_bndry(cptrulll, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vlbb' must be a constant integer}}
+ // expected-note@vecintrin.h:* 11 {{must be a constant power of 2 from 64 to 4096}}
vuc = vec_genmask(idx); // expected-error {{no matching function}}
// expected-note@vecintrin.h:* {{must be a constant integer}}
@@ -478,83 +486,95 @@ void test_integer(void) {
// expected-note@vecintrin.h:* 1 {{must be a constant integer}}
vsc = vec_sld(vsc, vsc, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vsc = vec_sld(vsc, vsc, -1); // expected-error {{no matching function}} expected-error {{argument value -1 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vsc = vec_sld(vsc, vsc, 16); // expected-error {{no matching function}} expected-error {{argument value 16 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vuc = vec_sld(vuc, vuc, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
// expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
vuc = vec_sld(vuc, vuc, -1); // expected-error {{no matching function}} expected-error {{argument value -1 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
// expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
vuc = vec_sld(vuc, vuc, 16); // expected-error {{no matching function}} expected-error {{argument value 16 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
// expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
vss = vec_sld(vss, vss, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vus = vec_sld(vus, vus, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
// expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
vsi = vec_sld(vsi, vsi, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vui = vec_sld(vui, vui, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
// expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
vsl = vec_sld(vsl, vsl, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vul = vec_sld(vul, vul, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
// expected-note@vecintrin.h:* 2 {{must be a constant integer from 0 to 15}}
+ vslll = vec_sld(vslll, vslll, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+ vulll = vec_sld(vulll, vulll, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vd = vec_sld(vd, vd, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 12 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vsc = vec_sldw(vsc, vsc, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vsc = vec_sldw(vsc, vsc, -1); // expected-error {{no matching function}} expected-error {{argument value -4 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vsc = vec_sldw(vsc, vsc, 4); // expected-error {{no matching function}} expected-error {{argument value 16 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vuc = vec_sldw(vuc, vuc, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vuc = vec_sldw(vuc, vuc, -1); // expected-error {{no matching function}} expected-error {{argument value -4 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vuc = vec_sldw(vuc, vuc, 4); // expected-error {{no matching function}} expected-error {{argument value 16 is outside the valid range [0, 15]}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vss = vec_sldw(vss, vss, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vus = vec_sldw(vus, vus, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vsi = vec_sldw(vsi, vsi, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vui = vec_sldw(vui, vui, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vsl = vec_sldw(vsl, vsl, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vul = vec_sldw(vul, vul, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+ vslll = vec_sldw(vslll, vslll, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
+ vulll = vec_sldw(vulll, vulll, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
vd = vec_sldw(vd, vd, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 8 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 10 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 3}}
}
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
index 06fc1ee05d67ff..775733ad3b9482 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -12,24 +12,29 @@ volatile vector signed char vsc;
volatile vector signed short vss;
volatile vector signed int vsi;
volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
volatile vector unsigned char vuc;
volatile vector unsigned short vus;
volatile vector unsigned int vui;
volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
volatile vector bool char vbc;
volatile vector bool short vbs;
volatile vector bool int vbi;
volatile vector bool long long vbl;
+volatile vector bool __int128 vblll;
volatile vector double vd;
volatile signed char sc;
volatile signed short ss;
volatile signed int si;
volatile signed long long sl;
+volatile signed __int128 slll;
volatile unsigned char uc;
volatile unsigned short us;
volatile unsigned int ui;
volatile unsigned long long ul;
+volatile unsigned __int128 ulll;
volatile double d;
const void * volatile cptr;
@@ -37,10 +42,12 @@ const signed char * volatile cptrsc;
const signed short * volatile cptrss;
const signed int * volatile cptrsi;
const signed long long * volatile cptrsl;
+const signed __int128 * volatile cptrslll;
const unsigned char * volatile cptruc;
const unsigned short * volatile cptrus;
const unsigned int * volatile cptrui;
const unsigned long long * volatile cptrul;
+const unsigned __int128 * volatile cptrulll;
const float * volatile cptrf;
const double * volatile cptrd;
@@ -49,10 +56,12 @@ signed char * volatile ptrsc;
signed short * volatile ptrss;
signed int * volatile ptrsi;
signed long long * volatile ptrsl;
+signed __int128 * volatile ptrslll;
unsigned char * volatile ptruc;
unsigned short * volatile ptrus;
unsigned int * volatile ptrui;
unsigned long long * volatile ptrul;
+unsigned __int128 * volatile ptrulll;
float * volatile ptrf;
double * volatile ptrd;
@@ -257,6 +266,15 @@ void test_core(void) {
vbl = vec_perm(vbl, vbl, vuc);
// CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vperm
+ vslll = vec_perm(vslll, vslll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vperm
+ vulll = vec_perm(vulll, vulll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vperm
+ vblll = vec_perm(vblll, vblll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vperm
vd = vec_perm(vd, vd, vuc);
// CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vperm
@@ -322,6 +340,10 @@ void test_core(void) {
// CHECK-ASM: vperm
vul = vec_revb(vul);
// CHECK-ASM: vperm
+ vslll = vec_revb(vslll);
+ // CHECK-ASM: vperm
+ vulll = vec_revb(vulll);
+ // CHECK-ASM: vperm
vd = vec_revb(vd);
// CHECK-ASM: vperm
@@ -400,6 +422,18 @@ void test_core(void) {
// CHECK-ASM: vsel
vbl = vec_sel(vbl, vbl, vbl);
// CHECK-ASM: vsel
+ vslll = vec_sel(vslll, vslll, vulll);
+ // CHECK-ASM: vsel
+ vslll = vec_sel(vslll, vslll, vblll);
+ // CHECK-ASM: vsel
+ vulll = vec_sel(vulll, vulll, vulll);
+ // CHECK-ASM: vsel
+ vulll = vec_sel(vulll, vulll, vblll);
+ // CHECK-ASM: vsel
+ vblll = vec_sel(vblll, vblll, vulll);
+ // CHECK-ASM: vsel
+ vblll = vec_sel(vblll, vblll, vblll);
+ // CHECK-ASM: vsel
vd = vec_sel(vd, vd, vul);
// CHECK-ASM: vsel
vd = vec_sel(vd, vd, vbl);
@@ -503,6 +537,10 @@ void test_core(void) {
// CHECK-ASM: vl
vul = vec_xl(idx, cptrul);
// CHECK-ASM: vl
+ vslll = vec_xl(idx, cptrslll);
+ // CHECK-ASM: vl
+ vulll = vec_xl(idx, cptrulll);
+ // CHECK-ASM: vl
vd = vec_xl(idx, cptrd);
// CHECK-ASM: vl
@@ -554,6 +592,10 @@ void test_core(void) {
// CHECK-ASM: vst
vec_xst(vul, idx, ptrul);
// CHECK-ASM: vst
+ vec_xst(vslll, idx, ptrslll);
+ // CHECK-ASM: vst
+ vec_xst(vulll, idx, ptrulll);
+ // CHECK-ASM: vst
vec_xst(vd, idx, ptrd);
// CHECK-ASM: vst
@@ -613,6 +655,12 @@ void test_core(void) {
vul = vec_load_bndry(cptrul, 64);
// CHECK: call <16 x i8> @llvm.s390.vlbb(ptr %{{.*}}, i32 0)
// CHECK-ASM: vlbb
+ vslll = vec_load_bndry(cptrslll, 64);
+ // CHECK: call <16 x i8> @llvm.s390.vlbb(ptr %{{.*}}, i32 0)
+ // CHECK-ASM: vlbb
+ vulll = vec_load_bndry(cptrulll, 64);
+ // CHECK: call <16 x i8> @llvm.s390.vlbb(ptr %{{.*}}, i32 0)
+ // CHECK-ASM: vlbb
vd = vec_load_bndry(cptrd, 64);
// CHECK: call <16 x i8> @llvm.s390.vlbb(ptr %{{.*}}, i32 0)
// CHECK-ASM: vlbb
@@ -867,6 +915,10 @@ void test_core(void) {
vd = vec_splats(d);
// CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x i32> zeroinitializer
// CHECK-ASM: vlrepg
+ vslll = vec_splats(slll);
+ // CHECK: insertelement <1 x i128> poison, i128 %{{.*}}, i64 0
+ vulll = vec_splats(ulll);
+ // CHECK: insertelement <1 x i128> poison, i128 %{{.*}}, i64 0
vsl = vec_extend_s64(vsc);
// CHECK-ASM: vsegb
@@ -982,6 +1034,15 @@ void test_core(void) {
vbi = vec_pack(vbl, vbl);
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// CHECK-ASM: vpkg
+ vsl = vec_pack(vslll, vslll);
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+ // CHECK-ASM: vmrlg
+ vul = vec_pack(vulll, vulll);
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+ // CHECK-ASM: vmrlg
+ vbl = vec_pack(vblll, vblll);
+ // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> <i32 1, i32 3>
+ // CHECK-ASM: vmrlg
vsc = vec_packs(vss, vss);
// CHECK: call <16 x i8> @llvm.s390.vpksh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
@@ -2362,6 +2423,105 @@ void test_compare(void) {
void test_integer(void) {
// CHECK-ASM-LABEL: test_integer
+ vsc = vec_and(vsc, vsc);
+ // CHECK-ASM: vn
+ vuc = vec_and(vuc, vuc);
+ // CHECK-ASM: vn
+ vbc = vec_and(vbc, vbc);
+ // CHECK-ASM: vn
+ vss = vec_and(vss, vss);
+ // CHECK-ASM: vn
+ vus = vec_and(vus, vus);
+ // CHECK-ASM: vn
+ vbs = vec_and(vbs, vbs);
+ // CHECK-ASM: vn
+ vsi = vec_and(vsi, vsi);
+ // CHECK-ASM: vn
+ vui = vec_and(vui, vui);
+ // CHECK-ASM: vn
+ vbi = vec_and(vbi, vbi);
+ // CHECK-ASM: vn
+ vsl = vec_and(vsl, vsl);
+ // CHECK-ASM: vn
+ vul = vec_and(vul, vul);
+ // CHECK-ASM: vn
+ vbl = vec_and(vbl, vbl);
+ // CHECK-ASM: vn
+ vslll = vec_and(vslll, vslll);
+ // CHECK-ASM: vn
+ vulll = vec_and(vulll, vulll);
+ // CHECK-ASM: vn
+ vblll = vec_and(vblll, vblll);
+ // CHECK-ASM: vn
+ vd = vec_and(vd, vd);
+ // CHECK-ASM: vn
+
+ vsc = vec_or(vsc, vsc);
+ // CHECK-ASM: vo
+ vuc = vec_or(vuc, vuc);
+ // CHECK-ASM: vo
+ vbc = vec_or(vbc, vbc);
+ // CHECK-ASM: vo
+ vss = vec_or(vss, vss);
+ // CHECK-ASM: vo
+ vus = vec_or(vus, vus);
+ // CHECK-ASM: vo
+ vbs = vec_or(vbs, vbs);
+ // CHECK-ASM: vo
+ vsi = vec_or(vsi, vsi);
+ // CHECK-ASM: vo
+ vui = vec_or(vui, vui);
+ // CHECK-ASM: vo
+ vbi = vec_or(vbi, vbi);
+ // CHECK-ASM: vo
+ vsl = vec_or(vsl, vsl);
+ // CHECK-ASM: vo
+ vul = vec_or(vul, vul);
+ // CHECK-ASM: vo
+ vbl = vec_or(vbl, vbl);
+ // CHECK-ASM: vo
+ vslll = vec_or(vslll, vslll);
+ // CHECK-ASM: vo
+ vulll = vec_or(vulll, vulll);
+ // CHECK-ASM: vo
+ vblll = vec_or(vblll, vblll);
+ // CHECK-ASM: vo
+ vd = vec_or(vd, vd);
+ // CHECK-ASM: vo
+
+ vsc = vec_xor(vsc, vsc);
+ // CHECK-ASM: vx
+ vuc = vec_xor(vuc, vuc);
+ // CHECK-ASM: vx
+ vbc = vec_xor(vbc, vbc);
+ // CHECK-ASM: vx
+ vss = vec_xor(vss, vss);
+ // CHECK-ASM: vx
+ vus = vec_xor(vus, vus);
+ // CHECK-ASM: vx
+ vbs = vec_xor(vbs, vbs);
+ // CHECK-ASM: vx
+ vsi = vec_xor(vsi, vsi);
+ // CHECK-ASM: vx
+ vui = vec_xor(vui, vui);
+ // CHECK-ASM: vx
+ vbi = vec_xor(vbi, vbi);
+ // CHECK-ASM: vx
+ vsl = vec_xor(vsl, vsl);
+ // CHECK-ASM: vx
+ vul = vec_xor(vul, vul);
+ // CHECK-ASM: vx
+ vbl = vec_xor(vbl, vbl);
+ // CHECK-ASM: vx
+ vslll = vec_xor(vslll, vslll);
+ // CHECK-ASM: vx
+ vulll = vec_xor(vulll, vulll);
+ // CHECK-ASM: vx
+ vblll = vec_xor(vblll, vblll);
+ // CHECK-ASM: vx
+ vd = vec_xor(vd, vd);
+ // CHECK-ASM: vx
+
vsc = vec_andc(vsc, vsc);
// CHECK-ASM: vnc
vsc = vec_andc(vsc, vbc);
@@ -2418,6 +2578,12 @@ void test_integer(void) {
// CHECK-ASM: vnc
vbl = vec_andc(vbl, vbl);
// CHECK-ASM: vnc
+ vslll = vec_andc(vslll, vslll);
+ // CHECK-ASM: vnc
+ vulll = vec_andc(vulll, vulll);
+ // CHECK-ASM: vnc
+ vblll = vec_andc(vblll, vblll);
+ // CHECK-ASM: vnc
vd = vec_andc(vd, vd);
// CHECK-ASM: vnc
vd = vec_andc(vd, vbl);
@@ -2481,6 +2647,12 @@ void test_integer(void) {
// CHECK-ASM: vno
vbl = vec_nor(vbl, vbl);
// CHECK-ASM: vno
+ vslll = vec_nor(vslll, vslll);
+ // CHECK-ASM: vno
+ vulll = vec_nor(vulll, vulll);
+ // CHECK-ASM: vno
+ vblll = vec_nor(vblll, vblll);
+ // CHECK-ASM: vno
vd = vec_nor(vd, vd);
// CHECK-ASM: vno
vd = vec_nor(vd, vbl);
@@ -2770,6 +2942,12 @@ void test_integer(void) {
vbl = vec_sll(vbl, vui);
// CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsl
+ vslll = vec_sll(vslll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsl
+ vulll = vec_sll(vulll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsl
vsc = vec_slb(vsc, vsc);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -2789,42 +2967,69 @@ void test_integer(void) {
vss = vec_slb(vss, vus);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vss = vec_slb(vss, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vus = vec_slb(vus, vss);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
vus = vec_slb(vus, vus);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vus = vec_slb(vus, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vsi = vec_slb(vsi, vsi);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
vsi = vec_slb(vsi, vui);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vsi = vec_slb(vsi, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vui = vec_slb(vui, vsi);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
vui = vec_slb(vui, vui);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vui = vec_slb(vui, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vsl = vec_slb(vsl, vsl);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
vsl = vec_slb(vsl, vul);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vsl = vec_slb(vsl, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vul = vec_slb(vul, vsl);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
vul = vec_slb(vul, vul);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vul = vec_slb(vul, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
+ vslll = vec_slb(vslll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
+ vulll = vec_slb(vulll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vd = vec_slb(vd, vsl);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
vd = vec_slb(vd, vul);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vd = vec_slb(vd, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vsc = vec_sld(vsc, vsc, 0);
// CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
@@ -2898,6 +3103,18 @@ void test_integer(void) {
vbl = vec_sld(vbl, vbl, 15);
// CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
// CHECK-ASM: vsldb
+ vslll = vec_sld(vslll, vslll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsldb
+ vslll = vec_sld(vslll, vslll, 15);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+ // CHECK-ASM: vsldb
+ vulll = vec_sld(vulll, vulll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsldb
+ vulll = vec_sld(vulll, vulll, 15);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 15)
+ // CHECK-ASM: vsldb
vd = vec_sld(vd, vd, 0);
// CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
// CHECK-ASM: vsldb
@@ -2953,6 +3170,18 @@ void test_integer(void) {
vul = vec_sldw(vul, vul, 3);
// CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
// CHECK-ASM: vsldb
+ vslll = vec_sldw(vslll, vslll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsldb
+ vslll = vec_sldw(vslll, vslll, 3);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+ // CHECK-ASM: vsldb
+ vulll = vec_sldw(vulll, vulll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsldb
+ vulll = vec_sldw(vulll, vulll, 3);
+ // CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 12)
+ // CHECK-ASM: vsldb
vd = vec_sldw(vd, vd, 0);
// CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
// CHECK-ASM: vsldb
@@ -3068,6 +3297,12 @@ void test_integer(void) {
vbl = vec_sral(vbl, vui);
// CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsra
+ vslll = vec_sral(vslll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsra
+ vulll = vec_sral(vulll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsra(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsra
vsc = vec_srab(vsc, vsc);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -3087,42 +3322,69 @@ void test_integer(void) {
vss = vec_srab(vss, vus);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vss = vec_srab(vss, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vus = vec_srab(vus, vss);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
vus = vec_srab(vus, vus);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vus = vec_srab(vus, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vsi = vec_srab(vsi, vsi);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
vsi = vec_srab(vsi, vui);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vsi = vec_srab(vsi, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vui = vec_srab(vui, vsi);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
vui = vec_srab(vui, vui);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vui = vec_srab(vui, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vsl = vec_srab(vsl, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
vsl = vec_srab(vsl, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vsl = vec_srab(vsl, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vul = vec_srab(vul, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
vul = vec_srab(vul, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vul = vec_srab(vul, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
+ vslll = vec_srab(vslll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
+ vulll = vec_srab(vulll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vd = vec_srab(vd, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
vd = vec_srab(vd, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vd = vec_srab(vd, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vsc = vec_srl(vsc, vuc);
// CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -3232,6 +3494,12 @@ void test_integer(void) {
vbl = vec_srl(vbl, vui);
// CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrl
+ vslll = vec_srl(vslll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrl
+ vulll = vec_srl(vulll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrl(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrl
vsc = vec_srb(vsc, vsc);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -3251,42 +3519,69 @@ void test_integer(void) {
vss = vec_srb(vss, vus);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vss = vec_srb(vss, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vus = vec_srb(vus, vss);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
vus = vec_srb(vus, vus);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vus = vec_srb(vus, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vsi = vec_srb(vsi, vsi);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
vsi = vec_srb(vsi, vui);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vsi = vec_srb(vsi, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vui = vec_srb(vui, vsi);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
vui = vec_srb(vui, vui);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vui = vec_srb(vui, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vsl = vec_srb(vsl, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
vsl = vec_srb(vsl, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vsl = vec_srb(vsl, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vul = vec_srb(vul, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
vul = vec_srb(vul, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vul = vec_srb(vul, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
+ vslll = vec_srb(vslll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
+ vulll = vec_srb(vulll, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vd = vec_srb(vd, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
vd = vec_srb(vd, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vd = vec_srb(vd, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vsc = vec_abs(vsc);
// CHECK-ASM: vlcb
@@ -3345,6 +3640,10 @@ void test_integer(void) {
// CHECK-ASM: vmxlg
vul = vec_max(vbl, vul);
// CHECK-ASM: vmxlg
+ vslll = vec_max(vslll, vslll);
+ // (emulated)
+ vulll = vec_max(vulll, vulll);
+ // (emulated)
vd = vec_max(vd, vd);
// (emulated)
@@ -3396,6 +3695,10 @@ void test_integer(void) {
// CHECK-ASM: vmnlg
vul = vec_min(vbl, vul);
// CHECK-ASM: vmnlg
+ vslll = vec_min(vslll, vslll);
+ // (emulated)
+ vulll = vec_min(vulll, vulll);
+ // (emulated)
vd = vec_min(vd, vd);
// (emulated)
@@ -3411,6 +3714,16 @@ void test_integer(void) {
vul = vec_addc(vul, vul);
// CHECK: call <2 x i64> @llvm.s390.vaccg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: vaccg
+ vulll = vec_addc(vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vaccq(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vaccq
+
+ vulll = vec_adde(vulll, vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vacq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vacq
+ vulll = vec_addec(vulll, vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vacccq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vacccq
vuc = vec_add_u128(vuc, vuc);
// CHECK-ASM: vaq
@@ -3462,6 +3775,9 @@ void test_integer(void) {
vul = vec_gfmsum(vui, vui);
// CHECK: call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
// CHECK-ASM: vgfmf
+ vulll = vec_gfmsum(vul, vul);
+ // CHECK: call i128 @llvm.s390.vgfmg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vgfmg
vuc = vec_gfmsum_128(vul, vul);
// CHECK: call i128 @llvm.s390.vgfmg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: vgfmg
@@ -3475,6 +3791,9 @@ void test_integer(void) {
vul = vec_gfmsum_accum(vui, vui, vul);
// CHECK: call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: vgfmaf
+ vulll = vec_gfmsum_accum(vul, vul, vulll);
+ // CHECK: call i128 @llvm.s390.vgfmag(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vgfmag
vuc = vec_gfmsum_accum_128(vul, vul, vuc);
// CHECK: call i128 @llvm.s390.vgfmag(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
// CHECK-ASM: vgfmag
@@ -3630,6 +3949,16 @@ void test_integer(void) {
vul = vec_subc(vul, vul);
// CHECK: call <2 x i64> @llvm.s390.vscbig(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: vscbig
+ vulll = vec_subc(vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vscbiq(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vscbiq
+
+ vulll = vec_sube(vulll, vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vsbiq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vsbiq
+ vulll = vec_subec(vulll, vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vsbcbiq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vsbcbiq
vuc = vec_sub_u128(vuc, vuc);
// CHECK-ASM: vsq
@@ -3655,6 +3984,12 @@ void test_integer(void) {
vul = vec_sum2(vui, vui);
// CHECK: call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
// CHECK-ASM: vsumgf
+ vulll = vec_sum(vui, vui);
+ // CHECK: call i128 @llvm.s390.vsumqf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK-ASM: vsumqf
+ vulll = vec_sum(vul, vul);
+ // CHECK: call i128 @llvm.s390.vsumqg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vsumqg
vuc = vec_sum_u128(vui, vui);
// CHECK: call i128 @llvm.s390.vsumqf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
// CHECK-ASM: vsumqf
@@ -3686,6 +4021,12 @@ void test_integer(void) {
idx = vec_test_mask(vul, vul);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
+ idx = vec_test_mask(vslll, vulll);
+ // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vtm
+ idx = vec_test_mask(vulll, vulll);
+ // CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vtm
idx = vec_test_mask(vd, vul);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-error.c
index 127b0f67e85c95..0f2841d99c3ac2 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-error.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2-error.c
@@ -9,10 +9,12 @@ volatile vector signed char vsc;
volatile vector signed short vss;
volatile vector signed int vsi;
volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
volatile vector unsigned char vuc;
volatile vector unsigned short vus;
volatile vector unsigned int vui;
volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
volatile vector bool char vbc;
volatile vector bool short vbs;
volatile vector bool int vbi;
@@ -120,12 +122,19 @@ void test_core(void) {
void test_integer(void) {
vf = vec_sld(vf, vf, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 15 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vd = vec_sld(vd, vd, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vsldb' must be a constant integer}}
- // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 15 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+ vulll = vec_msum(vul, vul, vulll, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vmslg' must be a constant integer}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+ vulll = vec_msum(vul, vul, vulll, -1); // expected-error {{no matching function}} expected-error {{argument value -1 is outside the valid range [0, 15]}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+ vulll = vec_msum(vul, vul, vulll, 16); // expected-error {{no matching function}} expected-error {{argument value 16 is outside the valid range [0, 15]}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
+
vuc = vec_msum_u128(vul, vul, vuc, idx); // expected-error {{no matching function}} expected-error {{argument to '__builtin_s390_vmslg' must be a constant integer}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 15}}
vuc = vec_msum_u128(vul, vul, vuc, -1); // expected-error {{no matching function}} expected-error {{argument value -1 is outside the valid range [0, 15]}}
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
index 15e72ecf51dac1..60df95817a329d 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector2.c
@@ -12,14 +12,17 @@ volatile vector signed char vsc;
volatile vector signed short vss;
volatile vector signed int vsi;
volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
volatile vector unsigned char vuc;
volatile vector unsigned short vus;
volatile vector unsigned int vui;
volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
volatile vector bool char vbc;
volatile vector bool short vbs;
volatile vector bool int vbi;
volatile vector bool long long vbl;
+volatile vector bool __int128 vblll;
volatile vector float vf;
volatile vector double vd;
@@ -122,6 +125,10 @@ void test_core(void) {
// CHECK: call <16 x i8> @llvm.s390.vperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vperm
+ vul = vec_bperm(vulll, vuc);
+ // CHECK: call <2 x i64> @llvm.s390.vbperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vbperm
+
vul = vec_bperm_u128(vuc, vuc);
// CHECK: call <2 x i64> @llvm.s390.vbperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vbperm
@@ -220,6 +227,12 @@ void test_core(void) {
// CHECK: call void @llvm.s390.vstl(<16 x i8> %{{.*}}, i32 %{{.*}}, ptr %{{.*}})
// CHECK-ASM: vstl
+ vsc = vec_load_len_r(cptrsc, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vlrl(i32 0, ptr %{{.*}})
+ // CHECK-ASM: vlrl %{{.*}}, 0(%{{.*}}), 0
+ vsc = vec_load_len_r(cptrsc, idx);
+ // CHECK: call <16 x i8> @llvm.s390.vlrl(i32 %{{.*}}, ptr %{{.*}})
+ // CHECK-ASM: vlrlr
vuc = vec_load_len_r(cptruc, 0);
// CHECK: call <16 x i8> @llvm.s390.vlrl(i32 0, ptr %{{.*}})
// CHECK-ASM: vlrl %{{.*}}, 0(%{{.*}}), 0
@@ -227,6 +240,12 @@ void test_core(void) {
// CHECK: call <16 x i8> @llvm.s390.vlrl(i32 %{{.*}}, ptr %{{.*}})
// CHECK-ASM: vlrlr
+ vec_store_len_r(vsc, ptrsc, 0);
+ // CHECK: call void @llvm.s390.vstrl(<16 x i8> %{{.*}}, i32 0, ptr %{{.*}})
+ // CHECK-ASM: vstrl %{{.*}}, 0(%{{.*}}), 0
+ vec_store_len_r(vsc, ptrsc, idx);
+ // CHECK: call void @llvm.s390.vstrl(<16 x i8> %{{.*}}, i32 %{{.*}}, ptr %{{.*}})
+ // CHECK-ASM: vstrlr
vec_store_len_r(vuc, ptruc, 0);
// CHECK: call void @llvm.s390.vstrl(<16 x i8> %{{.*}}, i32 0, ptr %{{.*}})
// CHECK-ASM: vstrl %{{.*}}, 0(%{{.*}}), 0
@@ -479,6 +498,21 @@ void test_compare(void) {
void test_integer(void) {
// CHECK-ASM-LABEL: test_integer
+ vf = vec_and(vf, vf);
+ // CHECK-ASM: vn
+ vd = vec_and(vd, vd);
+ // CHECK-ASM: vn
+
+ vf = vec_or(vf, vf);
+ // CHECK-ASM: vo
+ vd = vec_or(vd, vd);
+ // CHECK-ASM: vo
+
+ vf = vec_xor(vf, vf);
+ // CHECK-ASM: vx
+ vd = vec_xor(vd, vd);
+ // CHECK-ASM: vx
+
vf = vec_andc(vf, vf);
// CHECK-ASM: vnc
vd = vec_andc(vd, vd);
@@ -513,6 +547,12 @@ void test_integer(void) {
// CHECK-ASM: vnn
vbl = vec_nand(vbl, vbl);
// CHECK-ASM: vnn
+ vslll = vec_nand(vslll, vslll);
+ // CHECK-ASM: vnn
+ vulll = vec_nand(vulll, vulll);
+ // CHECK-ASM: vnn
+ vblll = vec_nand(vblll, vblll);
+ // CHECK-ASM: vnn
vf = vec_nand(vf, vf);
// CHECK-ASM: vnn
vd = vec_nand(vd, vd);
@@ -542,6 +582,12 @@ void test_integer(void) {
// CHECK-ASM: voc
vbl = vec_orc(vbl, vbl);
// CHECK-ASM: voc
+ vslll = vec_orc(vslll, vslll);
+ // CHECK-ASM: voc
+ vulll = vec_orc(vulll, vulll);
+ // CHECK-ASM: voc
+ vblll = vec_orc(vblll, vblll);
+ // CHECK-ASM: voc
vf = vec_orc(vf, vf);
// CHECK-ASM: voc
vd = vec_orc(vd, vd);
@@ -571,6 +617,12 @@ void test_integer(void) {
// CHECK-ASM: vnx
vbl = vec_eqv(vbl, vbl);
// CHECK-ASM: vnx
+ vslll = vec_eqv(vslll, vslll);
+ // CHECK-ASM: vnx
+ vulll = vec_eqv(vulll, vulll);
+ // CHECK-ASM: vnx
+ vblll = vec_eqv(vblll, vblll);
+ // CHECK-ASM: vnx
vf = vec_eqv(vf, vf);
// CHECK-ASM: vnx
vd = vec_eqv(vd, vd);
@@ -607,12 +659,18 @@ void test_integer(void) {
vf = vec_slb(vf, vui);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vf = vec_slb(vf, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vd = vec_slb(vd, vsl);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
vd = vec_slb(vd, vul);
// CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vslb
+ vd = vec_slb(vd, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vslb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vslb
vf = vec_sld(vf, vf, 0);
// CHECK: call <16 x i8> @llvm.s390.vsldb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
@@ -633,12 +691,18 @@ void test_integer(void) {
vf = vec_srab(vf, vui);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vf = vec_srab(vf, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vd = vec_srab(vd, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
vd = vec_srab(vd, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrab
+ vd = vec_srab(vd, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrab
vf = vec_srb(vf, vsi);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -646,12 +710,18 @@ void test_integer(void) {
vf = vec_srb(vf, vui);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vf = vec_srb(vf, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
vd = vec_srb(vd, vsl);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
vd = vec_srb(vd, vul);
// CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vsrlb
+ vd = vec_srb(vd, vuc);
+ // CHECK: call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK-ASM: vsrlb
idx = vec_test_mask(vf, vui);
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
@@ -660,6 +730,19 @@ void test_integer(void) {
// CHECK: call i32 @llvm.s390.vtm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: vtm
+ vulll = vec_msum(vul, vul, vulll, 0);
+ // CHECK: call i128 @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}}, i32 0)
+ // CHECK-ASM: vmslg
+ vulll = vec_msum(vul, vul, vulll, 4);
+ // CHECK: call i128 @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}}, i32 4)
+ // CHECK-ASM: vmslg
+ vulll = vec_msum(vul, vul, vulll, 8);
+ // CHECK: call i128 @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}}, i32 8)
+ // CHECK-ASM: vmslg
+ vulll = vec_msum(vul, vul, vulll, 12);
+ // CHECK: call i128 @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}}, i32 12)
+ // CHECK-ASM: vmslg
+
vuc = vec_msum_u128(vul, vul, vuc, 0);
// CHECK: call i128 @llvm.s390.vmslg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}}, i32 0)
// CHECK-ASM: vmslg
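
As a side note for readers of the zvector2 changes above: these hunks mostly add vector __int128 overloads to existing intrinsics. A minimal usage sketch follows (illustrative only; the helper names and the compiler invocation, e.g. clang -target s390x-linux-gnu -march=arch15 -mzvector, are assumptions of this sketch, not part of the patch):

#include <vecintrin.h>

/* New overload: bit-permute from a vector unsigned __int128 source (vbperm). */
vector unsigned long long bperm128(vector unsigned __int128 src,
                                   vector unsigned char mask) {
  return vec_bperm(src, mask);
}

/* New overload: multiply-and-sum into a vector unsigned __int128 accumulator
   (vmslg); the last operand is a constant, the tests above use 0, 4, 8, 12. */
vector unsigned __int128 msum128(vector unsigned long long a,
                                 vector unsigned long long b,
                                 vector unsigned __int128 acc) {
  return vec_msum(a, b, acc, 0);
}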
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3-error.c
index 135e586f38aea1..83af380f627d22 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3-error.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3-error.c
@@ -9,10 +9,12 @@ volatile vector signed char vsc;
volatile vector signed short vss;
volatile vector signed int vsi;
volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
volatile vector unsigned char vuc;
volatile vector unsigned short vus;
volatile vector unsigned int vui;
volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
volatile vector bool char vbc;
volatile vector bool short vbs;
volatile vector bool int vbi;
@@ -62,83 +64,99 @@ int cc;
void test_integer(void) {
vsc = vec_sldb(vsc, vsc, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vuc = vec_sldb(vuc, vuc, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vss = vec_sldb(vss, vss, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vus = vec_sldb(vus, vus, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vsi = vec_sldb(vsi, vsi, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vui = vec_sldb(vui, vui, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vsl = vec_sldb(vsl, vsl, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vul = vec_sldb(vul, vul, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
+ vslll = vec_sldb(vslll, vslll, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
+ vulll = vec_sldb(vulll, vulll, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vf = vec_sldb(vf, vf, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vd = vec_sldb(vd, vd, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsld' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vsc = vec_srdb(vsc, vsc, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vuc = vec_srdb(vuc, vuc, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vss = vec_srdb(vss, vss, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vus = vec_srdb(vus, vus, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vsi = vec_srdb(vsi, vsi, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vui = vec_srdb(vui, vui, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vsl = vec_srdb(vsl, vsl, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vul = vec_srdb(vul, vul, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
+ vslll = vec_srdb(vslll, vslll, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
+ vulll = vec_srdb(vulll, vulll, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vf = vec_srdb(vf, vf, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
vd = vec_srdb(vd, vd, idx); // expected-error {{no matching function}} \
// expected-error {{argument to '__builtin_s390_vsrd' must be a constant integer}}
- // expected-note@vecintrin.h:* 9 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 11 {{candidate function not viable}}
// expected-note@vecintrin.h:* 1 {{must be a constant integer from 0 to 7}}
}
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3.c
index 1b0520d471f9ff..ce8b315127237d 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector3.c
@@ -12,10 +12,12 @@ volatile vector signed char vsc;
volatile vector signed short vss;
volatile vector signed int vsi;
volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
volatile vector unsigned char vuc;
volatile vector unsigned short vus;
volatile vector unsigned int vui;
volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
volatile vector bool char vbc;
volatile vector bool short vbs;
volatile vector bool int vbi;
@@ -39,10 +41,12 @@ const signed char * volatile cptrsc;
const signed short * volatile cptrss;
const signed int * volatile cptrsi;
const signed long long * volatile cptrsl;
+const signed __int128 * volatile cptrslll;
const unsigned char * volatile cptruc;
const unsigned short * volatile cptrus;
const unsigned int * volatile cptrui;
const unsigned long long * volatile cptrul;
+const unsigned __int128 * volatile cptrulll;
const float * volatile cptrf;
const double * volatile cptrd;
@@ -51,10 +55,12 @@ signed char * volatile ptrsc;
signed short * volatile ptrss;
signed int * volatile ptrsi;
signed long long * volatile ptrsl;
+signed __int128 * volatile ptrslll;
unsigned char * volatile ptruc;
unsigned short * volatile ptrus;
unsigned int * volatile ptrui;
unsigned long long * volatile ptrul;
+unsigned __int128 * volatile ptrulll;
float * volatile ptrf;
double * volatile ptrd;
@@ -85,6 +91,10 @@ void test_core(void) {
// CHECK-ASM: vlbrg
vul += vec_revb(vec_xl(idx, cptrul));
// CHECK-ASM: vlbrg
+ vslll += vec_revb(vec_xl(idx, cptrslll));
+ // CHECK-ASM: vlbrq
+ vulll += vec_revb(vec_xl(idx, cptrulll));
+ // CHECK-ASM: vlbrq
vf += vec_revb(vec_xl(idx, cptrf));
// CHECK-ASM: vlbrf
vd += vec_revb(vec_xl(idx, cptrd));
@@ -102,6 +112,10 @@ void test_core(void) {
// CHECK-ASM: vstbrg
vec_xst(vec_revb(vul), idx, ptrul);
// CHECK-ASM: vstbrg
+ vec_xst(vec_revb(vslll), idx, ptrslll);
+ // CHECK-ASM: vstbrq
+ vec_xst(vec_revb(vulll), idx, ptrulll);
+ // CHECK-ASM: vstbrq
vec_xst(vec_revb(vf), idx, ptrf);
// CHECK-ASM: vstbrf
vec_xst(vec_revb(vd), idx, ptrd);
@@ -301,6 +315,18 @@ void test_integer(void) {
vul = vec_sldb(vul, vul, 7);
// CHECK: call <16 x i8> @llvm.s390.vsld(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 7)
// CHECK-ASM: vsld
+ vslll = vec_sldb(vslll, vslll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsld(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsld
+ vslll = vec_sldb(vslll, vslll, 7);
+ // CHECK: call <16 x i8> @llvm.s390.vsld(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 7)
+ // CHECK-ASM: vsld
+ vulll = vec_sldb(vulll, vulll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsld(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsld
+ vulll = vec_sldb(vulll, vulll, 7);
+ // CHECK: call <16 x i8> @llvm.s390.vsld(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 7)
+ // CHECK-ASM: vsld
vf = vec_sldb(vf, vf, 0);
// CHECK: call <16 x i8> @llvm.s390.vsld(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
// CHECK-ASM: vsld
@@ -362,6 +388,18 @@ void test_integer(void) {
vul = vec_srdb(vul, vul, 7);
// CHECK: call <16 x i8> @llvm.s390.vsrd(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 7)
// CHECK-ASM: vsrd
+ vslll = vec_srdb(vslll, vslll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsrd(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsrd
+ vslll = vec_srdb(vslll, vslll, 7);
+ // CHECK: call <16 x i8> @llvm.s390.vsrd(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 7)
+ // CHECK-ASM: vsrd
+ vulll = vec_srdb(vulll, vulll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.vsrd(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: vsrd
+ vulll = vec_srdb(vulll, vulll, 7);
+ // CHECK: call <16 x i8> @llvm.s390.vsrd(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 7)
+ // CHECK-ASM: vsrd
vf = vec_srdb(vf, vf, 0);
// CHECK: call <16 x i8> @llvm.s390.vsrd(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
// CHECK-ASM: vsrd
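
For illustration, a small sketch of the vector __int128 overloads exercised by the zvector3 updates above (the helper names and compiler flags are assumptions of this sketch, not part of the patch; an arch15-enabled clang with -fzvector is assumed):

#include <vecintrin.h>

/* vec_xl / vec_revb now also accept __int128 element types (vlbrq/vstbrq). */
vector signed __int128 load_byte_reversed(const signed __int128 *p) {
  return vec_revb(vec_xl(0, p));
}

/* vec_sldb / vec_srdb gain __int128 overloads (vsld/vsrd); the shift amount
   remains a constant in the range 0 to 7.  */
vector unsigned __int128 shift_pair(vector unsigned __int128 a,
                                    vector unsigned __int128 b) {
  return vec_srdb(a, b, 3);
}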
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c
new file mode 100644
index 00000000000000..9f4844efd63124
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5-error.c
@@ -0,0 +1,124 @@
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \
+// RUN: -fzvector -flax-vector-conversions=none \
+// RUN: -Wall -Wno-unused -Werror -fsyntax-only -verify %s
+
+#include <vecintrin.h>
+
+volatile vector signed char vsc;
+volatile vector signed short vss;
+volatile vector signed int vsi;
+volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
+volatile vector unsigned char vuc;
+volatile vector unsigned short vus;
+volatile vector unsigned int vui;
+volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
+volatile vector bool char vbc;
+volatile vector bool short vbs;
+volatile vector bool int vbi;
+volatile vector bool long long vbl;
+volatile vector bool __int128 vblll;
+volatile vector double vd;
+
+volatile signed char sc;
+volatile signed short ss;
+volatile signed int si;
+volatile signed long long sl;
+volatile unsigned char uc;
+volatile unsigned short us;
+volatile unsigned int ui;
+volatile unsigned long long ul;
+volatile double d;
+
+const void * volatile cptr;
+const signed char * volatile cptrsc;
+const signed short * volatile cptrss;
+const signed int * volatile cptrsi;
+const signed long long * volatile cptrsl;
+const unsigned char * volatile cptruc;
+const unsigned short * volatile cptrus;
+const unsigned int * volatile cptrui;
+const unsigned long long * volatile cptrul;
+const float * volatile cptrf;
+const double * volatile cptrd;
+
+void * volatile ptr;
+signed char * volatile ptrsc;
+signed short * volatile ptrss;
+signed int * volatile ptrsi;
+signed long long * volatile ptrsl;
+unsigned char * volatile ptruc;
+unsigned short * volatile ptrus;
+unsigned int * volatile ptrui;
+unsigned long long * volatile ptrul;
+float * volatile ptrf;
+double * volatile ptrd;
+
+volatile unsigned int len;
+volatile int idx;
+int cc;
+
+void test_integer(void) {
+ vsc = vec_evaluate(vsc, vsc, vsc, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+ vuc = vec_evaluate(vuc, vuc, vuc, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vbc = vec_evaluate(vbc, vbc, vbc, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vss = vec_evaluate(vss, vss, vss, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+ vus = vec_evaluate(vus, vus, vus, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vbs = vec_evaluate(vbs, vbs, vbs, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vsi = vec_evaluate(vsi, vsi, vsi, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+ vui = vec_evaluate(vui, vui, vui, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vbi = vec_evaluate(vbi, vbi, vbi, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vsl = vec_evaluate(vsl, vsl, vsl, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+ vul = vec_evaluate(vul, vul, vul, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vbl = vec_evaluate(vbl, vbl, vbl, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vslll = vec_evaluate(vslll, vslll, vslll, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 14 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 1 {{must be a constant integer}}
+ vulll = vec_evaluate(vulll, vulll, vulll, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+ vblll = vec_evaluate(vblll, vblll, vblll, idx); // expected-error {{no matching function}} \
+ // expected-error {{argument to '__builtin_s390_veval' must be a constant integer}} \
+ // expected-note@vecintrin.h:* 13 {{candidate function not viable}}
+ // expected-note@vecintrin.h:* 2 {{must be a constant integer}}
+}
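
The point of this new negative test is that the last operand of vec_evaluate must be an integer constant expression; only then does it lower to veval. A hedged sketch of the distinction (helper name assumed for illustration):

#include <vecintrin.h>

vector unsigned char eval_ok(vector unsigned char a, vector unsigned char b,
                             vector unsigned char c) {
  /* OK: constant truth-table operand (the positive tests use 0 and 255). */
  return vec_evaluate(a, b, c, 255);
}

/* Not OK, as verified above: a runtime value such as
     vec_evaluate(a, b, c, idx);
   is rejected with "must be a constant integer". */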
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c
new file mode 100644
index 00000000000000..7a29dbf552e0b1
--- /dev/null
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector5.c
@@ -0,0 +1,429 @@
+// REQUIRES: systemz-registered-target
+// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \
+// RUN: -O2 -fzvector -flax-vector-conversions=none \
+// RUN: -Wall -Wno-unused -Werror -emit-llvm %s -o - | FileCheck %s
+// RUN: %clang_cc1 -target-cpu arch15 -triple s390x-linux-gnu \
+// RUN: -O2 -fzvector -flax-vector-conversions=none \
+// RUN: -Wall -Wno-unused -Werror -S %s -o - | FileCheck %s --check-prefix=CHECK-ASM
+
+#include <vecintrin.h>
+
+volatile vector signed char vsc;
+volatile vector signed short vss;
+volatile vector signed int vsi;
+volatile vector signed long long vsl;
+volatile vector signed __int128 vslll;
+volatile vector unsigned char vuc;
+volatile vector unsigned short vus;
+volatile vector unsigned int vui;
+volatile vector unsigned long long vul;
+volatile vector unsigned __int128 vulll;
+volatile vector bool char vbc;
+volatile vector bool short vbs;
+volatile vector bool int vbi;
+volatile vector bool long long vbl;
+volatile vector bool __int128 vblll;
+volatile vector float vf;
+volatile vector double vd;
+
+volatile int idx;
+int cc;
+
+void test_core(void) {
+ // CHECK-ASM-LABEL: test_core
+
+ vuc = vec_gen_element_masks_8(vus);
+ // CHECK: call <16 x i8> @llvm.s390.vgemb(<8 x i16> %{{.*}})
+ // CHECK-ASM: vgemb
+ vus = vec_gen_element_masks_16(vuc);
+ // CHECK: call <8 x i16> @llvm.s390.vgemh(<16 x i8> %{{.*}})
+ // CHECK-ASM: vgemh
+ vui = vec_gen_element_masks_32(vuc);
+ // CHECK: call <4 x i32> @llvm.s390.vgemf(<16 x i8> %{{.*}})
+ // CHECK-ASM: vgemf
+ vul = vec_gen_element_masks_64(vuc);
+ // CHECK: call <2 x i64> @llvm.s390.vgemg(<16 x i8> %{{.*}})
+ // CHECK-ASM: vgemg
+ vulll = vec_gen_element_masks_128(vuc);
+ // CHECK: call i128 @llvm.s390.vgemq(<16 x i8> %{{.*}})
+ // CHECK-ASM: vgemq
+
+ vsc = vec_blend(vsc, vsc, vsc);
+ // CHECK-ASM: vblendb
+ vbc = vec_blend(vbc, vbc, vsc);
+ // CHECK-ASM: vblendb
+ vuc = vec_blend(vuc, vuc, vsc);
+ // CHECK-ASM: vblendb
+ vss = vec_blend(vss, vss, vss);
+ // CHECK-ASM: vblendh
+ vbs = vec_blend(vbs, vbs, vss);
+ // CHECK-ASM: vblendh
+ vus = vec_blend(vus, vus, vss);
+ // CHECK-ASM: vblendh
+ vsi = vec_blend(vsi, vsi, vsi);
+ // CHECK-ASM: vblendf
+ vbi = vec_blend(vbi, vbi, vsi);
+ // CHECK-ASM: vblendf
+ vui = vec_blend(vui, vui, vsi);
+ // CHECK-ASM: vblendf
+ vsl = vec_blend(vsl, vsl, vsl);
+ // CHECK-ASM: vblendg
+ vul = vec_blend(vul, vul, vsl);
+ // CHECK-ASM: vblendg
+ vbl = vec_blend(vbl, vbl, vsl);
+ // CHECK-ASM: vblendg
+ vslll = vec_blend(vslll, vslll, vslll);
+ // CHECK-ASM: vblendq
+ vblll = vec_blend(vblll, vblll, vslll);
+ // CHECK-ASM: vblendq
+ vulll = vec_blend(vulll, vulll, vslll);
+ // CHECK-ASM: vblendq
+ vf = vec_blend(vf, vf, vsi);
+ // CHECK-ASM: vblendf
+ vd = vec_blend(vd, vd, vsl);
+ // CHECK-ASM: vblendg
+
+ vslll = vec_unpackh(vsl);
+ // CHECK: call i128 @llvm.s390.vuphg(<2 x i64> %{{.*}})
+ // CHECK-ASM: vuphg
+ vulll = vec_unpackh(vul);
+ // CHECK: call i128 @llvm.s390.vuplhg(<2 x i64> %{{.*}})
+ // CHECK-ASM: vuplhg
+ vslll = vec_unpackl(vsl);
+ // CHECK: call i128 @llvm.s390.vuplg(<2 x i64> %{{.*}})
+ // CHECK-ASM: vuplg
+ vulll = vec_unpackl(vul);
+ // CHECK: call i128 @llvm.s390.vupllg(<2 x i64> %{{.*}})
+ // CHECK-ASM: vupllg
+}
+
+void test_compare(void) {
+ // CHECK-ASM-LABEL: test_compare
+
+ vblll = vec_cmpeq(vslll, vslll);
+ // CHECK: icmp eq <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vceqq
+ vblll = vec_cmpeq(vulll, vulll);
+ // CHECK: icmp eq <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vceqq
+ vblll = vec_cmpeq(vblll, vblll);
+ // CHECK: icmp eq <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vceqq
+
+ vblll = vec_cmpge(vslll, vslll);
+ // CHECK: icmp sge <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchq
+ vblll = vec_cmpge(vulll, vulll);
+ // CHECK: icmp uge <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchlq
+
+ vblll = vec_cmpgt(vslll, vslll);
+ // CHECK: icmp sgt <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchq
+ vblll = vec_cmpgt(vulll, vulll);
+ // CHECK: icmp ugt <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchlq
+
+ vblll = vec_cmple(vslll, vslll);
+ // CHECK: icmp sle <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchq
+ vblll = vec_cmple(vulll, vulll);
+ // CHECK: icmp ule <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchlq
+
+ vblll = vec_cmplt(vslll, vslll);
+ // CHECK: icmp slt <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchq
+ vblll = vec_cmplt(vulll, vulll);
+ // CHECK: icmp ult <1 x i128> %{{.*}}, %{{.*}}
+ // CHECK-ASM: vchlq
+
+ idx = vec_all_eq(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_all_eq(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_all_eq(vblll, vblll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+
+ idx = vec_all_ne(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_all_ne(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_all_ne(vblll, vblll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+
+ idx = vec_all_ge(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_all_ge(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+
+ idx = vec_all_gt(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_all_gt(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+
+ idx = vec_all_le(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_all_le(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+
+ idx = vec_all_lt(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_all_lt(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+
+ idx = vec_any_eq(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_any_eq(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_any_eq(vblll, vblll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+
+ idx = vec_any_ne(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_any_ne(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+ idx = vec_any_ne(vblll, vblll);
+ // CHECK: call { i128, i32 } @llvm.s390.vceqqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vceqqs
+
+ idx = vec_any_ge(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_any_ge(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+
+ idx = vec_any_gt(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_any_gt(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+
+ idx = vec_any_le(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_any_le(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+
+ idx = vec_any_lt(vslll, vslll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchqs
+ idx = vec_any_lt(vulll, vulll);
+ // CHECK: call { i128, i32 } @llvm.s390.vchlqs(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vchlqs
+}
+
+void test_integer(void) {
+ // CHECK-ASM-LABEL: test_integer
+
+ vulll = vec_cntlz(vulll);
+ // CHECK: call range(i128 0, 129) i128 @llvm.ctlz.i128(i128 %{{.*}}, i1 false)
+ // CHECK-ASM: vclzq
+ vulll = vec_cnttz(vulll);
+ // CHECK: call range(i128 0, 129) i128 @llvm.cttz.i128(i128 %{{.*}}, i1 false)
+ // CHECK-ASM: vctzq
+
+ vslll = vec_abs(vslll);
+ // CHECK-ASM: vlcq
+
+ vslll = vec_avg(vslll, vslll);
+ // CHECK: call i128 @llvm.s390.vavgq(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vavgq
+ vulll = vec_avg(vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vavglq(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vavglq
+
+ vsc = vec_evaluate(vsc, vsc, vsc, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vsc = vec_evaluate(vsc, vsc, vsc, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vuc = vec_evaluate(vuc, vuc, vuc, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vuc = vec_evaluate(vuc, vuc, vuc, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vbc = vec_evaluate(vbc, vbc, vbc, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vbc = vec_evaluate(vbc, vbc, vbc, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vss = vec_evaluate(vss, vss, vss, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vss = vec_evaluate(vss, vss, vss, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vus = vec_evaluate(vus, vus, vus, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vus = vec_evaluate(vus, vus, vus, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vbs = vec_evaluate(vbs, vbs, vbs, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vbs = vec_evaluate(vbs, vbs, vbs, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vsi = vec_evaluate(vsi, vsi, vsi, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vsi = vec_evaluate(vsi, vsi, vsi, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vui = vec_evaluate(vui, vui, vui, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vui = vec_evaluate(vui, vui, vui, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vbi = vec_evaluate(vbi, vbi, vbi, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vbi = vec_evaluate(vbi, vbi, vbi, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vsl = vec_evaluate(vsl, vsl, vsl, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vsl = vec_evaluate(vsl, vsl, vsl, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vul = vec_evaluate(vul, vul, vul, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vul = vec_evaluate(vul, vul, vul, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vbl = vec_evaluate(vbl, vbl, vbl, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vbl = vec_evaluate(vbl, vbl, vbl, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vslll = vec_evaluate(vslll, vslll, vslll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vslll = vec_evaluate(vslll, vslll, vslll, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vulll = vec_evaluate(vulll, vulll, vulll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vulll = vec_evaluate(vulll, vulll, vulll, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+ vblll = vec_evaluate(vblll, vblll, vblll, 0);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 0)
+ // CHECK-ASM: veval
+ vblll = vec_evaluate(vblll, vblll, vblll, 255);
+ // CHECK: call <16 x i8> @llvm.s390.veval(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i32 255)
+ // CHECK-ASM: veval
+
+ vslll = vec_max(vslll, vslll);
+ // CHECK-ASM: vmxq
+ vulll = vec_max(vulll, vulll);
+ // CHECK-ASM: vmxlq
+ vslll = vec_min(vslll, vslll);
+ // CHECK-ASM: vmnq
+ vulll = vec_min(vulll, vulll);
+ // CHECK-ASM: vmnlq
+
+ vsl = vec_mladd(vsl, vsl, vsl);
+ // CHECK-ASM: vmalg
+ vsl = vec_mladd(vul, vsl, vsl);
+ // CHECK-ASM: vmalg
+ vsl = vec_mladd(vsl, vul, vul);
+ // CHECK-ASM: vmalg
+ vul = vec_mladd(vul, vul, vul);
+ // CHECK-ASM: vmalg
+ vslll = vec_mladd(vslll, vslll, vslll);
+ // CHECK-ASM: vmalq
+ vslll = vec_mladd(vulll, vslll, vslll);
+ // CHECK-ASM: vmalq
+ vslll = vec_mladd(vslll, vulll, vulll);
+ // CHECK-ASM: vmalq
+ vulll = vec_mladd(vulll, vulll, vulll);
+ // CHECK-ASM: vmalq
+
+ vsl = vec_mhadd(vsl, vsl, vsl);
+ // CHECK: call <2 x i64> @llvm.s390.vmahg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmahg
+ vul = vec_mhadd(vul, vul, vul);
+ // CHECK: call <2 x i64> @llvm.s390.vmalhg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmalhg
+ vslll = vec_mhadd(vslll, vslll, vslll);
+ // CHECK: call i128 @llvm.s390.vmahq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmahq
+ vulll = vec_mhadd(vulll, vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vmalhq(i128 %{{.*}}, i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmalhq
+
+ vslll = vec_meadd(vsl, vsl, vslll);
+ // CHECK: call i128 @llvm.s390.vmaeg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmaeg
+ vulll = vec_meadd(vul, vul, vulll);
+ // CHECK: call i128 @llvm.s390.vmaleg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmaleg
+
+ vslll = vec_moadd(vsl, vsl, vslll);
+ // CHECK: call i128 @llvm.s390.vmaog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmaog
+ vulll = vec_moadd(vul, vul, vulll);
+ // CHECK: call i128 @llvm.s390.vmalog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmalog
+
+ vsl = vec_mulh(vsl, vsl);
+ // CHECK: call <2 x i64> @llvm.s390.vmhg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmhg
+ vul = vec_mulh(vul, vul);
+ // CHECK: call <2 x i64> @llvm.s390.vmlhg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmlhg
+ vslll = vec_mulh(vslll, vslll);
+ // CHECK: call i128 @llvm.s390.vmhq(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmhq
+ vulll = vec_mulh(vulll, vulll);
+ // CHECK: call i128 @llvm.s390.vmlhq(i128 %{{.*}}, i128 %{{.*}})
+ // CHECK-ASM: vmlhq
+
+ vslll = vec_mule(vsl, vsl);
+ // CHECK: call i128 @llvm.s390.vmeg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmeg
+ vulll = vec_mule(vul, vul);
+ // CHECK: call i128 @llvm.s390.vmleg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmleg
+
+ vslll = vec_mulo(vsl, vsl);
+ // CHECK: call i128 @llvm.s390.vmog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmog
+ vulll = vec_mulo(vul, vul);
+ // CHECK: call i128 @llvm.s390.vmlog(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK-ASM: vmlog
+}
+
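To make the new zvector5 test easier to skim: it covers the arch15-only high-level intrinsics (element-mask generation, blend, full 128-bit compares, and the widened multiply family). A minimal usage sketch, again with assumed helper names and an assumed arch15-enabled clang with -fzvector:

#include <vecintrin.h>

/* Per-element select controlled by the third (signed) operand (vblendg). */
vector unsigned long long blend64(vector unsigned long long a,
                                  vector unsigned long long b,
                                  vector signed long long mask) {
  return vec_blend(a, b, mask);
}

/* Multiply the even-indexed doublewords into full 128-bit products (vmeg). */
vector signed __int128 mul_even(vector signed long long a,
                                vector signed long long b) {
  return vec_mule(a, b);
}

/* 128-bit element compare producing vector bool __int128 (vchlq). */
vector bool __int128 cmp128(vector unsigned __int128 a,
                            vector unsigned __int128 b) {
  return vec_cmpgt(a, b);
}
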
diff --git a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c
index 8361ccef21022d..1e1926678ec33e 100644
--- a/clang/test/CodeGen/SystemZ/systemz-abi-vector.c
+++ b/clang/test/CodeGen/SystemZ/systemz-abi-vector.c
@@ -18,6 +18,8 @@
// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \
// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s
+// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \
+// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=CHECK-VECTOR %s
// Vector types
diff --git a/clang/test/CodeGen/SystemZ/systemz-abi.c b/clang/test/CodeGen/SystemZ/systemz-abi.c
index fd2b5d450cc643..58081bdc6cc2aa 100644
--- a/clang/test/CodeGen/SystemZ/systemz-abi.c
+++ b/clang/test/CodeGen/SystemZ/systemz-abi.c
@@ -24,6 +24,11 @@
// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch14 \
// RUN: -emit-llvm -o - %s -mfloat-abi soft | FileCheck %s \
// RUN: --check-prefixes=CHECK,SOFT-FLOAT
+// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \
+// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,HARD-FLOAT
+// RUN: %clang_cc1 -no-enable-noundef-analysis -triple s390x-linux-gnu -target-cpu arch15 \
+// RUN: -emit-llvm -o - %s -mfloat-abi soft | FileCheck %s \
+// RUN: --check-prefixes=CHECK,SOFT-FLOAT
// Scalar types
diff --git a/clang/test/CodeGen/SystemZ/zvector.c b/clang/test/CodeGen/SystemZ/zvector.c
index cbf6a9a1a1bf2a..a0b654d9acc9a4 100644
--- a/clang/test/CodeGen/SystemZ/zvector.c
+++ b/clang/test/CodeGen/SystemZ/zvector.c
@@ -19,6 +19,10 @@ volatile vector signed long long sl, sl2;
volatile vector unsigned long long ul, ul2;
volatile vector bool long long bl, bl2;
+volatile vector signed __int128 slll, slll2;
+volatile vector unsigned __int128 ulll, ulll2;
+volatile vector bool __int128 blll, blll2;
+
volatile vector double fd, fd2;
volatile int cnt;
@@ -42,8 +46,12 @@ volatile int cnt;
// CHECK-NEXT: store volatile <2 x i64> [[TMP6]], ptr @sl, align 8
// CHECK-NEXT: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: store volatile <2 x i64> [[TMP7]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: store volatile <2 x double> [[TMP8]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: store volatile <1 x i128> [[TMP8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: store volatile <1 x i128> [[TMP9]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: store volatile <2 x double> [[TMP10]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_assign(void) {
@@ -60,6 +68,9 @@ void test_assign(void) {
sl = sl2;
ul = ul2;
+ slll = slll2;
+ ulll = ulll2;
+
fd = fd2;
}
@@ -82,8 +93,12 @@ void test_assign(void) {
// CHECK-NEXT: store volatile <2 x i64> [[TMP6]], ptr @sl, align 8
// CHECK-NEXT: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: store volatile <2 x i64> [[TMP7]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: store volatile <2 x double> [[TMP8]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: store volatile <1 x i128> [[TMP8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: store volatile <1 x i128> [[TMP9]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: store volatile <2 x double> [[TMP10]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_pos(void) {
@@ -100,6 +115,9 @@ void test_pos(void) {
sl = +sl2;
ul = +ul2;
+ slll = +slll2;
+ ulll = +ulll2;
+
fd = +fd2;
}
@@ -118,8 +136,11 @@ void test_pos(void) {
// CHECK-NEXT: [[TMP3:%.*]] = load volatile <2 x i64>, ptr @sl2, align 8
// CHECK-NEXT: [[SUB3:%.*]] = sub <2 x i64> zeroinitializer, [[TMP3]]
// CHECK-NEXT: store volatile <2 x i64> [[SUB3]], ptr @sl, align 8
-// CHECK-NEXT: [[TMP4:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x double> [[TMP4]]
+// CHECK-NEXT: [[TMP4:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[SUB4:%.*]] = sub <1 x i128> zeroinitializer, [[TMP4]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB4]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP5:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[FNEG:%.*]] = fneg <2 x double> [[TMP5]]
// CHECK-NEXT: store volatile <2 x double> [[FNEG]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
@@ -129,6 +150,7 @@ void test_neg(void) {
ss = -ss2;
si = -si2;
sl = -sl2;
+ slll = -slll2;
fd = -fd2;
}
@@ -159,9 +181,15 @@ void test_neg(void) {
// CHECK-NEXT: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1)
// CHECK-NEXT: store volatile <2 x i64> [[INC7]], ptr @ul2, align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00)
-// CHECK-NEXT: store volatile <2 x double> [[INC8]], ptr @fd2, align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[INC8:%.*]] = add <1 x i128> [[TMP8]], splat (i128 1)
+// CHECK-NEXT: store volatile <1 x i128> [[INC8]], ptr @slll2, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[INC9:%.*]] = add <1 x i128> [[TMP9]], splat (i128 1)
+// CHECK-NEXT: store volatile <1 x i128> [[INC9]], ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[INC10:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00)
+// CHECK-NEXT: store volatile <2 x double> [[INC10]], ptr @fd2, align 8
// CHECK-NEXT: ret void
//
void test_preinc(void) {
@@ -178,6 +206,9 @@ void test_preinc(void) {
++sl2;
++ul2;
+ ++slll2;
+ ++ulll2;
+
++fd2;
}
@@ -208,9 +239,15 @@ void test_preinc(void) {
// CHECK-NEXT: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[INC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 1)
// CHECK-NEXT: store volatile <2 x i64> [[INC7]], ptr @ul2, align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[INC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double 1.000000e+00)
-// CHECK-NEXT: store volatile <2 x double> [[INC8]], ptr @fd2, align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[INC8:%.*]] = add <1 x i128> [[TMP8]], splat (i128 1)
+// CHECK-NEXT: store volatile <1 x i128> [[INC8]], ptr @slll2, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[INC9:%.*]] = add <1 x i128> [[TMP9]], splat (i128 1)
+// CHECK-NEXT: store volatile <1 x i128> [[INC9]], ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[INC10:%.*]] = fadd <2 x double> [[TMP10]], splat (double 1.000000e+00)
+// CHECK-NEXT: store volatile <2 x double> [[INC10]], ptr @fd2, align 8
// CHECK-NEXT: ret void
//
void test_postinc(void) {
@@ -227,6 +264,9 @@ void test_postinc(void) {
sl2++;
ul2++;
+ slll2++;
+ ulll2++;
+
fd2++;
}
@@ -257,9 +297,15 @@ void test_postinc(void) {
// CHECK-NEXT: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1)
// CHECK-NEXT: store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00)
-// CHECK-NEXT: store volatile <2 x double> [[DEC8]], ptr @fd2, align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[DEC8:%.*]] = add <1 x i128> [[TMP8]], splat (i128 18446744073709551615)
+// CHECK-NEXT: store volatile <1 x i128> [[DEC8]], ptr @slll2, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[DEC9:%.*]] = add <1 x i128> [[TMP9]], splat (i128 18446744073709551615)
+// CHECK-NEXT: store volatile <1 x i128> [[DEC9]], ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[DEC10:%.*]] = fadd <2 x double> [[TMP10]], splat (double -1.000000e+00)
+// CHECK-NEXT: store volatile <2 x double> [[DEC10]], ptr @fd2, align 8
// CHECK-NEXT: ret void
//
void test_predec(void) {
@@ -276,6 +322,9 @@ void test_predec(void) {
--sl2;
--ul2;
+ --slll2;
+ --ulll2;
+
--fd2;
}
@@ -306,9 +355,15 @@ void test_predec(void) {
// CHECK-NEXT: [[TMP7:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[DEC7:%.*]] = add <2 x i64> [[TMP7]], splat (i64 -1)
// CHECK-NEXT: store volatile <2 x i64> [[DEC7]], ptr @ul2, align 8
-// CHECK-NEXT: [[TMP8:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[DEC8:%.*]] = fadd <2 x double> [[TMP8]], splat (double -1.000000e+00)
-// CHECK-NEXT: store volatile <2 x double> [[DEC8]], ptr @fd2, align 8
+// CHECK-NEXT: [[TMP8:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[DEC8:%.*]] = add <1 x i128> [[TMP8]], splat (i128 18446744073709551615)
+// CHECK-NEXT: store volatile <1 x i128> [[DEC8]], ptr @slll2, align 8
+// CHECK-NEXT: [[TMP9:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[DEC9:%.*]] = add <1 x i128> [[TMP9]], splat (i128 18446744073709551615)
+// CHECK-NEXT: store volatile <1 x i128> [[DEC9]], ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP10:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[DEC10:%.*]] = fadd <2 x double> [[TMP10]], splat (double -1.000000e+00)
+// CHECK-NEXT: store volatile <2 x double> [[DEC10]], ptr @fd2, align 8
// CHECK-NEXT: ret void
//
void test_postdec(void) {
@@ -325,6 +380,9 @@ void test_postdec(void) {
sl2--;
ul2--;
+ slll2--;
+ ulll2--;
+
fd2--;
}
@@ -427,10 +485,34 @@ void test_postdec(void) {
// CHECK-NEXT: [[TMP47:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[ADD23:%.*]] = add <2 x i64> [[TMP46]], [[TMP47]]
// CHECK-NEXT: store volatile <2 x i64> [[ADD23]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP48:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP49:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[ADD24:%.*]] = fadd <2 x double> [[TMP48]], [[TMP49]]
-// CHECK-NEXT: store volatile <2 x double> [[ADD24]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[ADD24:%.*]] = add <1 x i128> [[TMP48]], [[TMP49]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD24]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP50:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP51:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[ADD25:%.*]] = add <1 x i128> [[TMP50]], [[TMP51]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD25]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP52:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP53:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[ADD26:%.*]] = add <1 x i128> [[TMP52]], [[TMP53]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD26]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP54:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP55:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[ADD27:%.*]] = add <1 x i128> [[TMP54]], [[TMP55]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD27]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[ADD28:%.*]] = add <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD28]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[ADD29:%.*]] = add <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD29]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[ADD30:%.*]] = fadd <2 x double> [[TMP60]], [[TMP61]]
+// CHECK-NEXT: store volatile <2 x double> [[ADD30]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_add(void) {
@@ -463,6 +545,13 @@ void test_add(void) {
ul = ul + bl2;
ul = bl + ul2;
+ slll = slll + slll2;
+ slll = slll + blll2;
+ slll = blll + slll2;
+ ulll = ulll + ulll2;
+ ulll = ulll + blll2;
+ ulll = blll + ulll2;
+
fd = fd + fd2;
}
@@ -533,10 +622,26 @@ void test_add(void) {
// CHECK-NEXT: [[TMP31:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[ADD15:%.*]] = add <2 x i64> [[TMP31]], [[TMP30]]
// CHECK-NEXT: store volatile <2 x i64> [[ADD15]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[ADD16:%.*]] = fadd <2 x double> [[TMP33]], [[TMP32]]
-// CHECK-NEXT: store volatile <2 x double> [[ADD16]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP33:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[ADD16:%.*]] = add <1 x i128> [[TMP33]], [[TMP32]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD16]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP34:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP35:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[ADD17:%.*]] = add <1 x i128> [[TMP35]], [[TMP34]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD17]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP36:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[ADD18:%.*]] = add <1 x i128> [[TMP37]], [[TMP36]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD18]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP38:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP39:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[ADD19:%.*]] = add <1 x i128> [[TMP39]], [[TMP38]]
+// CHECK-NEXT: store volatile <1 x i128> [[ADD19]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[ADD20:%.*]] = fadd <2 x double> [[TMP41]], [[TMP40]]
+// CHECK-NEXT: store volatile <2 x double> [[ADD20]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_add_assign(void) {
@@ -561,6 +666,11 @@ void test_add_assign(void) {
ul += ul2;
ul += bl2;
+ slll += slll2;
+ slll += blll2;
+ ulll += ulll2;
+ ulll += blll2;
+
fd += fd2;
}
@@ -663,10 +773,34 @@ void test_add_assign(void) {
// CHECK-NEXT: [[TMP47:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[SUB23:%.*]] = sub <2 x i64> [[TMP46]], [[TMP47]]
// CHECK-NEXT: store volatile <2 x i64> [[SUB23]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP48:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP49:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[SUB24:%.*]] = fsub <2 x double> [[TMP48]], [[TMP49]]
-// CHECK-NEXT: store volatile <2 x double> [[SUB24]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[SUB24:%.*]] = sub <1 x i128> [[TMP48]], [[TMP49]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB24]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP50:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP51:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[SUB25:%.*]] = sub <1 x i128> [[TMP50]], [[TMP51]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB25]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP52:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP53:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[SUB26:%.*]] = sub <1 x i128> [[TMP52]], [[TMP53]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB26]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP54:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP55:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[SUB27:%.*]] = sub <1 x i128> [[TMP54]], [[TMP55]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB27]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[SUB28:%.*]] = sub <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB28]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[SUB29:%.*]] = sub <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB29]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[SUB30:%.*]] = fsub <2 x double> [[TMP60]], [[TMP61]]
+// CHECK-NEXT: store volatile <2 x double> [[SUB30]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_sub(void) {
@@ -699,6 +833,13 @@ void test_sub(void) {
ul = ul - bl2;
ul = bl - ul2;
+ slll = slll - slll2;
+ slll = slll - blll2;
+ slll = blll - slll2;
+ ulll = ulll - ulll2;
+ ulll = ulll - blll2;
+ ulll = blll - ulll2;
+
fd = fd - fd2;
}
@@ -769,10 +910,26 @@ void test_sub(void) {
// CHECK-NEXT: [[TMP31:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[SUB15:%.*]] = sub <2 x i64> [[TMP31]], [[TMP30]]
// CHECK-NEXT: store volatile <2 x i64> [[SUB15]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP32:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[TMP33:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[SUB16:%.*]] = fsub <2 x double> [[TMP33]], [[TMP32]]
-// CHECK-NEXT: store volatile <2 x double> [[SUB16]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP32:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP33:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SUB16:%.*]] = sub <1 x i128> [[TMP33]], [[TMP32]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB16]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP34:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP35:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SUB17:%.*]] = sub <1 x i128> [[TMP35]], [[TMP34]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB17]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP36:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP37:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SUB18:%.*]] = sub <1 x i128> [[TMP37]], [[TMP36]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB18]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP38:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP39:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SUB19:%.*]] = sub <1 x i128> [[TMP39]], [[TMP38]]
+// CHECK-NEXT: store volatile <1 x i128> [[SUB19]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[SUB20:%.*]] = fsub <2 x double> [[TMP41]], [[TMP40]]
+// CHECK-NEXT: store volatile <2 x double> [[SUB20]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_sub_assign(void) {
@@ -797,6 +954,11 @@ void test_sub_assign(void) {
ul -= ul2;
ul -= bl2;
+ slll -= slll2;
+ slll -= blll2;
+ ulll -= ulll2;
+ ulll -= blll2;
+
fd -= fd2;
}
@@ -835,10 +997,18 @@ void test_sub_assign(void) {
// CHECK-NEXT: [[TMP15:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[MUL7:%.*]] = mul <2 x i64> [[TMP14]], [[TMP15]]
// CHECK-NEXT: store volatile <2 x i64> [[MUL7]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[MUL8:%.*]] = fmul <2 x double> [[TMP16]], [[TMP17]]
-// CHECK-NEXT: store volatile <2 x double> [[MUL8]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[MUL8:%.*]] = mul <1 x i128> [[TMP16]], [[TMP17]]
+// CHECK-NEXT: store volatile <1 x i128> [[MUL8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[MUL9:%.*]] = mul <1 x i128> [[TMP18]], [[TMP19]]
+// CHECK-NEXT: store volatile <1 x i128> [[MUL9]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[MUL10:%.*]] = fmul <2 x double> [[TMP20]], [[TMP21]]
+// CHECK-NEXT: store volatile <2 x double> [[MUL10]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_mul(void) {
@@ -855,6 +1025,9 @@ void test_mul(void) {
sl = sl * sl2;
ul = ul * ul2;
+ slll = slll * slll2;
+ ulll = ulll * ulll2;
+
fd = fd * fd2;
}
@@ -893,10 +1066,18 @@ void test_mul(void) {
// CHECK-NEXT: [[TMP15:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[MUL7:%.*]] = mul <2 x i64> [[TMP15]], [[TMP14]]
// CHECK-NEXT: store volatile <2 x i64> [[MUL7]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[MUL8:%.*]] = fmul <2 x double> [[TMP17]], [[TMP16]]
-// CHECK-NEXT: store volatile <2 x double> [[MUL8]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[MUL8:%.*]] = mul <1 x i128> [[TMP17]], [[TMP16]]
+// CHECK-NEXT: store volatile <1 x i128> [[MUL8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[MUL9:%.*]] = mul <1 x i128> [[TMP19]], [[TMP18]]
+// CHECK-NEXT: store volatile <1 x i128> [[MUL9]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[MUL10:%.*]] = fmul <2 x double> [[TMP21]], [[TMP20]]
+// CHECK-NEXT: store volatile <2 x double> [[MUL10]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_mul_assign(void) {
@@ -913,6 +1094,9 @@ void test_mul_assign(void) {
sl *= sl2;
ul *= ul2;
+ slll *= slll2;
+ ulll *= ulll2;
+
fd *= fd2;
}
@@ -951,10 +1135,18 @@ void test_mul_assign(void) {
// CHECK-NEXT: [[TMP15:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[DIV7:%.*]] = udiv <2 x i64> [[TMP14]], [[TMP15]]
// CHECK-NEXT: store volatile <2 x i64> [[DIV7]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[DIV8:%.*]] = fdiv <2 x double> [[TMP16]], [[TMP17]]
-// CHECK-NEXT: store volatile <2 x double> [[DIV8]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[DIV8:%.*]] = sdiv <1 x i128> [[TMP16]], [[TMP17]]
+// CHECK-NEXT: store volatile <1 x i128> [[DIV8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[DIV9:%.*]] = udiv <1 x i128> [[TMP18]], [[TMP19]]
+// CHECK-NEXT: store volatile <1 x i128> [[DIV9]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[DIV10:%.*]] = fdiv <2 x double> [[TMP20]], [[TMP21]]
+// CHECK-NEXT: store volatile <2 x double> [[DIV10]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_div(void) {
@@ -971,6 +1163,9 @@ void test_div(void) {
sl = sl / sl2;
ul = ul / ul2;
+ slll = slll / slll2;
+ ulll = ulll / ulll2;
+
fd = fd / fd2;
}
@@ -1009,10 +1204,18 @@ void test_div(void) {
// CHECK-NEXT: [[TMP15:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[DIV7:%.*]] = udiv <2 x i64> [[TMP15]], [[TMP14]]
// CHECK-NEXT: store volatile <2 x i64> [[DIV7]], ptr @ul, align 8
-// CHECK-NEXT: [[TMP16:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[TMP17:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[DIV8:%.*]] = fdiv <2 x double> [[TMP17]], [[TMP16]]
-// CHECK-NEXT: store volatile <2 x double> [[DIV8]], ptr @fd, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[DIV8:%.*]] = sdiv <1 x i128> [[TMP17]], [[TMP16]]
+// CHECK-NEXT: store volatile <1 x i128> [[DIV8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[DIV9:%.*]] = udiv <1 x i128> [[TMP19]], [[TMP18]]
+// CHECK-NEXT: store volatile <1 x i128> [[DIV9]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP20:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[TMP21:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[DIV10:%.*]] = fdiv <2 x double> [[TMP21]], [[TMP20]]
+// CHECK-NEXT: store volatile <2 x double> [[DIV10]], ptr @fd, align 8
// CHECK-NEXT: ret void
//
void test_div_assign(void) {
@@ -1029,6 +1232,9 @@ void test_div_assign(void) {
sl /= sl2;
ul /= ul2;
+ slll /= slll2;
+ ulll /= ulll2;
+
fd /= fd2;
}
@@ -1067,6 +1273,14 @@ void test_div_assign(void) {
// CHECK-NEXT: [[TMP15:%.*]] = load volatile <2 x i64>, ptr @ul2, align 8
// CHECK-NEXT: [[REM7:%.*]] = urem <2 x i64> [[TMP14]], [[TMP15]]
// CHECK-NEXT: store volatile <2 x i64> [[REM7]], ptr @ul, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[REM8:%.*]] = srem <1 x i128> [[TMP16]], [[TMP17]]
+// CHECK-NEXT: store volatile <1 x i128> [[REM8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[REM9:%.*]] = urem <1 x i128> [[TMP18]], [[TMP19]]
+// CHECK-NEXT: store volatile <1 x i128> [[REM9]], ptr @ulll, align 8
// CHECK-NEXT: ret void
//
void test_rem(void) {
@@ -1082,6 +1296,9 @@ void test_rem(void) {
sl = sl % sl2;
ul = ul % ul2;
+
+ slll = slll % slll2;
+ ulll = ulll % ulll2;
}
// CHECK-LABEL: define dso_local void @test_rem_assign(
@@ -1119,6 +1336,14 @@ void test_rem(void) {
// CHECK-NEXT: [[TMP15:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[REM7:%.*]] = urem <2 x i64> [[TMP15]], [[TMP14]]
// CHECK-NEXT: store volatile <2 x i64> [[REM7]], ptr @ul, align 8
+// CHECK-NEXT: [[TMP16:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP17:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[REM8:%.*]] = srem <1 x i128> [[TMP17]], [[TMP16]]
+// CHECK-NEXT: store volatile <1 x i128> [[REM8]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP18:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP19:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[REM9:%.*]] = urem <1 x i128> [[TMP19]], [[TMP18]]
+// CHECK-NEXT: store volatile <1 x i128> [[REM9]], ptr @ulll, align 8
// CHECK-NEXT: ret void
//
void test_rem_assign(void) {
@@ -1134,6 +1359,9 @@ void test_rem_assign(void) {
sl %= sl2;
ul %= ul2;
+
+ slll %= slll2;
+ ulll %= ulll2;
}
// CHECK-LABEL: define dso_local void @test_not(
@@ -1175,6 +1403,15 @@ void test_rem_assign(void) {
// CHECK-NEXT: [[TMP11:%.*]] = load volatile <2 x i64>, ptr @bl2, align 8
// CHECK-NEXT: [[NOT11:%.*]] = xor <2 x i64> [[TMP11]], splat (i64 -1)
// CHECK-NEXT: store volatile <2 x i64> [[NOT11]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP12:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[NOT12:%.*]] = xor <1 x i128> [[TMP12]], splat (i128 -1)
+// CHECK-NEXT: store volatile <1 x i128> [[NOT12]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP13:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[NOT13:%.*]] = xor <1 x i128> [[TMP13]], splat (i128 -1)
+// CHECK-NEXT: store volatile <1 x i128> [[NOT13]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP14:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[NOT14:%.*]] = xor <1 x i128> [[TMP14]], splat (i128 -1)
+// CHECK-NEXT: store volatile <1 x i128> [[NOT14]], ptr @blll, align 8
// CHECK-NEXT: ret void
//
void test_not(void) {
@@ -1194,6 +1431,10 @@ void test_not(void) {
sl = ~sl2;
ul = ~ul2;
bl = ~bl2;
+
+ slll = ~slll2;
+ ulll = ~ulll2;
+ blll = ~blll2;
}
// CHECK-LABEL: define dso_local void @test_and(
@@ -1311,6 +1552,34 @@ void test_not(void) {
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @bl2, align 8
// CHECK-NEXT: [[AND27:%.*]] = and <2 x i64> [[TMP54]], [[TMP55]]
// CHECK-NEXT: store volatile <2 x i64> [[AND27]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[AND28:%.*]] = and <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND28]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[AND29:%.*]] = and <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND29]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[AND30:%.*]] = and <1 x i128> [[TMP60]], [[TMP61]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND30]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[AND31:%.*]] = and <1 x i128> [[TMP62]], [[TMP63]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND31]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[AND32:%.*]] = and <1 x i128> [[TMP64]], [[TMP65]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND32]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[AND33:%.*]] = and <1 x i128> [[TMP66]], [[TMP67]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND33]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[AND34:%.*]] = and <1 x i128> [[TMP68]], [[TMP69]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND34]], ptr @blll, align 8
// CHECK-NEXT: ret void
//
void test_and(void) {
@@ -1346,6 +1615,14 @@ void test_and(void) {
ul = ul & bl2;
ul = bl & ul2;
bl = bl & bl2;
+
+ slll = slll & slll2;
+ slll = slll & blll2;
+ slll = blll & slll2;
+ ulll = ulll & ulll2;
+ ulll = ulll & blll2;
+ ulll = blll & ulll2;
+ blll = blll & blll2;
}
// CHECK-LABEL: define dso_local void @test_and_assign(
@@ -1431,6 +1708,26 @@ void test_and(void) {
// CHECK-NEXT: [[TMP39:%.*]] = load volatile <2 x i64>, ptr @bl, align 8
// CHECK-NEXT: [[AND19:%.*]] = and <2 x i64> [[TMP39]], [[TMP38]]
// CHECK-NEXT: store volatile <2 x i64> [[AND19]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[AND20:%.*]] = and <1 x i128> [[TMP41]], [[TMP40]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND20]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[AND21:%.*]] = and <1 x i128> [[TMP43]], [[TMP42]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND21]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP44:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[AND22:%.*]] = and <1 x i128> [[TMP45]], [[TMP44]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND22]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[AND23:%.*]] = and <1 x i128> [[TMP47]], [[TMP46]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND23]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[AND24:%.*]] = and <1 x i128> [[TMP49]], [[TMP48]]
+// CHECK-NEXT: store volatile <1 x i128> [[AND24]], ptr @blll, align 8
// CHECK-NEXT: ret void
//
void test_and_assign(void) {
@@ -1458,6 +1755,12 @@ void test_and_assign(void) {
ul &= ul2;
ul &= bl2;
bl &= bl2;
+
+ slll &= slll2;
+ slll &= blll2;
+ ulll &= ulll2;
+ ulll &= blll2;
+ blll &= blll2;
}
// CHECK-LABEL: define dso_local void @test_or(
@@ -1575,6 +1878,34 @@ void test_and_assign(void) {
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @bl2, align 8
// CHECK-NEXT: [[OR27:%.*]] = or <2 x i64> [[TMP54]], [[TMP55]]
// CHECK-NEXT: store volatile <2 x i64> [[OR27]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[OR28:%.*]] = or <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR28]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[OR29:%.*]] = or <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR29]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[OR30:%.*]] = or <1 x i128> [[TMP60]], [[TMP61]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR30]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[OR31:%.*]] = or <1 x i128> [[TMP62]], [[TMP63]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR31]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[OR32:%.*]] = or <1 x i128> [[TMP64]], [[TMP65]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR32]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[OR33:%.*]] = or <1 x i128> [[TMP66]], [[TMP67]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR33]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[OR34:%.*]] = or <1 x i128> [[TMP68]], [[TMP69]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR34]], ptr @blll, align 8
// CHECK-NEXT: ret void
//
void test_or(void) {
@@ -1610,6 +1941,14 @@ void test_or(void) {
ul = ul | bl2;
ul = bl | ul2;
bl = bl | bl2;
+
+ slll = slll | slll2;
+ slll = slll | blll2;
+ slll = blll | slll2;
+ ulll = ulll | ulll2;
+ ulll = ulll | blll2;
+ ulll = blll | ulll2;
+ blll = blll | blll2;
}
// CHECK-LABEL: define dso_local void @test_or_assign(
@@ -1695,6 +2034,26 @@ void test_or(void) {
// CHECK-NEXT: [[TMP39:%.*]] = load volatile <2 x i64>, ptr @bl, align 8
// CHECK-NEXT: [[OR19:%.*]] = or <2 x i64> [[TMP39]], [[TMP38]]
// CHECK-NEXT: store volatile <2 x i64> [[OR19]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[OR20:%.*]] = or <1 x i128> [[TMP41]], [[TMP40]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR20]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[OR21:%.*]] = or <1 x i128> [[TMP43]], [[TMP42]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR21]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP44:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[OR22:%.*]] = or <1 x i128> [[TMP45]], [[TMP44]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR22]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[OR23:%.*]] = or <1 x i128> [[TMP47]], [[TMP46]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR23]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[OR24:%.*]] = or <1 x i128> [[TMP49]], [[TMP48]]
+// CHECK-NEXT: store volatile <1 x i128> [[OR24]], ptr @blll, align 8
// CHECK-NEXT: ret void
//
void test_or_assign(void) {
@@ -1722,6 +2081,12 @@ void test_or_assign(void) {
ul |= ul2;
ul |= bl2;
bl |= bl2;
+
+ slll |= slll2;
+ slll |= blll2;
+ ulll |= ulll2;
+ ulll |= blll2;
+ blll |= blll2;
}
// CHECK-LABEL: define dso_local void @test_xor(
@@ -1839,6 +2204,34 @@ void test_or_assign(void) {
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @bl2, align 8
// CHECK-NEXT: [[XOR27:%.*]] = xor <2 x i64> [[TMP54]], [[TMP55]]
// CHECK-NEXT: store volatile <2 x i64> [[XOR27]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[XOR28:%.*]] = xor <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR28]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[XOR29:%.*]] = xor <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR29]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[XOR30:%.*]] = xor <1 x i128> [[TMP60]], [[TMP61]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR30]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[XOR31:%.*]] = xor <1 x i128> [[TMP62]], [[TMP63]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR31]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[XOR32:%.*]] = xor <1 x i128> [[TMP64]], [[TMP65]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR32]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[XOR33:%.*]] = xor <1 x i128> [[TMP66]], [[TMP67]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR33]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[XOR34:%.*]] = xor <1 x i128> [[TMP68]], [[TMP69]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR34]], ptr @blll, align 8
// CHECK-NEXT: ret void
//
void test_xor(void) {
@@ -1874,6 +2267,14 @@ void test_xor(void) {
ul = ul ^ bl2;
ul = bl ^ ul2;
bl = bl ^ bl2;
+
+ slll = slll ^ slll2;
+ slll = slll ^ blll2;
+ slll = blll ^ slll2;
+ ulll = ulll ^ ulll2;
+ ulll = ulll ^ blll2;
+ ulll = blll ^ ulll2;
+ blll = blll ^ blll2;
}
// CHECK-LABEL: define dso_local void @test_xor_assign(
@@ -1959,6 +2360,26 @@ void test_xor(void) {
// CHECK-NEXT: [[TMP39:%.*]] = load volatile <2 x i64>, ptr @bl, align 8
// CHECK-NEXT: [[XOR19:%.*]] = xor <2 x i64> [[TMP39]], [[TMP38]]
// CHECK-NEXT: store volatile <2 x i64> [[XOR19]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP40:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP41:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[XOR20:%.*]] = xor <1 x i128> [[TMP41]], [[TMP40]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR20]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP42:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP43:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[XOR21:%.*]] = xor <1 x i128> [[TMP43]], [[TMP42]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR21]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP44:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP45:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[XOR22:%.*]] = xor <1 x i128> [[TMP45]], [[TMP44]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR22]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP46:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP47:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[XOR23:%.*]] = xor <1 x i128> [[TMP47]], [[TMP46]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR23]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP48:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[TMP49:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[XOR24:%.*]] = xor <1 x i128> [[TMP49]], [[TMP48]]
+// CHECK-NEXT: store volatile <1 x i128> [[XOR24]], ptr @blll, align 8
// CHECK-NEXT: ret void
//
void test_xor_assign(void) {
@@ -1986,6 +2407,12 @@ void test_xor_assign(void) {
ul ^= ul2;
ul ^= bl2;
bl ^= bl2;
+
+ slll ^= slll2;
+ slll ^= blll2;
+ ulll ^= ulll2;
+ ulll ^= blll2;
+ blll ^= blll2;
}
// CHECK-LABEL: define dso_local void @test_sl(
@@ -2133,6 +2560,42 @@ void test_xor_assign(void) {
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5)
// CHECK-NEXT: store volatile <2 x i64> [[SHL50]], ptr @ul, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[SHL51:%.*]] = shl <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL51]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[SHL52:%.*]] = shl <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL52]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT53:%.*]] = insertelement <1 x i32> poison, i32 [[TMP61]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT54:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT53]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM55:%.*]] = zext <1 x i32> [[SPLAT_SPLAT54]] to <1 x i128>
+// CHECK-NEXT: [[SHL56:%.*]] = shl <1 x i128> [[TMP60]], [[SH_PROM55]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL56]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHL57:%.*]] = shl <1 x i128> [[TMP62]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHL57]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[SHL58:%.*]] = shl <1 x i128> [[TMP63]], [[TMP64]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL58]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[SHL59:%.*]] = shl <1 x i128> [[TMP65]], [[TMP66]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL59]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x i32> poison, i32 [[TMP68]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT60]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM62:%.*]] = zext <1 x i32> [[SPLAT_SPLAT61]] to <1 x i128>
+// CHECK-NEXT: [[SHL63:%.*]] = shl <1 x i128> [[TMP67]], [[SH_PROM62]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL63]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHL64:%.*]] = shl <1 x i128> [[TMP69]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHL64]], ptr @ulll, align 8
// CHECK-NEXT: ret void
//
void test_sl(void) {
@@ -2172,6 +2635,15 @@ void test_sl(void) {
ul = ul << ul2;
ul = ul << cnt;
ul = ul << 5;
+
+ slll = slll << slll2;
+ slll = slll << ulll2;
+ slll = slll << cnt;
+ slll = slll << 5;
+ ulll = ulll << slll2;
+ ulll = ulll << ulll2;
+ ulll = ulll << cnt;
+ ulll = ulll << 5;
}
// CHECK-LABEL: define dso_local void @test_sl_assign(
@@ -2319,6 +2791,42 @@ void test_sl(void) {
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[SHL50:%.*]] = shl <2 x i64> [[TMP55]], splat (i64 5)
// CHECK-NEXT: store volatile <2 x i64> [[SHL50]], ptr @ul, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHL51:%.*]] = shl <1 x i128> [[TMP57]], [[TMP56]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL51]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHL52:%.*]] = shl <1 x i128> [[TMP59]], [[TMP58]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL52]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT53:%.*]] = insertelement <1 x i32> poison, i32 [[TMP60]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT54:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT53]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SH_PROM55:%.*]] = zext <1 x i32> [[SPLAT_SPLAT54]] to <1 x i128>
+// CHECK-NEXT: [[SHL56:%.*]] = shl <1 x i128> [[TMP61]], [[SH_PROM55]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL56]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHL57:%.*]] = shl <1 x i128> [[TMP62]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHL57]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHL58:%.*]] = shl <1 x i128> [[TMP64]], [[TMP63]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL58]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHL59:%.*]] = shl <1 x i128> [[TMP66]], [[TMP65]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL59]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x i32> poison, i32 [[TMP67]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT60]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SH_PROM62:%.*]] = zext <1 x i32> [[SPLAT_SPLAT61]] to <1 x i128>
+// CHECK-NEXT: [[SHL63:%.*]] = shl <1 x i128> [[TMP68]], [[SH_PROM62]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHL63]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHL64:%.*]] = shl <1 x i128> [[TMP69]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHL64]], ptr @ulll, align 8
// CHECK-NEXT: ret void
//
void test_sl_assign(void) {
@@ -2358,6 +2866,15 @@ void test_sl_assign(void) {
ul <<= ul2;
ul <<= cnt;
ul <<= 5;
+
+ slll <<= slll2;
+ slll <<= ulll2;
+ slll <<= cnt;
+ slll <<= 5;
+ ulll <<= slll2;
+ ulll <<= ulll2;
+ ulll <<= cnt;
+ ulll <<= 5;
}
// CHECK-LABEL: define dso_local void @test_sr(
@@ -2505,6 +3022,42 @@ void test_sl_assign(void) {
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5)
// CHECK-NEXT: store volatile <2 x i64> [[SHR50]], ptr @ul, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[SHR51:%.*]] = ashr <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR51]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[SHR52:%.*]] = ashr <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR52]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT53:%.*]] = insertelement <1 x i32> poison, i32 [[TMP61]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT54:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT53]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM55:%.*]] = zext <1 x i32> [[SPLAT_SPLAT54]] to <1 x i128>
+// CHECK-NEXT: [[SHR56:%.*]] = ashr <1 x i128> [[TMP60]], [[SH_PROM55]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR56]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHR57:%.*]] = ashr <1 x i128> [[TMP62]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHR57]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[SHR58:%.*]] = lshr <1 x i128> [[TMP63]], [[TMP64]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR58]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[SHR59:%.*]] = lshr <1 x i128> [[TMP65]], [[TMP66]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR59]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x i32> poison, i32 [[TMP68]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT60]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[SH_PROM62:%.*]] = zext <1 x i32> [[SPLAT_SPLAT61]] to <1 x i128>
+// CHECK-NEXT: [[SHR63:%.*]] = lshr <1 x i128> [[TMP67]], [[SH_PROM62]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR63]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHR64:%.*]] = lshr <1 x i128> [[TMP69]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHR64]], ptr @ulll, align 8
// CHECK-NEXT: ret void
//
void test_sr(void) {
@@ -2544,6 +3097,15 @@ void test_sr(void) {
ul = ul >> ul2;
ul = ul >> cnt;
ul = ul >> 5;
+
+ slll = slll >> slll2;
+ slll = slll >> ulll2;
+ slll = slll >> cnt;
+ slll = slll >> 5;
+ ulll = ulll >> slll2;
+ ulll = ulll >> ulll2;
+ ulll = ulll >> cnt;
+ ulll = ulll >> 5;
}
// CHECK-LABEL: define dso_local void @test_sr_assign(
@@ -2691,6 +3253,42 @@ void test_sr(void) {
// CHECK-NEXT: [[TMP55:%.*]] = load volatile <2 x i64>, ptr @ul, align 8
// CHECK-NEXT: [[SHR50:%.*]] = lshr <2 x i64> [[TMP55]], splat (i64 5)
// CHECK-NEXT: store volatile <2 x i64> [[SHR50]], ptr @ul, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHR51:%.*]] = ashr <1 x i128> [[TMP57]], [[TMP56]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR51]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHR52:%.*]] = ashr <1 x i128> [[TMP59]], [[TMP58]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR52]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT53:%.*]] = insertelement <1 x i32> poison, i32 [[TMP60]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT54:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT53]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SH_PROM55:%.*]] = zext <1 x i32> [[SPLAT_SPLAT54]] to <1 x i128>
+// CHECK-NEXT: [[SHR56:%.*]] = ashr <1 x i128> [[TMP61]], [[SH_PROM55]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR56]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[SHR57:%.*]] = ashr <1 x i128> [[TMP62]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHR57]], ptr @slll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHR58:%.*]] = lshr <1 x i128> [[TMP64]], [[TMP63]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR58]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHR59:%.*]] = lshr <1 x i128> [[TMP66]], [[TMP65]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR59]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile i32, ptr @cnt, align 4
+// CHECK-NEXT: [[SPLAT_SPLATINSERT60:%.*]] = insertelement <1 x i32> poison, i32 [[TMP67]], i64 0
+// CHECK-NEXT: [[SPLAT_SPLAT61:%.*]] = shufflevector <1 x i32> [[SPLAT_SPLATINSERT60]], <1 x i32> poison, <1 x i32> zeroinitializer
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SH_PROM62:%.*]] = zext <1 x i32> [[SPLAT_SPLAT61]] to <1 x i128>
+// CHECK-NEXT: [[SHR63:%.*]] = lshr <1 x i128> [[TMP68]], [[SH_PROM62]]
+// CHECK-NEXT: store volatile <1 x i128> [[SHR63]], ptr @ulll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[SHR64:%.*]] = lshr <1 x i128> [[TMP69]], splat (i128 5)
+// CHECK-NEXT: store volatile <1 x i128> [[SHR64]], ptr @ulll, align 8
// CHECK-NEXT: ret void
//
void test_sr_assign(void) {
@@ -2730,6 +3328,15 @@ void test_sr_assign(void) {
ul >>= ul2;
ul >>= cnt;
ul >>= 5;
+
+ slll >>= slll2;
+ slll >>= ulll2;
+ slll >>= cnt;
+ slll >>= 5;
+ ulll >>= slll2;
+ ulll >>= ulll2;
+ ulll >>= cnt;
+ ulll >>= 5;
}
@@ -2876,11 +3483,46 @@ void test_sr_assign(void) {
// CHECK-NEXT: [[CMP53:%.*]] = icmp eq <2 x i64> [[TMP54]], [[TMP55]]
// CHECK-NEXT: [[SEXT54:%.*]] = sext <2 x i1> [[CMP53]] to <2 x i64>
// CHECK-NEXT: store volatile <2 x i64> [[SEXT54]], ptr @bl, align 8
-// CHECK-NEXT: [[TMP56:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP57:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[CMP55:%.*]] = fcmp oeq <2 x double> [[TMP56]], [[TMP57]]
-// CHECK-NEXT: [[SEXT56:%.*]] = sext <2 x i1> [[CMP55]] to <2 x i64>
-// CHECK-NEXT: store volatile <2 x i64> [[SEXT56]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP55:%.*]] = icmp eq <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: [[SEXT56:%.*]] = sext <1 x i1> [[CMP55]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT56]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP57:%.*]] = icmp eq <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: [[SEXT58:%.*]] = sext <1 x i1> [[CMP57]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT58]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP59:%.*]] = icmp eq <1 x i128> [[TMP60]], [[TMP61]]
+// CHECK-NEXT: [[SEXT60:%.*]] = sext <1 x i1> [[CMP59]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT60]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP61:%.*]] = icmp eq <1 x i128> [[TMP62]], [[TMP63]]
+// CHECK-NEXT: [[SEXT62:%.*]] = sext <1 x i1> [[CMP61]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT62]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP63:%.*]] = icmp eq <1 x i128> [[TMP64]], [[TMP65]]
+// CHECK-NEXT: [[SEXT64:%.*]] = sext <1 x i1> [[CMP63]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT64]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP65:%.*]] = icmp eq <1 x i128> [[TMP66]], [[TMP67]]
+// CHECK-NEXT: [[SEXT66:%.*]] = sext <1 x i1> [[CMP65]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT66]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP67:%.*]] = icmp eq <1 x i128> [[TMP68]], [[TMP69]]
+// CHECK-NEXT: [[SEXT68:%.*]] = sext <1 x i1> [[CMP67]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT68]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP70:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP71:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[CMP69:%.*]] = fcmp oeq <2 x double> [[TMP70]], [[TMP71]]
+// CHECK-NEXT: [[SEXT70:%.*]] = sext <2 x i1> [[CMP69]] to <2 x i64>
+// CHECK-NEXT: store volatile <2 x i64> [[SEXT70]], ptr @bl, align 8
// CHECK-NEXT: ret void
//
void test_cmpeq(void) {
@@ -2917,6 +3559,14 @@ void test_cmpeq(void) {
bl = bl == ul2;
bl = bl == bl2;
+ blll = slll == slll2;
+ blll = slll == blll2;
+ blll = blll == slll2;
+ blll = ulll == ulll2;
+ blll = ulll == blll2;
+ blll = blll == ulll2;
+ blll = blll == blll2;
+
bl = fd == fd2;
}
@@ -3063,11 +3713,46 @@ void test_cmpeq(void) {
// CHECK-NEXT: [[CMP53:%.*]] = icmp ne <2 x i64> [[TMP54]], [[TMP55]]
// CHECK-NEXT: [[SEXT54:%.*]] = sext <2 x i1> [[CMP53]] to <2 x i64>
// CHECK-NEXT: store volatile <2 x i64> [[SEXT54]], ptr @bl, align 8
-// CHECK-NEXT: [[TMP56:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP57:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[CMP55:%.*]] = fcmp une <2 x double> [[TMP56]], [[TMP57]]
-// CHECK-NEXT: [[SEXT56:%.*]] = sext <2 x i1> [[CMP55]] to <2 x i64>
-// CHECK-NEXT: store volatile <2 x i64> [[SEXT56]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP56:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP57:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP55:%.*]] = icmp ne <1 x i128> [[TMP56]], [[TMP57]]
+// CHECK-NEXT: [[SEXT56:%.*]] = sext <1 x i1> [[CMP55]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT56]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP58:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP59:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP57:%.*]] = icmp ne <1 x i128> [[TMP58]], [[TMP59]]
+// CHECK-NEXT: [[SEXT58:%.*]] = sext <1 x i1> [[CMP57]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT58]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP60:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP61:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP59:%.*]] = icmp ne <1 x i128> [[TMP60]], [[TMP61]]
+// CHECK-NEXT: [[SEXT60:%.*]] = sext <1 x i1> [[CMP59]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT60]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP62:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP63:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP61:%.*]] = icmp ne <1 x i128> [[TMP62]], [[TMP63]]
+// CHECK-NEXT: [[SEXT62:%.*]] = sext <1 x i1> [[CMP61]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT62]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP64:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP65:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP63:%.*]] = icmp ne <1 x i128> [[TMP64]], [[TMP65]]
+// CHECK-NEXT: [[SEXT64:%.*]] = sext <1 x i1> [[CMP63]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT64]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP66:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP67:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP65:%.*]] = icmp ne <1 x i128> [[TMP66]], [[TMP67]]
+// CHECK-NEXT: [[SEXT66:%.*]] = sext <1 x i1> [[CMP65]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT66]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP68:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP69:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP67:%.*]] = icmp ne <1 x i128> [[TMP68]], [[TMP69]]
+// CHECK-NEXT: [[SEXT68:%.*]] = sext <1 x i1> [[CMP67]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT68]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP70:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP71:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[CMP69:%.*]] = fcmp une <2 x double> [[TMP70]], [[TMP71]]
+// CHECK-NEXT: [[SEXT70:%.*]] = sext <2 x i1> [[CMP69]] to <2 x i64>
+// CHECK-NEXT: store volatile <2 x i64> [[SEXT70]], ptr @bl, align 8
// CHECK-NEXT: ret void
//
void test_cmpne(void) {
@@ -3104,6 +3789,14 @@ void test_cmpne(void) {
bl = bl != ul2;
bl = bl != bl2;
+ blll = slll != slll2;
+ blll = slll != blll2;
+ blll = blll != slll2;
+ blll = ulll != ulll2;
+ blll = ulll != blll2;
+ blll = blll != ulll2;
+ blll = blll != blll2;
+
bl = fd != fd2;
}
@@ -3170,11 +3863,26 @@ void test_cmpne(void) {
// CHECK-NEXT: [[CMP21:%.*]] = icmp uge <2 x i64> [[TMP22]], [[TMP23]]
// CHECK-NEXT: [[SEXT22:%.*]] = sext <2 x i1> [[CMP21]] to <2 x i64>
// CHECK-NEXT: store volatile <2 x i64> [[SEXT22]], ptr @bl, align 8
-// CHECK-NEXT: [[TMP24:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[CMP23:%.*]] = fcmp oge <2 x double> [[TMP24]], [[TMP25]]
-// CHECK-NEXT: [[SEXT24:%.*]] = sext <2 x i1> [[CMP23]] to <2 x i64>
-// CHECK-NEXT: store volatile <2 x i64> [[SEXT24]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP24:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP23:%.*]] = icmp sge <1 x i128> [[TMP24]], [[TMP25]]
+// CHECK-NEXT: [[SEXT24:%.*]] = sext <1 x i1> [[CMP23]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT24]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP25:%.*]] = icmp uge <1 x i128> [[TMP26]], [[TMP27]]
+// CHECK-NEXT: [[SEXT26:%.*]] = sext <1 x i1> [[CMP25]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT26]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP29:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP27:%.*]] = icmp uge <1 x i128> [[TMP28]], [[TMP29]]
+// CHECK-NEXT: [[SEXT28:%.*]] = sext <1 x i1> [[CMP27]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT28]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[CMP29:%.*]] = fcmp oge <2 x double> [[TMP30]], [[TMP31]]
+// CHECK-NEXT: [[SEXT30:%.*]] = sext <2 x i1> [[CMP29]] to <2 x i64>
+// CHECK-NEXT: store volatile <2 x i64> [[SEXT30]], ptr @bl, align 8
// CHECK-NEXT: ret void
//
void test_cmpge(void) {
@@ -3195,6 +3903,10 @@ void test_cmpge(void) {
bl = ul >= ul2;
bl = bl >= bl2;
+ blll = slll >= slll2;
+ blll = ulll >= ulll2;
+ blll = blll >= blll2;
+
bl = fd >= fd2;
}
@@ -3261,11 +3973,26 @@ void test_cmpge(void) {
// CHECK-NEXT: [[CMP21:%.*]] = icmp ugt <2 x i64> [[TMP22]], [[TMP23]]
// CHECK-NEXT: [[SEXT22:%.*]] = sext <2 x i1> [[CMP21]] to <2 x i64>
// CHECK-NEXT: store volatile <2 x i64> [[SEXT22]], ptr @bl, align 8
-// CHECK-NEXT: [[TMP24:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[CMP23:%.*]] = fcmp ogt <2 x double> [[TMP24]], [[TMP25]]
-// CHECK-NEXT: [[SEXT24:%.*]] = sext <2 x i1> [[CMP23]] to <2 x i64>
-// CHECK-NEXT: store volatile <2 x i64> [[SEXT24]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP24:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP23:%.*]] = icmp sgt <1 x i128> [[TMP24]], [[TMP25]]
+// CHECK-NEXT: [[SEXT24:%.*]] = sext <1 x i1> [[CMP23]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT24]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP25:%.*]] = icmp ugt <1 x i128> [[TMP26]], [[TMP27]]
+// CHECK-NEXT: [[SEXT26:%.*]] = sext <1 x i1> [[CMP25]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT26]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP29:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP27:%.*]] = icmp ugt <1 x i128> [[TMP28]], [[TMP29]]
+// CHECK-NEXT: [[SEXT28:%.*]] = sext <1 x i1> [[CMP27]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT28]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[CMP29:%.*]] = fcmp ogt <2 x double> [[TMP30]], [[TMP31]]
+// CHECK-NEXT: [[SEXT30:%.*]] = sext <2 x i1> [[CMP29]] to <2 x i64>
+// CHECK-NEXT: store volatile <2 x i64> [[SEXT30]], ptr @bl, align 8
// CHECK-NEXT: ret void
//
void test_cmpgt(void) {
@@ -3286,6 +4013,10 @@ void test_cmpgt(void) {
bl = ul > ul2;
bl = bl > bl2;
+ blll = slll > slll2;
+ blll = ulll > ulll2;
+ blll = blll > blll2;
+
bl = fd > fd2;
}
@@ -3352,11 +4083,26 @@ void test_cmpgt(void) {
// CHECK-NEXT: [[CMP21:%.*]] = icmp ule <2 x i64> [[TMP22]], [[TMP23]]
// CHECK-NEXT: [[SEXT22:%.*]] = sext <2 x i1> [[CMP21]] to <2 x i64>
// CHECK-NEXT: store volatile <2 x i64> [[SEXT22]], ptr @bl, align 8
-// CHECK-NEXT: [[TMP24:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[CMP23:%.*]] = fcmp ole <2 x double> [[TMP24]], [[TMP25]]
-// CHECK-NEXT: [[SEXT24:%.*]] = sext <2 x i1> [[CMP23]] to <2 x i64>
-// CHECK-NEXT: store volatile <2 x i64> [[SEXT24]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP24:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP23:%.*]] = icmp sle <1 x i128> [[TMP24]], [[TMP25]]
+// CHECK-NEXT: [[SEXT24:%.*]] = sext <1 x i1> [[CMP23]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT24]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP25:%.*]] = icmp ule <1 x i128> [[TMP26]], [[TMP27]]
+// CHECK-NEXT: [[SEXT26:%.*]] = sext <1 x i1> [[CMP25]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT26]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP29:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP27:%.*]] = icmp ule <1 x i128> [[TMP28]], [[TMP29]]
+// CHECK-NEXT: [[SEXT28:%.*]] = sext <1 x i1> [[CMP27]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT28]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[CMP29:%.*]] = fcmp ole <2 x double> [[TMP30]], [[TMP31]]
+// CHECK-NEXT: [[SEXT30:%.*]] = sext <2 x i1> [[CMP29]] to <2 x i64>
+// CHECK-NEXT: store volatile <2 x i64> [[SEXT30]], ptr @bl, align 8
// CHECK-NEXT: ret void
//
void test_cmple(void) {
@@ -3377,6 +4123,10 @@ void test_cmple(void) {
bl = ul <= ul2;
bl = bl <= bl2;
+ blll = slll <= slll2;
+ blll = ulll <= ulll2;
+ blll = blll <= blll2;
+
bl = fd <= fd2;
}
@@ -3443,11 +4193,26 @@ void test_cmple(void) {
// CHECK-NEXT: [[CMP21:%.*]] = icmp ult <2 x i64> [[TMP22]], [[TMP23]]
// CHECK-NEXT: [[SEXT22:%.*]] = sext <2 x i1> [[CMP21]] to <2 x i64>
// CHECK-NEXT: store volatile <2 x i64> [[SEXT22]], ptr @bl, align 8
-// CHECK-NEXT: [[TMP24:%.*]] = load volatile <2 x double>, ptr @fd, align 8
-// CHECK-NEXT: [[TMP25:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
-// CHECK-NEXT: [[CMP23:%.*]] = fcmp olt <2 x double> [[TMP24]], [[TMP25]]
-// CHECK-NEXT: [[SEXT24:%.*]] = sext <2 x i1> [[CMP23]] to <2 x i64>
-// CHECK-NEXT: store volatile <2 x i64> [[SEXT24]], ptr @bl, align 8
+// CHECK-NEXT: [[TMP24:%.*]] = load volatile <1 x i128>, ptr @slll, align 8
+// CHECK-NEXT: [[TMP25:%.*]] = load volatile <1 x i128>, ptr @slll2, align 8
+// CHECK-NEXT: [[CMP23:%.*]] = icmp slt <1 x i128> [[TMP24]], [[TMP25]]
+// CHECK-NEXT: [[SEXT24:%.*]] = sext <1 x i1> [[CMP23]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT24]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP26:%.*]] = load volatile <1 x i128>, ptr @ulll, align 8
+// CHECK-NEXT: [[TMP27:%.*]] = load volatile <1 x i128>, ptr @ulll2, align 8
+// CHECK-NEXT: [[CMP25:%.*]] = icmp ult <1 x i128> [[TMP26]], [[TMP27]]
+// CHECK-NEXT: [[SEXT26:%.*]] = sext <1 x i1> [[CMP25]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT26]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP28:%.*]] = load volatile <1 x i128>, ptr @blll, align 8
+// CHECK-NEXT: [[TMP29:%.*]] = load volatile <1 x i128>, ptr @blll2, align 8
+// CHECK-NEXT: [[CMP27:%.*]] = icmp ult <1 x i128> [[TMP28]], [[TMP29]]
+// CHECK-NEXT: [[SEXT28:%.*]] = sext <1 x i1> [[CMP27]] to <1 x i128>
+// CHECK-NEXT: store volatile <1 x i128> [[SEXT28]], ptr @blll, align 8
+// CHECK-NEXT: [[TMP30:%.*]] = load volatile <2 x double>, ptr @fd, align 8
+// CHECK-NEXT: [[TMP31:%.*]] = load volatile <2 x double>, ptr @fd2, align 8
+// CHECK-NEXT: [[CMP29:%.*]] = fcmp olt <2 x double> [[TMP30]], [[TMP31]]
+// CHECK-NEXT: [[SEXT30:%.*]] = sext <2 x i1> [[CMP29]] to <2 x i64>
+// CHECK-NEXT: store volatile <2 x i64> [[SEXT30]], ptr @bl, align 8
// CHECK-NEXT: ret void
//
void test_cmplt(void) {
@@ -3468,6 +4233,10 @@ void test_cmplt(void) {
bl = ul < ul2;
bl = bl < bl2;
+ blll = slll < slll2;
+ blll = ulll < ulll2;
+ blll = blll < blll2;
+
bl = fd < fd2;
}
diff --git a/clang/test/Driver/systemz-march.c b/clang/test/Driver/systemz-march.c
index 31079435d2c6a1..93a11c6c9c013e 100644
--- a/clang/test/Driver/systemz-march.c
+++ b/clang/test/Driver/systemz-march.c
@@ -15,6 +15,7 @@
// RUN: %clang -target s390x -### -S -emit-llvm -march=arch13 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH13 %s
// RUN: %clang -target s390x -### -S -emit-llvm -march=z16 %s 2>&1 | FileCheck --check-prefix=CHECK-Z16 %s
// RUN: %clang -target s390x -### -S -emit-llvm -march=arch14 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH14 %s
+// RUN: %clang -target s390x -### -S -emit-llvm -march=arch15 %s 2>&1 | FileCheck --check-prefix=CHECK-ARCH15 %s
// CHECK-Z9: error: unknown target CPU 'z9'
// CHECK-Z10: "-target-cpu" "z10"
@@ -31,5 +32,6 @@
// CHECK-ARCH13: "-target-cpu" "arch13"
// CHECK-Z16: "-target-cpu" "z16"
// CHECK-ARCH14: "-target-cpu" "arch14"
+// CHECK-ARCH15: "-target-cpu" "arch15"
int x;
diff --git a/clang/test/Misc/target-invalid-cpu-note/systemz.c b/clang/test/Misc/target-invalid-cpu-note/systemz.c
index 22b0208eca902d..b70173f5feec27 100644
--- a/clang/test/Misc/target-invalid-cpu-note/systemz.c
+++ b/clang/test/Misc/target-invalid-cpu-note/systemz.c
@@ -19,4 +19,5 @@
// CHECK-SAME: {{^}}, z15
// CHECK-SAME: {{^}}, arch14
// CHECK-SAME: {{^}}, z16
+// CHECK-SAME: {{^}}, arch15
// CHECK-SAME: {{$}}
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 43f3454ed3c35d..f267f1759cdb5e 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -4391,6 +4391,21 @@
// CHECK_SYSTEMZ_ARCH14: #define __s390x__ 1
// CHECK_SYSTEMZ_ARCH14: #define __zarch__ 1
+// RUN: %clang -march=arch15 -E -dM %s -o - 2>&1 \
+// RUN: -target s390x-unknown-linux \
+// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ARCH15
+// CHECK_SYSTEMZ_ARCH15: #define __ARCH__ 15
+// CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_1 1
+// CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_2 1
+// CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_4 1
+// CHECK_SYSTEMZ_ARCH15: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_8 1
+// CHECK_SYSTEMZ_ARCH15: #define __HTM__ 1
+// CHECK_SYSTEMZ_ARCH15: #define __LONG_DOUBLE_128__ 1
+// CHECK_SYSTEMZ_ARCH15: #define __VX__ 1
+// CHECK_SYSTEMZ_ARCH15: #define __s390__ 1
+// CHECK_SYSTEMZ_ARCH15: #define __s390x__ 1
+// CHECK_SYSTEMZ_ARCH15: #define __zarch__ 1
+
// RUN: %clang -mhtm -E -dM %s -o - 2>&1 \
// RUN: -target s390x-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_HTM
@@ -4407,7 +4422,7 @@
// RUN: %clang -mzvector -E -dM %s -o - 2>&1 \
// RUN: -target s390x-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_SYSTEMZ_ZVECTOR
-// CHECK_SYSTEMZ_ZVECTOR: #define __VEC__ 10304
+// CHECK_SYSTEMZ_ZVECTOR: #define __VEC__ 10305
// Begin nvptx tests ----------------
diff --git a/clang/test/Sema/zvector.c b/clang/test/Sema/zvector.c
index 900c39adc2a371..e1e4ab532426dd 100644
--- a/clang/test/Sema/zvector.c
+++ b/clang/test/Sema/zvector.c
@@ -18,6 +18,10 @@ vector signed long long sl, sl2;
vector unsigned long long ul, ul2;
vector bool long long bl, bl2;
+vector signed __int128 slll, slll2;
+vector unsigned __int128 ulll, ulll2;
+vector bool __int128 blll, blll2;
+
vector double fd, fd2;
vector long ll; // expected-error {{cannot use 'long' with '__vector'}}
@@ -39,6 +43,9 @@ unsigned int ui_scalar;
signed long sl_scalar;
unsigned long ul_scalar;
+signed __int128 slll_scalar;
+unsigned __int128 ulll_scalar;
+
double fd_scalar;
// Verify that __vector is also recognized
@@ -54,6 +61,9 @@ __vector bool int bi3;
__vector signed long long sl3;
__vector unsigned long long ul3;
__vector bool long long bl3;
+__vector signed __int128 slll3;
+__vector unsigned __int128 ulll3;
+__vector bool __int128 blll3;
__vector double fd3;
__vector long ll3; // expected-error {{cannot use 'long' with '__vector'}}
__vector float ff3; // expected-error {{cannot use 'float' with '__vector'}}
@@ -85,6 +95,9 @@ int res_bi[vec_step(bi) == 4 ? 1 : -1];
int res_sl[vec_step(sl) == 2 ? 1 : -1];
int res_ul[vec_step(ul) == 2 ? 1 : -1];
int res_bl[vec_step(bl) == 2 ? 1 : -1];
+int res_slll[vec_step(slll) == 1 ? 1 : -1];
+int res_ulll[vec_step(ulll) == 1 ? 1 : -1];
+int res_blll[vec_step(blll) == 1 ? 1 : -1];
int res_fd[vec_step(fd) == 2 ? 1 : -1];
@@ -111,6 +124,10 @@ void foo(void)
bl = bl2;
fd = fd2;
+ slll = slll2;
+ ulll = ulll2;
+ blll = blll2;
+
sc = uc2; // expected-error {{incompatible type}}
sc = bc2; // expected-error {{incompatible type}}
uc = sc2; // expected-error {{incompatible type}}
@@ -129,31 +146,37 @@ void foo(void)
sc = si2; // expected-error {{incompatible type}}
sc = sl2; // expected-error {{incompatible type}}
sc = fd2; // expected-error {{incompatible type}}
+ sc = slll2; // expected-error {{incompatible type}}
ss = sc2; // expected-error {{incompatible type}}
si = sc2; // expected-error {{incompatible type}}
sl = sc2; // expected-error {{incompatible type}}
fd = sc2; // expected-error {{incompatible type}}
+ slll = sc2; // expected-error {{incompatible type}}
uc = us2; // expected-error {{incompatible type}}
uc = ui2; // expected-error {{incompatible type}}
uc = ul2; // expected-error {{incompatible type}}
uc = fd2; // expected-error {{incompatible type}}
+ uc = ulll2; // expected-error {{incompatible type}}
us = uc2; // expected-error {{incompatible type}}
ui = uc2; // expected-error {{incompatible type}}
ul = uc2; // expected-error {{incompatible type}}
fd = uc2; // expected-error {{incompatible type}}
+ ulll = uc2; // expected-error {{incompatible type}}
bc = us2; // expected-error {{incompatible type}}
bc = ui2; // expected-error {{incompatible type}}
bc = ul2; // expected-error {{incompatible type}}
bc = fd2; // expected-error {{incompatible type}}
+ bc = ulll2; // expected-error {{incompatible type}}
bs = bc2; // expected-error {{incompatible type}}
bi = bc2; // expected-error {{incompatible type}}
bl = bc2; // expected-error {{incompatible type}}
fd = bc2; // expected-error {{incompatible type}}
+ blll = bc2; // expected-error {{incompatible type}}
// -------------------------------------------------------------------------
// Test casts to same element width.
@@ -176,6 +199,10 @@ void foo(void)
ul = (vector unsigned long long)fd2;
fd = (vector double)sl2;
+ slll = (vector signed __int128)blll2;
+ blll = (vector bool __int128)ulll2;
+ ulll = (vector unsigned __int128)slll2;
+
// -------------------------------------------------------------------------
// Test casts to different element width.
// -------------------------------------------------------------------------
@@ -197,6 +224,10 @@ void foo(void)
ul = (vector unsigned long long)sc2;
fd = (vector double)sc2;
+ slll = (vector signed __int128)bi2;
+ blll = (vector bool __int128)ui2;
+ ulll = (vector unsigned __int128)si2;
+
// -------------------------------------------------------------------------
// Test ++.
// -------------------------------------------------------------------------
@@ -217,6 +248,10 @@ void foo(void)
++ul2;
++bl2; // expected-error {{cannot increment}}
+ ++slll2;
+ ++ulll2;
+ ++blll2; // expected-error {{cannot increment}}
+
++fd2;
sc++;
@@ -235,6 +270,10 @@ void foo(void)
ul++;
bl++; // expected-error {{cannot increment}}
+ slll++;
+ ulll++;
+ blll++; // expected-error {{cannot increment}}
+
fd++;
// -------------------------------------------------------------------------
@@ -257,6 +296,10 @@ void foo(void)
--ul2;
--bl2; // expected-error {{cannot decrement}}
+ --slll2;
+ --ulll2;
+ --blll2; // expected-error {{cannot decrement}}
+
--fd2;
sc--;
@@ -275,6 +318,10 @@ void foo(void)
ul--;
bl--; // expected-error {{cannot decrement}}
+ slll--;
+ ulll--;
+ blll--; // expected-error {{cannot decrement}}
+
fd--;
// -------------------------------------------------------------------------
@@ -297,6 +344,10 @@ void foo(void)
ul = +ul2;
bl = +bl2; // expected-error {{invalid argument type}}
+ slll = +slll2;
+ ulll = +ulll2;
+ blll = +blll2; // expected-error {{invalid argument type}}
+
fd = +fd2;
sc = +si2; // expected-error {{assigning to}}
@@ -323,6 +374,10 @@ void foo(void)
ul = -ul2;
bl = -bl2; // expected-error {{invalid argument type}}
+ slll = -slll2;
+ ulll = -ulll2;
+ blll = -blll2; // expected-error {{invalid argument type}}
+
fd = -fd2;
sc = -si2; // expected-error {{assigning to}}
@@ -349,6 +404,10 @@ void foo(void)
ul = ~ul2;
bl = ~bl2;
+ slll = ~slll2;
+ ulll = ~ulll2;
+ blll = ~blll2;
+
fd = ~fd2; // expected-error {{invalid argument}}
sc = ~si2; // expected-error {{assigning to}}
@@ -398,6 +457,10 @@ void foo(void)
ul = ul + ul2;
bl = bl + bl2; // expected-error {{invalid operands}}
+ slll = slll + slll2;
+ ulll = ulll + ulll2;
+ blll = blll + blll2; // expected-error {{invalid operands}}
+
fd = fd + fd2;
fd = fd + ul2; // expected-error {{cannot convert}}
fd = sl + fd2; // expected-error {{cannot convert}}
@@ -418,6 +481,7 @@ void foo(void)
sc += si2; // expected-error {{cannot convert}}
sc += sl2; // expected-error {{cannot convert}}
sc += fd2; // expected-error {{cannot convert}}
+ sc += slll2; // expected-error {{cannot convert}}
sc += sc_scalar;
sc += uc_scalar; // expected-error {{cannot convert between scalar type 'unsigned char' and vector type '__vector signed char' (vector of 16 'signed char' values) as implicit conversion would cause truncation}}
@@ -436,6 +500,10 @@ void foo(void)
ul += ul2;
bl += bl2; // expected-error {{invalid operands}}
+ slll += slll2;
+ ulll += ulll2;
+ blll += blll2; // expected-error {{invalid operands}}
+
fd += fd2;
// -------------------------------------------------------------------------
@@ -470,6 +538,10 @@ void foo(void)
ul -= ul2;
bl -= bl2; // expected-error {{invalid operands}}
+ slll -= slll2;
+ ulll -= ulll2;
+ blll -= blll2; // expected-error {{invalid operands}}
+
fd -= fd2;
// -------------------------------------------------------------------------
@@ -505,6 +577,11 @@ void foo(void)
ul *= ul2;
bl *= bl2; // expected-error {{invalid operands}}
+ slll *= slll2;
+ ulll *= ulll2;
+ blll *= blll2; // expected-error {{invalid operands}}
+
+
fd *= fd2;
// -------------------------------------------------------------------------
@@ -539,6 +616,10 @@ void foo(void)
ul /= ul2;
bl /= bl2; // expected-error {{invalid operands}}
+ slll /= slll2;
+ ulll /= ulll2;
+ blll /= blll2; // expected-error {{invalid operands}}
+
fd /= fd2;
// -------------------------------------------------------------------------
@@ -573,6 +654,10 @@ void foo(void)
ul %= ul2;
bl %= bl2; // expected-error {{invalid operands}}
+ slll %= slll2;
+ ulll %= ulll2;
+ blll %= blll2; // expected-error {{invalid operands}}
+
fd %= fd2; // expected-error {{invalid operands}}
// -------------------------------------------------------------------------
@@ -637,6 +722,10 @@ void foo(void)
ul &= ul2;
bl &= bl2;
+ slll &= slll2;
+ ulll &= ulll2;
+ blll &= blll2;
+
// -------------------------------------------------------------------------
// Test that & rules apply to | too.
// -------------------------------------------------------------------------
@@ -668,6 +757,10 @@ void foo(void)
ul |= ul2;
bl |= bl2;
+ slll |= slll2;
+ ulll |= ulll2;
+ blll |= blll2;
+
fd |= bl2; // expected-error {{invalid operands}}
fd |= fd2; // expected-error {{invalid operands}}
@@ -702,6 +795,10 @@ void foo(void)
ul ^= ul2;
bl ^= bl2;
+ slll ^= slll2;
+ ulll ^= ulll2;
+ blll ^= blll2;
+
fd ^= bl2; // expected-error {{invalid operands}}
fd ^= fd2; // expected-error {{invalid operands}}
@@ -762,6 +859,12 @@ void foo(void)
ul = ul << ul_scalar;
bl = bl << bl2; // expected-error {{invalid operands}}
+ slll = slll << slll2;
+ slll = slll << slll_scalar;
+ ulll = ulll << ulll2;
+ ulll = ulll << ulll_scalar;
+ blll = blll << blll2; // expected-error {{invalid operands}}
+
fd = fd << fd2; // expected-error {{integer is required}}
fd = fd << ul2; // expected-error {{integer is required}}
fd = sl << fd2; // expected-error {{integer is required}}
@@ -803,6 +906,12 @@ void foo(void)
ul <<= ul_scalar;
bl <<= bl2; // expected-error {{invalid operands}}
+ slll <<= slll2;
+ slll <<= slll_scalar;
+ ulll <<= ulll2;
+ ulll <<= ulll_scalar;
+ blll <<= blll2; // expected-error {{invalid operands}}
+
fd <<= fd2; // expected-error {{integer is required}}
// -------------------------------------------------------------------------
@@ -862,6 +971,12 @@ void foo(void)
ul = ul >> ul_scalar;
bl = bl >> bl2; // expected-error {{invalid operands}}
+ slll = slll >> slll2;
+ slll = slll >> slll_scalar;
+ ulll = ulll >> ulll2;
+ ulll = ulll >> ulll_scalar;
+ blll = blll >> blll2; // expected-error {{invalid operands}}
+
fd = fd >> fd2; // expected-error {{integer is required}}
fd = fd >> ul2; // expected-error {{integer is required}}
fd = sl >> fd2; // expected-error {{integer is required}}
@@ -903,6 +1018,12 @@ void foo(void)
ul >>= ul_scalar;
bl >>= bl2; // expected-error {{invalid operands}}
+ slll >>= slll2;
+ slll >>= slll_scalar;
+ ulll >>= ulll2;
+ ulll >>= ulll_scalar;
+ blll >>= blll2; // expected-error {{invalid operands}}
+
fd >>= fd2; // expected-error {{integer is required}}
// -------------------------------------------------------------------------
@@ -935,6 +1056,10 @@ void foo(void)
(void)(bl == bl2);
(void)(fd == fd2);
+ (void)(slll == slll2);
+ (void)(ulll == ulll2);
+ (void)(blll == blll2);
+
(void)(fd == ul); // expected-error {{cannot convert}}
(void)(ul == fd); // expected-error {{cannot convert}}
@@ -962,6 +1087,10 @@ void foo(void)
(void)(bl != bl2);
(void)(fd != fd2);
+ (void)(slll != slll2);
+ (void)(ulll != ulll2);
+ (void)(blll != blll2);
+
// -------------------------------------------------------------------------
// Test that == rules apply to <= too.
// -------------------------------------------------------------------------
@@ -986,6 +1115,10 @@ void foo(void)
(void)(bl <= bl2);
(void)(fd <= fd2);
+ (void)(slll <= slll2);
+ (void)(ulll <= ulll2);
+ (void)(blll <= blll2);
+
// -------------------------------------------------------------------------
// Test that == rules apply to >= too.
// -------------------------------------------------------------------------
@@ -1010,6 +1143,10 @@ void foo(void)
(void)(bl >= bl2);
(void)(fd >= fd2);
+ (void)(slll >= slll2);
+ (void)(ulll >= ulll2);
+ (void)(blll >= blll2);
+
// -------------------------------------------------------------------------
// Test that == rules apply to < too.
// -------------------------------------------------------------------------
@@ -1034,6 +1171,10 @@ void foo(void)
(void)(bl < bl2);
(void)(fd < fd2);
+ (void)(slll < slll2);
+ (void)(ulll < ulll2);
+ (void)(blll < blll2);
+
// -------------------------------------------------------------------------
// Test that == rules apply to > too.
// -------------------------------------------------------------------------
@@ -1057,4 +1198,8 @@ void foo(void)
(void)(ul > ul2);
(void)(bl > bl2);
(void)(fd > fd2);
+
+ (void)(slll > slll2);
+ (void)(ulll > ulll2);
+ (void)(blll > blll2);
}
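
For illustration only, here is a minimal C sketch (not part of the patch) of the new one-element 128-bit vector types that the Sema and CodeGen tests above exercise. The driver flags shown are assumptions; any s390x clang invocation that enables the zvector language extension and the arch15 feature set should behave the same way.

    /* vec128.c -- assumed flags:
       clang -target s390x-linux-gnu -march=arch15 -mzvector -O2 -S vec128.c */
    volatile vector signed __int128   slll, slll2;
    volatile vector unsigned __int128 ulll, ulll2;
    volatile vector bool __int128     blll;

    void cmp128(void) {
      /* Comparisons on the one-element 128-bit vectors yield a
         vector bool __int128 mask, mirroring the existing element types. */
      blll = slll > slll2;   /* signed 128-bit compare */
      blll = ulll > ulll2;   /* unsigned 128-bit compare */
      blll = ~blll;          /* bitwise operators remain available */
    }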
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index 4f925979cf8564..38b7463c7b0785 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -445,6 +445,39 @@ let TargetPrefix = "s390" in {
Intrinsic<[llvm_v8i16_ty],
[llvm_v8i16_ty, llvm_i32_ty],
[IntrNoMem, ImmArg<ArgIndex<1>>]>;
+
+ // Instructions from the Vector Enhancements Facility 3
+ def int_s390_vgemb : SystemZUnaryConv<"vgemb", llvm_v16i8_ty, llvm_v8i16_ty>;
+ def int_s390_vgemh : SystemZUnaryConv<"vgemh", llvm_v8i16_ty, llvm_v16i8_ty>;
+ def int_s390_vgemf : SystemZUnaryConv<"vgemf", llvm_v4i32_ty, llvm_v16i8_ty>;
+ def int_s390_vgemg : SystemZUnaryConv<"vgemg", llvm_v2i64_ty, llvm_v16i8_ty>;
+ def int_s390_vgemq : SystemZUnaryConv<"vgemq", llvm_i128_ty, llvm_v16i8_ty>;
+ def int_s390_vuplg : SystemZUnaryConv<"vuplg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vupllg : SystemZUnaryConv<"vupllg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vuphg : SystemZUnaryConv<"vuphg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vuplhg : SystemZUnaryConv<"vuplhg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vavgq : SystemZBinary<"vavgq", llvm_i128_ty>;
+ def int_s390_vavglq : SystemZBinary<"vavglq", llvm_i128_ty>;
+ def int_s390_veval : SystemZQuaternaryInt<"veval", llvm_v16i8_ty>;
+ def int_s390_vmahg : SystemZTernary<"vmahg", llvm_v2i64_ty>;
+ def int_s390_vmahq : SystemZTernary<"vmahq", llvm_i128_ty>;
+ def int_s390_vmalhg : SystemZTernary<"vmalhg", llvm_v2i64_ty>;
+ def int_s390_vmalhq : SystemZTernary<"vmalhq", llvm_i128_ty>;
+ def int_s390_vmaeg : SystemZTernaryConv<"vmaeg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vmaleg : SystemZTernaryConv<"vmaleg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vmaog : SystemZTernaryConv<"vmaog", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vmalog : SystemZTernaryConv<"vmalog", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vmhg : SystemZBinary<"vmhg", llvm_v2i64_ty>;
+ def int_s390_vmhq : SystemZBinary<"vmhq", llvm_i128_ty>;
+ def int_s390_vmlhg : SystemZBinary<"vmlhg", llvm_v2i64_ty>;
+ def int_s390_vmlhq : SystemZBinary<"vmlhq", llvm_i128_ty>;
+ def int_s390_vmeg : SystemZBinaryConv<"vmeg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vmleg : SystemZBinaryConv<"vmleg", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vmog : SystemZBinaryConv<"vmog", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vmlog : SystemZBinaryConv<"vmlog", llvm_i128_ty, llvm_v2i64_ty>;
+ def int_s390_vceqqs : SystemZBinaryCC<llvm_i128_ty>;
+ def int_s390_vchqs : SystemZBinaryCC<llvm_i128_ty>;
+ def int_s390_vchlqs : SystemZBinaryCC<llvm_i128_ty>;
}
//===----------------------------------------------------------------------===//
@@ -461,4 +494,12 @@ let TargetPrefix = "s390" in {
def int_s390_tdc : Intrinsic<[llvm_i32_ty], [llvm_anyfloat_ty, llvm_i64_ty],
[IntrNoMem]>;
+
+ // Instructions from the Miscellaneous Instruction Extensions Facility 4
+ def int_s390_bdepg : ClangBuiltin<"__builtin_s390_bdepg">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
+ def int_s390_bextg : ClangBuiltin<"__builtin_s390_bextg">,
+ Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty],
+ [IntrNoMem]>;
}
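
For illustration, a hedged C sketch (not part of the patch) of the two new low-level builtins declared above via ClangBuiltin. The parameter names below are assumptions; the precise bit semantics are those of the BDEPG/BEXTG instructions and are not spelled out here.

    /* assumed flags: clang -target s390x-linux-gnu -march=arch15 -O2 */
    unsigned long deposit_bits(unsigned long src, unsigned long mask) {
      return __builtin_s390_bdepg(src, mask);  /* lowers to llvm.s390.bdepg / BDEPG */
    }
    unsigned long extract_bits(unsigned long src, unsigned long mask) {
      return __builtin_s390_bextg(src, mask);  /* lowers to llvm.s390.bextg / BEXTG */
    }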
diff --git a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
index 599afed2199fb7..b892c9ea696026 100644
--- a/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
+++ b/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
@@ -75,7 +75,8 @@ enum MemoryKind {
BDXMem,
BDLMem,
BDRMem,
- BDVMem
+ BDVMem,
+ LXAMem
};
class SystemZOperand : public MCParsedAsmOperand {
@@ -339,6 +340,13 @@ class SystemZOperand : public MCParsedAsmOperand {
addExpr(Inst, Mem.Disp);
Inst.addOperand(MCOperand::createReg(Mem.Index));
}
+ void addLXAAddrOperands(MCInst &Inst, unsigned N) const {
+ assert(N == 3 && "Invalid number of operands");
+ assert(isMem(LXAMem) && "Invalid operand type");
+ Inst.addOperand(MCOperand::createReg(Mem.Base));
+ addExpr(Inst, Mem.Disp);
+ Inst.addOperand(MCOperand::createReg(Mem.Index));
+ }
void addImmTLSOperands(MCInst &Inst, unsigned N) const {
assert(N == 2 && "Invalid number of operands");
assert(Kind == KindImmTLS && "Invalid operand type");
@@ -376,6 +384,7 @@ class SystemZOperand : public MCParsedAsmOperand {
bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(GR64Reg); }
bool isBDRAddr64Disp12() const { return isMemDisp12(BDRMem, GR64Reg); }
bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, GR64Reg); }
+ bool isLXAAddr64Disp20() const { return isMemDisp20(LXAMem, GR64Reg); }
bool isU1Imm() const { return isImm(0, 1); }
bool isU2Imm() const { return isImm(0, 3); }
bool isU3Imm() const { return isImm(0, 7); }
@@ -582,6 +591,9 @@ class SystemZAsmParser : public MCTargetAsmParser {
ParseStatus parseBDVAddr64(OperandVector &Operands) {
return parseAddress(Operands, BDVMem, GR64Reg);
}
+ ParseStatus parseLXAAddr64(OperandVector &Operands) {
+ return parseAddress(Operands, LXAMem, GR64Reg);
+ }
ParseStatus parsePCRel12(OperandVector &Operands) {
return parsePCRel(Operands, -(1LL << 12), (1LL << 12) - 1, false);
}
@@ -1144,15 +1156,20 @@ ParseStatus SystemZAsmParser::parseAddress(OperandVector &Operands,
return Error(StartLoc, "invalid use of indexed addressing");
break;
case BDXMem:
+ case LXAMem:
// If we have Reg1, it must be an address register.
if (HaveReg1) {
+ const unsigned *IndexRegs = Regs;
+ if (MemKind == LXAMem)
+ IndexRegs = SystemZMC::GR32Regs;
+
if (parseAddressRegister(Reg1))
return ParseStatus::Failure;
// If there are two registers, the first one is the index and the
// second is the base. If there is only a single register, it is
// used as base with GAS and as index with HLASM.
if (HaveReg2 || isParsingHLASM())
- Index = Reg1.Num == 0 ? 0 : Regs[Reg1.Num];
+ Index = Reg1.Num == 0 ? 0 : IndexRegs[Reg1.Num];
else
Base = Reg1.Num == 0 ? 0 : Regs[Reg1.Num];
}
@@ -1278,6 +1295,8 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
ResTy = parseBDAddr64(Operands);
else if (Kind == MCK_BDVAddr64Disp12)
ResTy = parseBDVAddr64(Operands);
+ else if (Kind == MCK_LXAAddr64Disp20)
+ ResTy = parseLXAAddr64(Operands);
else if (Kind == MCK_PCRel32)
ResTy = parsePCRel32(Operands);
else if (Kind == MCK_PCRel16)
@@ -1324,6 +1343,8 @@ bool SystemZAsmParser::ParseDirectiveInsn(SMLoc L) {
ZOperand.addBDXAddrOperands(Inst, 3);
else if (ZOperand.isMem(BDVMem))
ZOperand.addBDVAddrOperands(Inst, 3);
+ else if (ZOperand.isMem(LXAMem))
+ ZOperand.addLXAAddrOperands(Inst, 3);
else if (ZOperand.isImm())
ZOperand.addImmOperands(Inst, 1);
else
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp
index fe0f3874765614..85366dfa6c4bdb 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.cpp
@@ -234,6 +234,12 @@ void SystemZInstPrinterCommon::printBDVAddrOperand(const MCInst *MI, int OpNum,
MI->getOperand(OpNum + 2).getReg(), O);
}
+void SystemZInstPrinterCommon::printLXAAddrOperand(const MCInst *MI, int OpNum,
+ raw_ostream &O) {
+ printAddress(&MAI, MI->getOperand(OpNum).getReg(), MI->getOperand(OpNum + 1),
+ MI->getOperand(OpNum + 2).getReg(), O);
+}
+
void SystemZInstPrinterCommon::printCond4Operand(const MCInst *MI, int OpNum,
raw_ostream &O) {
static const char *const CondNames[] = {"o", "h", "nle", "l", "nhe",
diff --git a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h
index 1a11e421691ae3..304aa03d988dc5 100644
--- a/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h
+++ b/llvm/lib/Target/SystemZ/MCTargetDesc/SystemZInstPrinterCommon.h
@@ -58,6 +58,7 @@ class SystemZInstPrinterCommon : public MCInstPrinter {
void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDRAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
+ void printLXAAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
diff --git a/llvm/lib/Target/SystemZ/SystemZFeatures.td b/llvm/lib/Target/SystemZ/SystemZFeatures.td
index e6b95d32c29fad..ec1a7beeab2135 100644
--- a/llvm/lib/Target/SystemZ/SystemZFeatures.td
+++ b/llvm/lib/Target/SystemZ/SystemZFeatures.td
@@ -346,6 +346,45 @@ def Arch14NewFeatures : SystemZFeatureList<[
FeatureProcessorActivityInstrumentation
]>;
+//===----------------------------------------------------------------------===//
+//
+// New features added in the Fifteenth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureMiscellaneousExtensions4 : SystemZFeature<
+ "miscellaneous-extensions-4", "MiscellaneousExtensions4", (all_of FeatureMiscellaneousExtensions4),
+ "Assume that the miscellaneous-extensions facility 4 is installed"
+>;
+
+def FeatureVectorEnhancements3 : SystemZFeature<
+ "vector-enhancements-3", "VectorEnhancements3", (all_of FeatureVectorEnhancements3),
+ "Assume that the vector enhancements facility 3 is installed"
+>;
+
+def FeatureVectorPackedDecimalEnhancement3 : SystemZFeature<
+ "vector-packed-decimal-enhancement-3", "VectorPackedDecimalEnhancement3", (all_of FeatureVectorPackedDecimalEnhancement3),
+ "Assume that the vector packed decimal enhancement facility 3 is installed"
+>;
+
+def FeatureMessageSecurityAssist12 : SystemZFeature<
+ "message-security-assist-extension12", "MessageSecurityAssist12", (all_of FeatureMessageSecurityAssist12),
+ "Assume that the message-security-assist extension facility 12 is installed"
+>;
+
+def FeatureConcurrentFunctions : SystemZFeature<
+ "concurrent-functions", "ConcurrentFunctions", (all_of FeatureConcurrentFunctions),
+ "Assume that the concurrent-functions facility is installed"
+>;
+
+def Arch15NewFeatures : SystemZFeatureList<[
+ FeatureMiscellaneousExtensions4,
+ FeatureVectorEnhancements3,
+ FeatureVectorPackedDecimalEnhancement3,
+ FeatureMessageSecurityAssist12,
+ FeatureConcurrentFunctions
+]>;
+
//===----------------------------------------------------------------------===//
//
// Cumulative supported and unsupported feature sets
@@ -366,9 +405,13 @@ def Arch13SupportedFeatures
: SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>;
def Arch14SupportedFeatures
: SystemZFeatureAdd<Arch13SupportedFeatures.List, Arch14NewFeatures.List>;
+def Arch15SupportedFeatures
+ : SystemZFeatureAdd<Arch14SupportedFeatures.List, Arch15NewFeatures.List>;
-def Arch14UnsupportedFeatures
+def Arch15UnsupportedFeatures
: SystemZFeatureList<[]>;
+def Arch14UnsupportedFeatures
+ : SystemZFeatureAdd<Arch15UnsupportedFeatures.List, Arch15NewFeatures.List>;
def Arch13UnsupportedFeatures
: SystemZFeatureAdd<Arch14UnsupportedFeatures.List, Arch14NewFeatures.List>;
def Arch12UnsupportedFeatures
diff --git a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 884d3a0614a8e3..3d90e3f6f67817 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -1001,6 +1001,16 @@ bool SystemZDAGToDAGISel::tryRISBGZero(SDNode *N) {
if (Count == 1 && N->getOpcode() != ISD::AND)
return false;
+ // Prefer LOAD LOGICAL INDEXED ADDRESS over RISBG in the case where we
+ // can use its displacement to pull in an addition.
+ if (Subtarget->hasMiscellaneousExtensions4() &&
+ RISBG.Rotate >= 1 && RISBG.Rotate <= 4 &&
+ RISBG.Mask == (((uint64_t)1 << 32) - 1) << RISBG.Rotate &&
+ RISBG.Input.getOpcode() == ISD::ADD)
+ if (auto *C = dyn_cast<ConstantSDNode>(RISBG.Input.getOperand(1)))
+ if (isInt<20>(C->getSExtValue()))
+ return false;
+
// Prefer register extensions like LLC over RISBG. Also prefer to start
// out with normal ANDs if one instruction would be enough. We can convert
// these ANDs into an RISBG later if a three-address instruction is useful.
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e3dfab962f55fe..4040ab6d45103a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -253,15 +253,24 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UMUL_LOHI, MVT::i128, Expand);
setOperationAction(ISD::ROTR, MVT::i128, Expand);
setOperationAction(ISD::ROTL, MVT::i128, Expand);
- setOperationAction(ISD::MUL, MVT::i128, Expand);
- setOperationAction(ISD::MULHS, MVT::i128, Expand);
- setOperationAction(ISD::MULHU, MVT::i128, Expand);
- setOperationAction(ISD::SDIV, MVT::i128, Expand);
- setOperationAction(ISD::UDIV, MVT::i128, Expand);
- setOperationAction(ISD::SREM, MVT::i128, Expand);
- setOperationAction(ISD::UREM, MVT::i128, Expand);
- setOperationAction(ISD::CTLZ, MVT::i128, Expand);
- setOperationAction(ISD::CTTZ, MVT::i128, Expand);
+
+ // No special instructions for these before arch15.
+ if (!Subtarget.hasVectorEnhancements3()) {
+ setOperationAction(ISD::MUL, MVT::i128, Expand);
+ setOperationAction(ISD::MULHS, MVT::i128, Expand);
+ setOperationAction(ISD::MULHU, MVT::i128, Expand);
+ setOperationAction(ISD::SDIV, MVT::i128, Expand);
+ setOperationAction(ISD::UDIV, MVT::i128, Expand);
+ setOperationAction(ISD::SREM, MVT::i128, Expand);
+ setOperationAction(ISD::UREM, MVT::i128, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i128, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i128, Expand);
+ } else {
+ // Even if we do have a legal 128-bit multiply, we do not
+ // want 64-bit multiply-high operations to use it.
+ setOperationAction(ISD::MULHS, MVT::i64, Custom);
+ setOperationAction(ISD::MULHU, MVT::i64, Custom);
+ }
// Support addition/subtraction with carry.
setOperationAction(ISD::UADDO, MVT::i128, Custom);
@@ -272,6 +281,11 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
// Use VPOPCT and add up partial results.
setOperationAction(ISD::CTPOP, MVT::i128, Custom);
+ // Additional instructions available with arch15.
+ if (Subtarget.hasVectorEnhancements3()) {
+ setOperationAction(ISD::ABS, MVT::i128, Legal);
+ }
+
// We have to use libcalls for these.
setOperationAction(ISD::FP_TO_UINT, MVT::i128, LibCall);
setOperationAction(ISD::FP_TO_SINT, MVT::i128, LibCall);
@@ -339,6 +353,13 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+ // On arch15 we have native support for a 64-bit CTTZ.
+ if (Subtarget.hasMiscellaneousExtensions4()) {
+ setOperationAction(ISD::CTTZ, MVT::i32, Promote);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Promote);
+ setOperationAction(ISD::CTTZ, MVT::i64, Legal);
+ }
+
// On z15 we have native support for a 64-bit CTPOP.
if (Subtarget.hasMiscellaneousExtensions3()) {
setOperationAction(ISD::CTPOP, MVT::i32, Promote);
@@ -433,8 +454,15 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
setOperationAction(ISD::ADD, VT, Legal);
setOperationAction(ISD::SUB, VT, Legal);
- if (VT != MVT::v2i64)
+ if (VT != MVT::v2i64 || Subtarget.hasVectorEnhancements3())
setOperationAction(ISD::MUL, VT, Legal);
+ if (Subtarget.hasVectorEnhancements3() &&
+ VT != MVT::v16i8 && VT != MVT::v8i16) {
+ setOperationAction(ISD::SDIV, VT, Legal);
+ setOperationAction(ISD::UDIV, VT, Legal);
+ setOperationAction(ISD::SREM, VT, Legal);
+ setOperationAction(ISD::UREM, VT, Legal);
+ }
setOperationAction(ISD::ABS, VT, Legal);
setOperationAction(ISD::AND, VT, Legal);
setOperationAction(ISD::OR, VT, Legal);
@@ -2528,6 +2556,7 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
case Intrinsic::s390_vceqhs:
case Intrinsic::s390_vceqfs:
case Intrinsic::s390_vceqgs:
+ case Intrinsic::s390_vceqqs:
Opcode = SystemZISD::VICMPES;
CCValid = SystemZ::CCMASK_VCMP;
return true;
@@ -2536,6 +2565,7 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
case Intrinsic::s390_vchhs:
case Intrinsic::s390_vchfs:
case Intrinsic::s390_vchgs:
+ case Intrinsic::s390_vchqs:
Opcode = SystemZISD::VICMPHS;
CCValid = SystemZ::CCMASK_VCMP;
return true;
@@ -2544,6 +2574,7 @@ static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
case Intrinsic::s390_vchlhs:
case Intrinsic::s390_vchlfs:
case Intrinsic::s390_vchlgs:
+ case Intrinsic::s390_vchlqs:
Opcode = SystemZISD::VICMPHLS;
CCValid = SystemZ::CCMASK_VCMP;
return true;
@@ -3223,6 +3254,8 @@ static void adjustICmp128(SelectionDAG &DAG, const SDLoc &DL,
return;
if (C.Op0.getValueType() != MVT::i128)
return;
+ if (DAG.getSubtarget<SystemZSubtarget>().hasVectorEnhancements3())
+ return;
// (In-)Equality comparisons can be implemented via VCEQGS.
if (C.CCMask == SystemZ::CCMASK_CMP_EQ ||
@@ -3636,6 +3669,18 @@ SDValue SystemZTargetLowering::lowerVectorSETCC(SelectionDAG &DAG,
// matter whether we try the inversion or the swap first, since
// there are no cases where both work.
default:
+ // Optimize sign-bit comparisons to signed compares.
+ if (Mode == CmpMode::Int && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
+ ISD::isConstantSplatVectorAllZeros(CmpOp1.getNode())) {
+ unsigned EltSize = VT.getVectorElementType().getSizeInBits();
+ APInt Mask;
+ if (CmpOp0.getOpcode() == ISD::AND
+ && ISD::isConstantSplatVector(CmpOp0.getOperand(1).getNode(), Mask)
+ && Mask == APInt::getSignMask(EltSize)) {
+ CC = CC == ISD::SETEQ ? ISD::SETGE : ISD::SETLT;
+ CmpOp0 = CmpOp0.getOperand(0);
+ }
+ }
if (unsigned Opcode = getVectorComparisonOrInvert(CC, Mode, Invert))
Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1, Chain);
else {
@@ -3734,6 +3779,42 @@ static SDValue getAbsolute(SelectionDAG &DAG, const SDLoc &DL, SDValue Op,
return Op;
}
+static SDValue getI128Select(SelectionDAG &DAG, const SDLoc &DL,
+ Comparison C, SDValue TrueOp, SDValue FalseOp) {
+ EVT VT = MVT::i128;
+ unsigned Op;
+
+ if (C.CCMask == SystemZ::CCMASK_CMP_NE ||
+ C.CCMask == SystemZ::CCMASK_CMP_GE ||
+ C.CCMask == SystemZ::CCMASK_CMP_LE) {
+ std::swap(TrueOp, FalseOp);
+ C.CCMask ^= C.CCValid;
+ }
+ if (C.CCMask == SystemZ::CCMASK_CMP_LT) {
+ std::swap(C.Op0, C.Op1);
+ C.CCMask = SystemZ::CCMASK_CMP_GT;
+ }
+ switch (C.CCMask) {
+ case SystemZ::CCMASK_CMP_EQ:
+ Op = SystemZISD::VICMPE;
+ break;
+ case SystemZ::CCMASK_CMP_GT:
+ if (C.ICmpType == SystemZICMP::UnsignedOnly)
+ Op = SystemZISD::VICMPHL;
+ else
+ Op = SystemZISD::VICMPH;
+ break;
+ default:
+ llvm_unreachable("Unhandled comparison");
+ break;
+ }
+
+ SDValue Mask = DAG.getNode(Op, DL, VT, C.Op0, C.Op1);
+ TrueOp = DAG.getNode(ISD::AND, DL, VT, TrueOp, Mask);
+ FalseOp = DAG.getNode(ISD::AND, DL, VT, FalseOp, DAG.getNOT(DL, Mask, VT));
+ return DAG.getNode(ISD::OR, DL, VT, TrueOp, FalseOp);
+}
+
SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
SelectionDAG &DAG) const {
SDValue CmpOp0 = Op.getOperand(0);
@@ -3759,6 +3840,13 @@ SDValue SystemZTargetLowering::lowerSELECT_CC(SDValue Op,
return getAbsolute(DAG, DL, FalseOp, C.CCMask & SystemZ::CCMASK_CMP_GT);
}
+ if (Subtarget.hasVectorEnhancements3() &&
+ C.Opcode == SystemZISD::ICMP &&
+ C.Op0.getValueType() == MVT::i128 &&
+ TrueOp.getValueType() == MVT::i128) {
+ return getI128Select(DAG, DL, C, TrueOp, FalseOp);
+ }
+
SDValue CCReg = emitCmp(DAG, DL, C);
SDValue Ops[] = {TrueOp, FalseOp,
DAG.getTargetConstant(C.CCValid, DL, MVT::i32),
@@ -4371,6 +4459,24 @@ SDValue SystemZTargetLowering::lowerGET_DYNAMIC_AREA_OFFSET(
return DAG.getNode(SystemZISD::ADJDYNALLOC, DL, MVT::i64);
}
+SDValue SystemZTargetLowering::lowerMULH(SDValue Op,
+ SelectionDAG &DAG,
+ unsigned Opcode) const {
+ EVT VT = Op.getValueType();
+ SDLoc DL(Op);
+ SDValue Even, Odd;
+
+ // This custom expander is only used on arch15 and later for 64-bit types.
+ assert(!is32Bit(VT));
+ assert(Subtarget.hasMiscellaneousExtensions2());
+
+ // SystemZISD::xMUL_LOHI returns the low result in the odd register and
+ // the high result in the even register. Return the latter.
+ lowerGR128Binary(DAG, DL, VT, Opcode,
+ Op.getOperand(0), Op.getOperand(1), Even, Odd);
+ return Even;
+}
+
SDValue SystemZTargetLowering::lowerSMUL_LOHI(SDValue Op,
SelectionDAG &DAG) const {
EVT VT = Op.getValueType();
@@ -5118,24 +5224,28 @@ SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::s390_vuphb:
case Intrinsic::s390_vuphh:
case Intrinsic::s390_vuphf:
+ case Intrinsic::s390_vuphg:
return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
case Intrinsic::s390_vuplhb:
case Intrinsic::s390_vuplhh:
case Intrinsic::s390_vuplhf:
+ case Intrinsic::s390_vuplhg:
return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
case Intrinsic::s390_vuplb:
case Intrinsic::s390_vuplhw:
case Intrinsic::s390_vuplf:
+ case Intrinsic::s390_vuplg:
return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
case Intrinsic::s390_vupllb:
case Intrinsic::s390_vupllh:
case Intrinsic::s390_vupllf:
+ case Intrinsic::s390_vupllg:
return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
Op.getOperand(1));
@@ -6442,6 +6552,10 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::GET_DYNAMIC_AREA_OFFSET:
return lowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
+ case ISD::MULHS:
+ return lowerMULH(Op, DAG, SystemZISD::SMUL_LOHI);
+ case ISD::MULHU:
+ return lowerMULH(Op, DAG, SystemZISD::UMUL_LOHI);
case ISD::SMUL_LOHI:
return lowerSMUL_LOHI(Op, DAG);
case ISD::UMUL_LOHI:
@@ -7946,9 +8060,9 @@ static bool combineCCMask(SDValue &CCReg, int &CCValid, int &CCMask) {
auto *FalseVal = dyn_cast<ConstantSDNode>(CompareLHS->getOperand(1));
if (!FalseVal)
return false;
- if (CompareRHS->getZExtValue() == FalseVal->getZExtValue())
+ if (CompareRHS->getAPIntValue() == FalseVal->getAPIntValue())
Invert = !Invert;
- else if (CompareRHS->getZExtValue() != TrueVal->getZExtValue())
+ else if (CompareRHS->getAPIntValue() != TrueVal->getAPIntValue())
return false;
// Compute the effective CC mask for the new branch or select.
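
For illustration, a hedged C sketch (not part of the patch, using GNU vector extensions and hypothetical names) of the kind of elementwise sign-bit test that the lowerVectorSETCC change above can now rewrite into a signed compare against zero:

    typedef unsigned long long v2u64 __attribute__((vector_size(16)));
    typedef long long          v2i64 __attribute__((vector_size(16)));

    v2i64 sign_clear(v2u64 x) {
      v2u64 sign = {0x8000000000000000ull, 0x8000000000000000ull};
      /* Per element, (x & sign) == 0 is equivalent to (signed)x >= 0,
         so the SETEQ against an all-zeros splat becomes SETGE on x itself. */
      return (x & sign) == (v2u64){0, 0};
    }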
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index afd3d0d989a225..839a5500124442 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -698,6 +698,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue lowerDYNAMIC_STACKALLOC_ELF(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerDYNAMIC_STACKALLOC_XPLINK(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerMULH(SDValue Op, SelectionDAG &DAG, unsigned Opcode) const;
SDValue lowerSMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSDIVREM(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
index ae8f669e9bab43..e16f3ed5f9fbca 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrFormats.td
@@ -1337,6 +1337,74 @@ class InstVRIi<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
let Inst{7-0} = op{7-0};
}
+class InstVRIj<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<8> I3;
+ bits<4> M4;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-24} = 0;
+ let Inst{23-20} = M4;
+ let Inst{19-12} = I3;
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9-8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIk<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<5> V3;
+ bits<5> V4;
+ bits<8> I5;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = V1{3-0};
+ let Inst{35-32} = V2{3-0};
+ let Inst{31-28} = V3{3-0};
+ let Inst{27-24} = 0;
+ let Inst{23-16} = I5;
+ let Inst{15-12} = V4{3-0};
+ let Inst{11} = V1{4};
+ let Inst{10} = V2{4};
+ let Inst{9} = V3{4};
+ let Inst{8} = V4{4};
+ let Inst{7-0} = op{7-0};
+}
+
+class InstVRIl<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
+ : InstSystemZ<6, outs, ins, asmstr, pattern> {
+ field bits<48> Inst;
+ field bits<48> SoftFail = 0;
+
+ bits<5> V1;
+ bits<5> V2;
+ bits<16> I3;
+
+ let Inst{47-40} = op{15-8};
+ let Inst{39-36} = 0;
+ let Inst{35-32} = V1{3-0};
+ let Inst{31-28} = V2{3-0};
+ let Inst{27-12} = I3;
+ let Inst{11} = 0;
+ let Inst{10} = V1{4};
+ let Inst{9} = V2{4};
+ let Inst{8} = 0;
+ let Inst{7-0} = op{7-0};
+}
+
// Depending on the instruction mnemonic, certain bits may be or-ed into
// the M4 value provided as explicit operand. These are passed as m4or.
class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
@@ -1511,11 +1579,13 @@ class InstVRRg<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
field bits<48> SoftFail = 0;
bits<5> V1;
+ bits<16> I2;
let Inst{47-40} = op{15-8};
let Inst{39-36} = 0;
let Inst{35-32} = V1{3-0};
- let Inst{31-12} = 0;
+ let Inst{31-28} = 0;
+ let Inst{27-12} = I2;
let Inst{11} = 0;
let Inst{10} = V1{4};
let Inst{9-8} = 0;
@@ -2187,6 +2257,9 @@ multiclass MnemonicCondBranchAlias<CondVariant V, string from, string to,
// LoadAddress:
// One register output operand and one address operand.
//
+// LoadIndexedAddress:
+// One register output operand and one indexed address operand.
+//
// SideEffectAddress:
// One address operand. No output operands, but causes some side effect.
//
@@ -3079,6 +3152,32 @@ class LoadAddressRIL<string mnemonic, bits<12> opcode,
mnemonic#"\t$R1, $RI2",
[(set GR64:$R1, (operator pcrel32:$RI2))]>;
+multiclass LoadIndexedAddressRXY<string mnemonic, bits<16> opcode,
+ SDPatternOperator ext,
+ SDPatternOperator shift = bitconvert> {
+ def "" : InstRXYa<opcode, (outs GR64:$R1),
+ (ins (lxaaddr20only $B2, $D2, $X2):$XBD2),
+ mnemonic#"\t$R1, $XBD2", []>;
+
+ // Patterns matching LXA with displacement.
+ def : Pat<(add ADDR64:$base,
+ (shift (i64 (ext (add ADDR32:$index, disp20imm32:$disp))))),
+ (!cast<Instruction>(NAME) ADDR64:$base, imm32:$disp, ADDR32:$index)>;
+ def : Pat<(shift (i64 (ext (add ADDR32:$index, disp20imm32:$disp)))),
+ (!cast<Instruction>(NAME) zero_reg, imm32:$disp, ADDR32:$index)>;
+
+ // Patterns matching LXA without displacement. These are only beneficial
+ // if we have a non-trivial shift. Also, we need to add some complexity
+ // to account for the fact that the regular shift patterns have rather
+ // high complexity values due to allowing base + displacement.
+ if !ne(shift, bitconvert) then let AddedComplexity = 2 in {
+ def : Pat<(add ADDR64:$base, (shift (i64 (ext ADDR32:$index)))),
+ (!cast<Instruction>(NAME) ADDR64:$base, 0, ADDR32:$index)>;
+ def : Pat<(shift (i64 (ext ADDR32:$index))),
+ (!cast<Instruction>(NAME) zero_reg, 0, ADDR32:$index)>;
+ }
+}
+
class UnaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
RegisterOperand cls1, RegisterOperand cls2>
: InstRR<opcode, (outs cls1:$R1), (ins cls2:$R2),
@@ -4453,7 +4552,17 @@ class TestRSL<string mnemonic, bits<16> opcode>
class TestVRRg<string mnemonic, bits<16> opcode>
: InstVRRg<opcode, (outs), (ins VR128:$V1),
- mnemonic#"\t$V1", []>;
+ mnemonic#"\t$V1", []> {
+ let I2 = 0;
+}
+
+class TestExtraVRRg<string mnemonic, bits<16> opcode>
+ : InstVRRg<opcode, (outs), (ins VR128:$V1, imm32zx16:$I2),
+ mnemonic#"\t$V1, $I2", []>;
+
+class TestExtraVRIl<string mnemonic, bits<16> opcode>
+ : InstVRIl<opcode, (outs), (ins VR128:$V1, VR128:$V2, imm32zx16:$I3),
+ mnemonic#"\t$V1, $V2, $I3", []>;
class SideEffectTernarySSc<string mnemonic, bits<8> opcode>
: InstSSc<opcode, (outs), (ins (bdladdr12onlylen4 $B1, $D1, $L1):$BDL1,
@@ -4675,6 +4784,11 @@ class TernaryVRIi<string mnemonic, bits<16> opcode, RegisterOperand cls>
(ins cls:$R2, imm32zx8:$I3, imm32zx4:$M4),
mnemonic#"\t$V1, $R2, $I3, $M4", []>;
+class TernaryVRIj<string mnemonic, bits<16> opcode>
+ : InstVRIj<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, imm32zx8:$I3, imm32zx4:$M4),
+ mnemonic#"\t$V1, $V2, $I3, $M4", []>;
+
class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or>
: InstVRRa<opcode, (outs tr1.op:$V1),
@@ -4748,6 +4862,26 @@ class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
let M6 = 0;
}
+class TernaryVRRcInt<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
+ bits<4> type = 0>
+ : InstVRRc<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M5",
+ [(set (tr1.vt tr1.op:$V1), (operator (tr2.vt tr2.op:$V2),
+ (tr2.vt tr2.op:$V3),
+ imm32zx4_timm:$M5))]> {
+ let M4 = type;
+ let M6 = 0;
+}
+
+class TernaryVRRcIntGeneric<string mnemonic, bits<16> opcode>
+ : InstVRRc<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, imm32zx4:$M4, imm32zx4:$M5),
+ mnemonic#"\t$V1, $V2, $V3, $M4, $M5", []> {
+ let M6 = 0;
+}
+
class TernaryVRRcFloat<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
bits<4> type = 0, bits<4> m5 = 0>
@@ -4926,6 +5060,16 @@ class QuaternaryVRIg<string mnemonic, bits<16> opcode>
imm32zx8:$I4, imm32zx4:$M5),
mnemonic#"\t$V1, $V2, $I3, $I4, $M5", []>;
+class QuaternaryVRIk<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator, TypedReg tr>
+ : InstVRIk<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4, imm32zx8:$I5),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $I5",
+ [(set (tr.vt tr.op:$V1), (operator (tr.vt tr.op:$V2),
+ (tr.vt tr.op:$V3),
+ (tr.vt tr.op:$V4),
+ imm32zx8_timm:$I5))]>;
+
class QuaternaryVRRd<string mnemonic, bits<16> opcode,
SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
TypedReg tr3, TypedReg tr4, bits<4> type,
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
index e70ae5dadcb02d..adfd0a19859c2f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -884,6 +884,39 @@ let isAsCheapAsAMove = 1, isMoveImm = 1, isReMaterializable = 1 in
def GOT : Alias<6, (outs GR64:$R1), (ins),
[(set GR64:$R1, (global_offset_table))]>;
+// Load (logical) indexed address.
+let Predicates = [FeatureMiscellaneousExtensions4] in {
+ defm LXAB : LoadIndexedAddressRXY<"lxab", 0xE360, sext32>;
+ defm LXAH : LoadIndexedAddressRXY<"lxah", 0xE362, sext32, shl1>;
+ defm LXAF : LoadIndexedAddressRXY<"lxaf", 0xE364, sext32, shl2>;
+ defm LXAG : LoadIndexedAddressRXY<"lxag", 0xE366, sext32, shl3>;
+ defm LXAQ : LoadIndexedAddressRXY<"lxaq", 0xE368, sext32, shl4>;
+ defm LLXAB : LoadIndexedAddressRXY<"llxab", 0xE361, zext32>;
+ defm LLXAH : LoadIndexedAddressRXY<"llxah", 0xE363, zext32, shl1>;
+ defm LLXAF : LoadIndexedAddressRXY<"llxaf", 0xE365, zext32, shl2>;
+ defm LLXAG : LoadIndexedAddressRXY<"llxag", 0xE367, zext32, shl3>;
+ defm LLXAQ : LoadIndexedAddressRXY<"llxaq", 0xE369, zext32, shl4>;
+
+ // Peepholes to use load (logical) indexed address to implement
+ // add + shift of an already extended value.
+ def : Pat<(add ADDR64:$base, (shl1 (assertsext32 ADDR64:$index))),
+ (LXAH ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+ def : Pat<(add ADDR64:$base, (shl2 (assertsext32 ADDR64:$index))),
+ (LXAF ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+ def : Pat<(add ADDR64:$base, (shl3 (assertsext32 ADDR64:$index))),
+ (LXAG ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+ def : Pat<(add ADDR64:$base, (shl4 (assertsext32 ADDR64:$index))),
+ (LXAQ ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+ def : Pat<(add ADDR64:$base, (shl1 (assertzext32 ADDR64:$index))),
+ (LLXAH ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+ def : Pat<(add ADDR64:$base, (shl2 (assertzext32 ADDR64:$index))),
+ (LLXAF ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+ def : Pat<(add ADDR64:$base, (shl3 (assertzext32 ADDR64:$index))),
+ (LLXAG ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+ def : Pat<(add ADDR64:$base, (shl4 (assertzext32 ADDR64:$index))),
+ (LLXAQ ADDR64:$base, 0, (EXTRACT_SUBREG ADDR64:$index, subreg_l32))>;
+}
+
//===----------------------------------------------------------------------===//
// Absolute and Negation
//===----------------------------------------------------------------------===//
@@ -1821,6 +1854,19 @@ let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
def LPDG : BinarySSF<"lpdg", 0xC85, GR128>;
}
+// Compare and load.
+let Predicates = [FeatureConcurrentFunctions], Defs = [CC] in {
+ def CAL : BinarySSF<"cal", 0xC86, GR32>;
+ def CALGF : BinarySSF<"calgf", 0xC8F, GR64>;
+ def CALG : BinarySSF<"calg", 0xC87, GR64>;
+}
+
+// Perform function with concurrent results.
+let Predicates = [FeatureConcurrentFunctions], Uses = [R0D], Defs = [CC],
+ mayLoad = 1, mayStore = 1, hasSideEffects = 1 in {
+ def PFCR : BinaryRSY<"pfcr", 0xEB16, null_frag, GR64>;
+}
+
//===----------------------------------------------------------------------===//
// Translate and convert
//===----------------------------------------------------------------------===//
@@ -1910,6 +1956,11 @@ let mayLoad = 1, mayStore = 1, Uses = [R0L, R1D], Defs = [CC] in {
let Predicates = [FeatureMessageSecurityAssist9] in
def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>;
+
+ let Predicates = [FeatureMessageSecurityAssist12] in {
+ def KIMDOpt : SideEffectTernaryMemMemRRFc<"kimd", 0xB93E, GR64, GR128, imm32zx4>;
+ def KLMDOpt : SideEffectTernaryMemMemRRFc<"klmd", 0xB93F, GR64, GR128, imm32zx4>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -2081,6 +2132,12 @@ let Predicates = [FeatureProcessorAssist] in {
// Miscellaneous Instructions.
//===----------------------------------------------------------------------===//
+// Count leading/trailing zeros.
+let Predicates = [FeatureMiscellaneousExtensions4] in {
+ def CLZG : UnaryRRE<"clzg", 0xB968, ctlz, GR64, GR64>;
+ def CTZG : UnaryRRE<"ctzg", 0xB969, cttz, GR64, GR64>;
+}
+
// Find leftmost one, AKA count leading zeros. The instruction actually
// returns a pair of GR64s, the first giving the number of leading zeros
// and the second giving a copy of the source with the leftmost one bit
@@ -2099,6 +2156,12 @@ let Predicates = [FeatureMiscellaneousExtensions3] in {
let Predicates = [FeaturePopulationCount], Defs = [CC] in
def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
+// Bit deposit and bit extract.
+let Predicates = [FeatureMiscellaneousExtensions4] in {
+ def BDEPG : BinaryRRFa<"bdepg", 0xB96D, int_s390_bdepg, GR64, GR64, GR64>;
+ def BEXTG : BinaryRRFa<"bextg", 0xB96C, int_s390_bextg, GR64, GR64, GR64>;
+}
+
// Search a block of memory for a character.
let mayLoad = 1, Defs = [CC] in
defm SRST : StringRRE<"srst", 0xB25E, z_search_string>;
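
For illustration, a hedged C sketch (not part of the patch) of the addressing pattern the LXA/LLXA selection patterns above target: a 64-bit base plus a sign- or zero-extended 32-bit index, optionally with a small constant, scaled by the element size.

    /* assumed flags: clang -target s390x-linux-gnu -march=arch15 -O2 */
    long *elt_addr(long *base, int i) {
      /* &base[i + 3] = base + (sext32(i + 3) << 3); with this patch the
         backend can select a single LXAG for the whole computation instead
         of a separate extend, shift and add, subject to the usual
         pattern-matching constraints. */
      return &base[i + 3];
    }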
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index c09f48891c1391..edd20a5de8c63f 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -377,6 +377,16 @@ let Predicates = [FeatureVector] in {
// Select.
def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
+
+ // Blend.
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VBLEND : TernaryVRRdGeneric<"vblend", 0xE789>;
+ def VBLENDB : TernaryVRRd<"vblendb", 0xE789, null_frag, v128b, v128b, 0>;
+ def VBLENDH : TernaryVRRd<"vblendh", 0xE789, null_frag, v128h, v128h, 1>;
+ def VBLENDF : TernaryVRRd<"vblendf", 0xE789, null_frag, v128f, v128f, 2>;
+ def VBLENDG : TernaryVRRd<"vblendg", 0xE789, null_frag, v128g, v128g, 3>;
+ def VBLENDQ : TernaryVRRd<"vblendq", 0xE789, null_frag, v128q, v128q, 4>;
+ }
}
//===----------------------------------------------------------------------===//
@@ -417,29 +427,47 @@ let Predicates = [FeatureVector] in {
def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>;
def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
+ // Generate element masks.
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VGEM : UnaryVRRaGeneric<"vgem", 0xE754>;
+ def VGEMB : UnaryVRRa<"vgemb", 0xE754, int_s390_vgemb, v128b, v128h, 0>;
+ def VGEMH : UnaryVRRa<"vgemh", 0xE754, int_s390_vgemh, v128h, v128b, 1>;
+ def VGEMF : UnaryVRRa<"vgemf", 0xE754, int_s390_vgemf, v128f, v128b, 2>;
+ def VGEMG : UnaryVRRa<"vgemg", 0xE754, int_s390_vgemg, v128g, v128b, 3>;
+ def VGEMQ : UnaryVRRa<"vgemq", 0xE754, int_s390_vgemq, v128q, v128b, 4>;
+ }
+
// Unpack high.
def VUPH : UnaryVRRaGeneric<"vuph", 0xE7D7>;
def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>;
def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>;
def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VUPHG : UnaryVRRa<"vuphg", 0xE7D7, z_unpack_high, v128q, v128g, 3>;
// Unpack logical high.
def VUPLH : UnaryVRRaGeneric<"vuplh", 0xE7D5>;
def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>;
def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>;
def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VUPLHG : UnaryVRRa<"vuplhg", 0xE7D5, z_unpackl_high, v128q, v128g, 3>;
// Unpack low.
def VUPL : UnaryVRRaGeneric<"vupl", 0xE7D6>;
def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>;
def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>;
def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VUPLG : UnaryVRRa<"vuplg", 0xE7D6, z_unpack_low, v128q, v128g, 3>;
// Unpack logical low.
def VUPLL : UnaryVRRaGeneric<"vupll", 0xE7D4>;
def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>;
def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>;
def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VUPLLG : UnaryVRRa<"vupllg", 0xE7D4, z_unpackl_low, v128q, v128g, 3>;
}
//===----------------------------------------------------------------------===//
@@ -466,6 +494,31 @@ defm : GenericVectorOps<v2i64, v2i64>;
defm : GenericVectorOps<v4f32, v4i32>;
defm : GenericVectorOps<v2f64, v2i64>;
+multiclass BlendVectorOps<ValueType type, ValueType inttype,
+ Instruction blend> {
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(type (vselect (inttype (z_vicmpl_zero VR128:$x)),
+ VR128:$y, VR128:$z)),
+ (blend VR128:$y, VR128:$z, VR128:$x)>;
+ def : Pat<(type (vselect (inttype (z_vnot (z_vicmpl_zero VR128:$x))),
+ VR128:$y, VR128:$z)),
+ (blend VR128:$z, VR128:$y, VR128:$x)>;
+ }
+}
+
+defm : BlendVectorOps<v16i8, v16i8, VBLENDB>;
+defm : BlendVectorOps<v8i16, v8i16, VBLENDH>;
+defm : BlendVectorOps<v4i32, v4i32, VBLENDF>;
+defm : BlendVectorOps<v2i64, v2i64, VBLENDG>;
+defm : BlendVectorOps<v4f32, v4i32, VBLENDF>;
+defm : BlendVectorOps<v2f64, v2i64, VBLENDG>;
+
+let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(i128 (or (and VR128:$y, (z_vicmph 0, VR128:$x)),
+ (and VR128:$z, (not (z_vicmph 0, VR128:$x))))),
+ (VBLENDQ VR128:$y, VR128:$z, VR128:$x)>;
+}
+
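These patterns recognize a vselect whose condition is an element-wise "is negative" test of a third vector, which is what the blend instructions implement directly. A sketch of IR that should now select a single blend instead of a compare plus vsel, assuming z_vicmpl_zero corresponds to a signed compare against zero as the VBLENDQ pattern above suggests:

define <16 x i8> @blend_bytes(<16 x i8> %mask, <16 x i8> %a, <16 x i8> %b) {
  %neg = icmp slt <16 x i8> %mask, zeroinitializer
  %res = select <16 x i1> %neg, <16 x i8> %a, <16 x i8> %b   ; should select vblendb
  ret <16 x i8> %res
}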
//===----------------------------------------------------------------------===//
// Integer arithmetic
//===----------------------------------------------------------------------===//
@@ -513,6 +566,8 @@ let Predicates = [FeatureVector] in {
def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VAVGQ : BinaryVRRc<"vavgq", 0xE7F2, int_s390_vavgq, v128q, v128q, 4>;
// Average logical.
def VAVGL : BinaryVRRcGeneric<"vavgl", 0xE7F0>;
@@ -520,6 +575,8 @@ let Predicates = [FeatureVector] in {
def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VAVGLQ : BinaryVRRc<"vavglq", 0xE7F0, int_s390_vavglq, v128q, v128q, 4>;
}
// Checksum.
@@ -531,6 +588,8 @@ let Predicates = [FeatureVector] in {
def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>;
def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>;
def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VCLZQ : UnaryVRRa<"vclzq", 0xE753, ctlz, v128q, v128q, 4>;
// Count trailing zeros.
def VCTZ : UnaryVRRaGeneric<"vctz", 0xE752>;
@@ -538,6 +597,38 @@ let Predicates = [FeatureVector] in {
def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>;
def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VCTZQ : UnaryVRRa<"vctzq", 0xE752, cttz, v128q, v128q, 4>;
+
+ // Divide.
+ let Predicates = [FeatureVectorEnhancements3] in {
+ let hasSideEffects = 1 in {
+ def VD : TernaryVRRcIntGeneric<"vd", 0xE7B2>;
+ def VDF : TernaryVRRcInt<"vdf", 0xE7B2, null_frag, v128f, v128f, 2>;
+ def VDG : TernaryVRRcInt<"vdg", 0xE7B2, null_frag, v128g, v128g, 3>;
+ def VDQ : TernaryVRRcInt<"vdq", 0xE7B2, null_frag, v128q, v128q, 4>;
+ }
+ def : Pat<(v4i32 (sdiv VR128:$x, VR128:$y)), (VDF VR128:$x, VR128:$y, 0)>;
+ def : Pat<(v2i64 (sdiv VR128:$x, VR128:$y)), (VDG VR128:$x, VR128:$y, 0)>;
+ def : Pat<(i128 (sdiv VR128:$x, VR128:$y)), (VDQ VR128:$x, VR128:$y, 0)>;
+ }
+
+ // Divide logical.
+ let Predicates = [FeatureVectorEnhancements3] in {
+ let hasSideEffects = 1 in {
+ def VDL : TernaryVRRcIntGeneric<"vdl", 0xE7B0>;
+ def VDLF : TernaryVRRcInt<"vdlf", 0xE7B0, null_frag, v128f, v128f, 2>;
+ def VDLG : TernaryVRRcInt<"vdlg", 0xE7B0, null_frag, v128g, v128g, 3>;
+ def VDLQ : TernaryVRRcInt<"vdlq", 0xE7B0, null_frag, v128q, v128q, 4>;
+ }
+ def : Pat<(v4i32 (udiv VR128:$x, VR128:$y)), (VDLF VR128:$x, VR128:$y, 0)>;
+ def : Pat<(v2i64 (udiv VR128:$x, VR128:$y)), (VDLG VR128:$x, VR128:$y, 0)>;
+ def : Pat<(i128 (udiv VR128:$x, VR128:$y)), (VDLQ VR128:$x, VR128:$y, 0)>;
+ }
+
+ // Evaluate.
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VEVAL : QuaternaryVRIk<"veval", 0xE788, int_s390_veval, v128b>;
let isCommutable = 1 in {
// Not exclusive or.
@@ -568,6 +659,8 @@ let Predicates = [FeatureVector] in {
def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>;
def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>;
def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VLCQ : UnaryVRRa<"vlcq", 0xE7DE, ineg, v128q, v128q, 4>;
// Load positive.
def VLP : UnaryVRRaGeneric<"vlp", 0xE7DF>;
@@ -575,6 +668,8 @@ let Predicates = [FeatureVector] in {
def VLPH : UnaryVRRa<"vlph", 0xE7DF, abs, v128h, v128h, 1>;
def VLPF : UnaryVRRa<"vlpf", 0xE7DF, abs, v128f, v128f, 2>;
def VLPG : UnaryVRRa<"vlpg", 0xE7DF, abs, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VLPQ : UnaryVRRa<"vlpq", 0xE7DF, abs, v128q, v128q, 4>;
let isCommutable = 1 in {
// Maximum.
@@ -583,6 +678,8 @@ let Predicates = [FeatureVector] in {
def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMXQ : BinaryVRRc<"vmxq", 0xE7FF, null_frag, v128q, v128q, 4>;
// Maximum logical.
def VMXL : BinaryVRRcGeneric<"vmxl", 0xE7FD>;
@@ -590,6 +687,8 @@ let Predicates = [FeatureVector] in {
def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMXLQ : BinaryVRRc<"vmxlq", 0xE7FD, null_frag, v128q, v128q, 4>;
}
let isCommutable = 1 in {
@@ -599,6 +698,8 @@ let Predicates = [FeatureVector] in {
def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMNQ : BinaryVRRc<"vmnq", 0xE7FE, null_frag, v128q, v128q, 4>;
// Minimum logical.
def VMNL : BinaryVRRcGeneric<"vmnl", 0xE7FC>;
@@ -606,6 +707,8 @@ let Predicates = [FeatureVector] in {
def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMNLQ : BinaryVRRc<"vmnlq", 0xE7FC, null_frag, v128q, v128q, 4>;
}
let isCommutable = 1 in {
@@ -614,42 +717,62 @@ let Predicates = [FeatureVector] in {
def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>;
def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VMALG : TernaryVRRd<"vmalg", 0xE7AA, z_muladd, v128g, v128g, 3>;
+ def VMALQ : TernaryVRRd<"vmalq", 0xE7AA, z_muladd, v128q, v128q, 4>;
+ }
// Multiply and add high.
def VMAH : TernaryVRRdGeneric<"vmah", 0xE7AB>;
def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VMAHG : TernaryVRRd<"vmahg", 0xE7AB, int_s390_vmahg, v128g, v128g, 3>;
+ def VMAHQ : TernaryVRRd<"vmahq", 0xE7AB, int_s390_vmahq, v128q, v128q, 4>;
+ }
// Multiply and add logical high.
def VMALH : TernaryVRRdGeneric<"vmalh", 0xE7A9>;
def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VMALHG : TernaryVRRd<"vmalhg", 0xE7A9, int_s390_vmalhg, v128g, v128g, 3>;
+ def VMALHQ : TernaryVRRd<"vmalhq", 0xE7A9, int_s390_vmalhq, v128q, v128q, 4>;
+ }
// Multiply and add even.
def VMAE : TernaryVRRdGeneric<"vmae", 0xE7AE>;
def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMAEG : TernaryVRRd<"vmaeg", 0xE7AE, int_s390_vmaeg, v128q, v128g, 3>;
// Multiply and add logical even.
def VMALE : TernaryVRRdGeneric<"vmale", 0xE7AC>;
def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMALEG : TernaryVRRd<"vmaleg", 0xE7AC, int_s390_vmaleg, v128q, v128g, 3>;
// Multiply and add odd.
def VMAO : TernaryVRRdGeneric<"vmao", 0xE7AF>;
def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMAOG : TernaryVRRd<"vmaog", 0xE7AF, int_s390_vmaog, v128q, v128g, 3>;
// Multiply and add logical odd.
def VMALO : TernaryVRRdGeneric<"vmalo", 0xE7AD>;
def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMALOG : TernaryVRRd<"vmalog", 0xE7AD, int_s390_vmalog, v128q, v128g, 3>;
}
let isCommutable = 1 in {
@@ -658,42 +781,66 @@ let Predicates = [FeatureVector] in {
def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VMHG : BinaryVRRc<"vmhg", 0xE7A3, int_s390_vmhg, v128g, v128g, 3>;
+ def VMHQ : BinaryVRRc<"vmhq", 0xE7A3, int_s390_vmhq, v128q, v128q, 4>;
+ }
// Multiply logical high.
def VMLH : BinaryVRRcGeneric<"vmlh", 0xE7A1>;
def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VMLHG : BinaryVRRc<"vmlhg", 0xE7A1, int_s390_vmlhg, v128g, v128g, 3>;
+ def VMLHQ : BinaryVRRc<"vmlhq", 0xE7A1, int_s390_vmlhq, v128q, v128q, 4>;
+ }
// Multiply low.
def VML : BinaryVRRcGeneric<"vml", 0xE7A2>;
def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>;
def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def VMLG : BinaryVRRc<"vmlg", 0xE7A2, mul, v128g, v128g, 3>;
+ def VMLQ : BinaryVRRc<"vmlq", 0xE7A2, mul, v128q, v128q, 4>;
+ }
// Multiply even.
def VME : BinaryVRRcGeneric<"vme", 0xE7A6>;
def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMEG : BinaryVRRc<"vmeg", 0xE7A6, int_s390_vmeg, v128q, v128g, 3>;
// Multiply logical even.
def VMLE : BinaryVRRcGeneric<"vmle", 0xE7A4>;
def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMLEG : BinaryVRRc<"vmleg", 0xE7A4, int_s390_vmleg, v128q, v128g, 3>;
// Multiply odd.
def VMO : BinaryVRRcGeneric<"vmo", 0xE7A7>;
def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMOG : BinaryVRRc<"vmog", 0xE7A7, int_s390_vmog, v128q, v128g, 3>;
// Multiply logical odd.
def VMLO : BinaryVRRcGeneric<"vmlo", 0xE7A5>;
def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VMLOG : BinaryVRRc<"vmlog", 0xE7A5, int_s390_vmlog, v128q, v128g, 3>;
+ }
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(i128 (mulhs VR128:$x, VR128:$y)), (VMHQ VR128:$x, VR128:$y)>;
+ def : Pat<(i128 (mulhu VR128:$x, VR128:$y)), (VMLHQ VR128:$x, VR128:$y)>;
}
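With VMLG/VMLQ and the high-part patterns above, 64-bit-element and full 128-bit multiplication each become a single instruction. For example:

define i128 @mul128(i128 %a, i128 %b) {
  %prod = mul i128 %a, %b          ; should select vmlq
  ret i128 %prod
}

The mulhs/mulhu patterns cover the high half when the DAG combiner forms those nodes (e.g. from a widened multiply followed by a shift).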
// Multiply sum logical.
@@ -730,6 +877,32 @@ let Predicates = [FeatureVector] in {
def VPOPCTG : UnaryVRRa<"vpopctg", 0xE750, ctpop, v128g, v128g, 3>;
}
+ // Remainder.
+ let Predicates = [FeatureVectorEnhancements3] in {
+ let hasSideEffects = 1 in {
+ def VR : TernaryVRRcIntGeneric<"vr", 0xE7B3>;
+ def VRF : TernaryVRRcInt<"vrf", 0xE7B3, null_frag, v128f, v128f, 2>;
+ def VRG : TernaryVRRcInt<"vrg", 0xE7B3, null_frag, v128g, v128g, 3>;
+ def VRQ : TernaryVRRcInt<"vrq", 0xE7B3, null_frag, v128q, v128q, 4>;
+ }
+ def : Pat<(v4i32 (srem VR128:$x, VR128:$y)), (VRF VR128:$x, VR128:$y, 0)>;
+ def : Pat<(v2i64 (srem VR128:$x, VR128:$y)), (VRG VR128:$x, VR128:$y, 0)>;
+ def : Pat<(i128 (srem VR128:$x, VR128:$y)), (VRQ VR128:$x, VR128:$y, 0)>;
+ }
+
+ // Remainder logical.
+ let Predicates = [FeatureVectorEnhancements3] in {
+ let hasSideEffects = 1 in {
+ def VRL : TernaryVRRcIntGeneric<"vrl", 0xE7B1>;
+ def VRLF : TernaryVRRcInt<"vrlf", 0xE7B1, null_frag, v128f, v128f, 2>;
+ def VRLG : TernaryVRRcInt<"vrlg", 0xE7B1, null_frag, v128g, v128g, 3>;
+ def VRLQ : TernaryVRRcInt<"vrlq", 0xE7B1, null_frag, v128q, v128q, 4>;
+ }
+ def : Pat<(v4i32 (urem VR128:$x, VR128:$y)), (VRLF VR128:$x, VR128:$y, 0)>;
+ def : Pat<(v2i64 (urem VR128:$x, VR128:$y)), (VRLG VR128:$x, VR128:$y, 0)>;
+ def : Pat<(i128 (urem VR128:$x, VR128:$y)), (VRLQ VR128:$x, VR128:$y, 0)>;
+ }
+
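Combined with the VD*/VDL* definitions earlier in this hunk, these patterns give direct hardware division and remainder for 32-bit and 64-bit elements and for i128. For example:

define <2 x i64> @div64(<2 x i64> %a, <2 x i64> %b) {
  %q = sdiv <2 x i64> %a, %b       ; should select vdg
  ret <2 x i64> %q
}

define i128 @rem128(i128 %a, i128 %b) {
  %r = urem i128 %a, %b            ; should select vrlq
  ret i128 %r
}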
// Element rotate left logical (with vector shift amount).
def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>;
def VERLLVB : BinaryVRRc<"verllvb", 0xE773, rotl, v128b, v128b, 0>;
@@ -887,6 +1060,144 @@ multiclass BitwiseVectorOps<ValueType type, SDPatternOperator not_op> {
def : Pat<(type (or VR128:$x, (not_op VR128:$y))),
(VOC VR128:$x, VR128:$y)>;
}
+ let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(type (and VR128:$x, (and VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 1)>;
+ def : Pat<(type (and (not_op VR128:$z), (and VR128:$x, VR128:$y))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 2)>;
+ def : Pat<(type (and VR128:$x, (xor VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 6)>;
+ def : Pat<(type (and VR128:$x, (or VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 7)>;
+ def : Pat<(type (and VR128:$x, (not_op (or VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 8)>;
+ def : Pat<(type (and VR128:$x, (not_op (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 9)>;
+ def : Pat<(type (and VR128:$x, (or VR128:$y, (not_op VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 11)>;
+ def : Pat<(type (and VR128:$x, (not_op (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 14)>;
+ def : Pat<(type (and (or VR128:$x, VR128:$y), (xor VR128:$z, (and VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 22)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$y), (and VR128:$z, (or VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 23)>;
+ def : Pat<(type (and (xor VR128:$x, VR128:$y), (xor VR128:$x, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 24)>;
+ def : Pat<(type (and (or VR128:$x, VR128:$y), (not_op (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 25)>;
+ def : Pat<(type (and (or VR128:$x, VR128:$y), (xor VR128:$x, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 26)>;
+ def : Pat<(type (and (or VR128:$x, VR128:$z), (or VR128:$y, (not_op VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 27)>;
+ def : Pat<(type (xor VR128:$x, (and VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 30)>;
+ def : Pat<(type (or VR128:$x, (and VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 31)>;
+ def : Pat<(type (and (not_op VR128:$z), (xor VR128:$x, VR128:$y))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 40)>;
+ def : Pat<(type (and (or VR128:$x, VR128:$y), (not_op (xor VR128:$z, (and VR128:$x, VR128:$y))))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 41)>;
+ def : Pat<(type (and (not_op VR128:$z), (or VR128:$x, VR128:$y))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 42)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$y), (and (not_op VR128:$z), (or VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 43)>;
+ def : Pat<(type (xor VR128:$y, (or VR128:$x, (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 44)>;
+ def : Pat<(type (xor VR128:$x, (and VR128:$y, (not_op VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 45)>;
+ def : Pat<(type (and (or VR128:$x, VR128:$y), (not_op (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 46)>;
+ def : Pat<(type (or VR128:$x, (and VR128:$y, (not_op VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 47)>;
+ def : Pat<(type (or (xor VR128:$x, VR128:$y), (and VR128:$x, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 61)>;
+ def : Pat<(type (or (xor VR128:$x, VR128:$y), (and VR128:$x, (not_op VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 62)>;
+ def : Pat<(type (xor (or VR128:$x, VR128:$y), (or VR128:$z, (and VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 104)>;
+ def : Pat<(type (xor VR128:$x, (xor VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 105)>;
+ def : Pat<(type (xor VR128:$z, (or VR128:$x, VR128:$y))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 106)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$y), (xor VR128:$z, (or VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 107)>;
+ def : Pat<(type (or (xor VR128:$y, VR128:$z), (and VR128:$x, (not_op VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 110)>;
+ def : Pat<(type (or VR128:$x, (xor VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 111)>;
+ def : Pat<(type (or (xor VR128:$x, VR128:$y), (xor VR128:$x, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 126)>;
+ def : Pat<(type (or VR128:$x, (or VR128:$y, VR128:$z))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 127)>;
+ def : Pat<(type (not_op (or VR128:$x, (or VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 128)>;
+ def : Pat<(type (not_op (or (xor VR128:$x, VR128:$y), (xor VR128:$x, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 129)>;
+ def : Pat<(type (not_op (or VR128:$z, (xor VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 130)>;
+ def : Pat<(type (and (not_op (xor VR128:$x, VR128:$y)), (or VR128:$x, (not_op VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 131)>;
+ def : Pat<(type (xor (or VR128:$y, VR128:$z), (or (not_op VR128:$x), (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 134)>;
+ def : Pat<(type (not_op (xor VR128:$x, (or VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 135)>;
+ def : Pat<(type (or (not_op (or VR128:$y, VR128:$z)), (and VR128:$x, (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 137)>;
+ def : Pat<(type (and (not_op VR128:$z), (or VR128:$x, (not_op VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 138)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$y), (not_op (or VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 139)>;
+ def : Pat<(type (or (not_op (or VR128:$y, VR128:$z)), (and VR128:$x, (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 142)>;
+ def : Pat<(type (or VR128:$x, (not_op (or VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 143)>;
+ def : Pat<(type (not_op (xor VR128:$x, (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 150)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$y), (not_op (xor VR128:$z, (or VR128:$x, VR128:$y))))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 151)>;
+ def : Pat<(type (not_op (or (and VR128:$x, VR128:$y), (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 152)>;
+ def : Pat<(type (xor VR128:$z, (or VR128:$x, (not_op VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 154)>;
+ def : Pat<(type (or (and VR128:$x, VR128:$y), (not_op (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 155)>;
+ def : Pat<(type (or (not_op (or VR128:$y, VR128:$z)), (xor VR128:$x, (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 158)>;
+ def : Pat<(type (or VR128:$x, (not_op (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 159)>;
+ def : Pat<(type (not_op (or VR128:$z, (and VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 168)>;
+ def : Pat<(type (not_op (xor VR128:$z, (and VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 169)>;
+ def : Pat<(type (or (not_op VR128:$z), (and VR128:$x, VR128:$y))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 171)>;
+ def : Pat<(type (and (not_op (and VR128:$x, VR128:$y)), (or VR128:$x, (not_op VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 172)>;
+ def : Pat<(type (not_op (and (xor VR128:$x, VR128:$z), (or VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 173)>;
+ def : Pat<(type (or (not_op VR128:$z), (and VR128:$x, (not_op VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 174)>;
+ def : Pat<(type (or (xor VR128:$x, VR128:$y), (not_op (or VR128:$x, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 188)>;
+ def : Pat<(type (not_op (and (xor VR128:$x, VR128:$z), (xor VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 189)>;
+ def : Pat<(type (or (not_op VR128:$z), (xor VR128:$x, VR128:$y))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 190)>;
+ def : Pat<(type (or (not_op VR128:$z), (or VR128:$x, VR128:$y))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 191)>;
+ def : Pat<(type (or (not_op (or VR128:$x, VR128:$y)), (and (not_op VR128:$z), (xor VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 232)>;
+ def : Pat<(type (xor (not_op (and VR128:$x, VR128:$y)), (and VR128:$z, (or VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 233)>;
+ def : Pat<(type (not_op (and VR128:$z, (or VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 234)>;
+ def : Pat<(type (not_op (and VR128:$z, (xor VR128:$x, VR128:$y)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 235)>;
+ def : Pat<(type (or VR128:$x, (not_op (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 239)>;
+ def : Pat<(type (not_op (and VR128:$x, (and VR128:$y, VR128:$z)))),
+ (VEVAL VR128:$x, VR128:$y, VR128:$z, 254)>;
+ }
}
defm : BitwiseVectorOps<v16i8, z_vnot>;
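The VEVAL patterns added above fold a large set of three-operand boolean expressions into one instruction whose immediate is the 8-bit truth table of the function. Two sketches matching the immediate-1 and immediate-105 entries in the list:

define <16 x i8> @and3(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
  %t = and <16 x i8> %b, %c
  %r = and <16 x i8> %a, %t        ; should select veval ..., 1
  ret <16 x i8> %r
}

define <16 x i8> @xor3(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
  %t = xor <16 x i8> %b, %c
  %r = xor <16 x i8> %a, %t        ; should select veval ..., 105
  ret <16 x i8> %r
}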
@@ -956,12 +1267,30 @@ defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
+let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(i128 (or (and VR128:$x, (z_vicmph VR128:$x, VR128:$y)),
+ (and VR128:$y, (not (z_vicmph VR128:$x, VR128:$y))))),
+ (VMXQ VR128:$x, VR128:$y)>;
+ def : Pat<(i128 (or (and VR128:$y, (z_vicmph VR128:$x, VR128:$y)),
+ (and VR128:$x, (not (z_vicmph VR128:$x, VR128:$y))))),
+ (VMNQ VR128:$x, VR128:$y)>;
+}
+
// Unsigned min/max.
defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
+let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(i128 (or (and VR128:$x, (z_vicmphl VR128:$x, VR128:$y)),
+ (and VR128:$y, (not (z_vicmphl VR128:$x, VR128:$y))))),
+ (VMXLQ VR128:$x, VR128:$y)>;
+ def : Pat<(i128 (or (and VR128:$y, (z_vicmphl VR128:$x, VR128:$y)),
+ (and VR128:$x, (not (z_vicmphl VR128:$x, VR128:$y))))),
+ (VMNLQ VR128:$x, VR128:$y)>;
+}
+
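These two blocks match the and/or/not form that an i128 select-of-compare is legalized into, so plain 128-bit min/max IR should now map onto the new instructions (a sketch, assuming the legalizer produces the masked form matched above):

define i128 @smax128(i128 %a, i128 %b) {
  %gt = icmp sgt i128 %a, %b
  %m  = select i1 %gt, i128 %a, i128 %b   ; should select vmxq
  ret i128 %m
}

The unsigned variants map to vmxlq/vmnlq in the same way.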
// Instantiate full-vector shifts.
multiclass FullVectorShiftOps<SDPatternOperator shift,
Instruction sbit, Instruction sbyte> {
@@ -994,6 +1323,8 @@ let Predicates = [FeatureVector] in {
def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>;
def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>;
def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VECQ : CompareVRRa<"vecq", 0xE7DB, z_scmp, v128q, 4>;
}
// Element compare logical.
@@ -1003,6 +1334,8 @@ let Predicates = [FeatureVector] in {
def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>;
def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>;
def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ def VECLQ : CompareVRRa<"veclq", 0xE7D9, z_ucmp, v128q, 4>;
}
// Compare equal.
@@ -1015,6 +1348,9 @@ let Predicates = [FeatureVector] in {
v128f, v128f, 2>;
defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes,
v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ defm VCEQQ : BinaryVRRbSPair<"vceqq", 0xE7F8, z_vicmpe, z_vicmpes,
+ v128q, v128q, 4>;
// Compare high.
def VCH : BinaryVRRbSPairGeneric<"vch", 0xE7FB>;
@@ -1026,6 +1362,9 @@ let Predicates = [FeatureVector] in {
v128f, v128f, 2>;
defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs,
v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ defm VCHQ : BinaryVRRbSPair<"vchq", 0xE7FB, z_vicmph, z_vicmphs,
+ v128q, v128q, 4>;
// Compare high logical.
def VCHL : BinaryVRRbSPairGeneric<"vchl", 0xE7F9>;
@@ -1037,6 +1376,9 @@ let Predicates = [FeatureVector] in {
v128f, v128f, 2>;
defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls,
v128g, v128g, 3>;
+ let Predicates = [FeatureVectorEnhancements3] in
+ defm VCHLQ : BinaryVRRbSPair<"vchlq", 0xE7F9, z_vicmphl, z_vicmphls,
+ v128q, v128q, 4>;
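With VECQ/VECLQ and the VCEQQ/VCHQ/VCHLQ pairs above, i128 comparisons no longer need to be emulated on the two-doubleword view. A sketch:

define i1 @eq128(i128 %a, i128 %b) {
  %c = icmp eq i128 %a, %b         ; should use the CC-setting vceqq form
  ret i1 %c
}

define i1 @ult128(i128 %a, i128 %b) {
  %c = icmp ult i128 %a, %b        ; should use veclq
  ret i1 %c
}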
// Test under mask.
let Defs = [CC] in
@@ -1631,6 +1973,14 @@ let Predicates = [FeatureVector] in {
(VLEG (VGBM 0), bdxaddr12only:$addr, 1)>;
}
+// In-register i128 sign-extensions on arch15.
+let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(i128 (sext_inreg VR128:$x, i8)), (VUPLG (VSEGB VR128:$x))>;
+ def : Pat<(i128 (sext_inreg VR128:$x, i16)), (VUPLG (VSEGH VR128:$x))>;
+ def : Pat<(i128 (sext_inreg VR128:$x, i32)), (VUPLG (VSEGF VR128:$x))>;
+ def : Pat<(i128 (sext_inreg VR128:$x, i64)), (VUPLG VR128:$x)>;
+}
+
// In-register i128 sign-extensions.
let Predicates = [FeatureVector] in {
def : Pat<(i128 (sext_inreg VR128:$x, i8)),
@@ -1643,6 +1993,20 @@ let Predicates = [FeatureVector] in {
(VSRAB (VREPG VR128:$x, 1), (VREPIB 64))>;
}
+// Sign-extensions from GPR to i128 on arch15.
+let Predicates = [FeatureVectorEnhancements3] in {
+ def : Pat<(i128 (sext_inreg (anyext GR32:$x), i8)),
+ (VUPLG (VLVGP (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$x, subreg_l32)),
+ (LGBR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$x, subreg_l32))))>;
+ def : Pat<(i128 (sext_inreg (anyext GR32:$x), i16)),
+ (VUPLG (VLVGP (LGHR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$x, subreg_l32)),
+ (LGHR (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$x, subreg_l32))))>;
+ def : Pat<(i128 (sext GR32:$x)),
+ (VUPLG (VLVGP (LGFR GR32:$x), (LGFR GR32:$x)))>;
+ def : Pat<(i128 (sext GR64:$x)),
+ (VUPLG (VLVGP GR64:$x, GR64:$x))>;
+}
+
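The arch15 patterns above replace the old shift-based i128 sign-extension sequences with VLVGP plus the new VUPLG, e.g.:

define i128 @sext_from_i64(i64 %x) {
  %r = sext i64 %x to i128         ; should become vlvgp + vuplg
  ret i128 %r
}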
// Sign-extensions from GPR to i128.
let Predicates = [FeatureVector] in {
def : Pat<(i128 (sext_inreg (anyext GR32:$x), i8)),
@@ -2025,3 +2389,14 @@ let Predicates = [FeatureVectorPackedDecimalEnhancement2] in {
def VUPKZL : BinaryVRRk<"vupkzl", 0xE65C>;
}
}
+
+let Predicates = [FeatureVectorPackedDecimalEnhancement3] in {
+ def VCVBQ : BinaryVRRk<"vcvbq", 0xE64E>;
+ let Defs = [CC] in
+ def VCVDQ : TernaryVRIj<"vcvdq", 0xE64A>;
+
+ let Defs = [CC] in {
+ def VTPOpt : TestExtraVRRg<"vtp", 0xE65F>;
+ def VTZ : TestExtraVRIl<"vtz", 0xE67F>;
+ }
+}
diff --git a/llvm/lib/Target/SystemZ/SystemZOperands.td b/llvm/lib/Target/SystemZ/SystemZOperands.td
index e7b45a40a3cc0b..22dcc4a6d7cd39 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperands.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperands.td
@@ -165,6 +165,13 @@ class BDVMode<string bitsize, string dispsize>
!cast<Operand>("disp"#dispsize#"imm"#bitsize),
!cast<RegisterOperand>("VR128"))>;
+// An addressing mode with a base, a displacement (held in a 32-bit operand)
+// and a 32-bit index.
+class LXAMode<string bitsize, string dispsize>
+ : AddressOperand<bitsize, dispsize, "", "LXAAddr",
+ (ops !cast<RegisterOperand>("ADDR"#bitsize),
+ !cast<Operand>("disp"#dispsize#"imm32"),
+ !cast<RegisterOperand>("ADDR32"))>;
+
//===----------------------------------------------------------------------===//
// Extracting immediate operands from nodes
// These all create MVT::i64 nodes to ensure the value is not sign-extended
@@ -601,18 +608,20 @@ def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
// Addressing modes
//===----------------------------------------------------------------------===//
+class DispOp<ValueType vt, code pred> : Operand<vt>, PatLeaf<(vt imm), pred>;
+
// 12-bit displacement operands.
let EncoderMethod = "getImmOpValue<SystemZ::FK_390_U12Imm>",
DecoderMethod = "decodeU12ImmOperand" in {
- def disp12imm32 : Operand<i32>;
- def disp12imm64 : Operand<i64>;
+ def disp12imm32 : DispOp<i32, [{ return N->getAPIntValue().isIntN(12); }]>;
+ def disp12imm64 : DispOp<i64, [{ return N->getAPIntValue().isIntN(12); }]>;
}
// 20-bit displacement operands.
let EncoderMethod = "getImmOpValue<SystemZ::FK_390_S20Imm>",
DecoderMethod = "decodeS20ImmOperand" in {
- def disp20imm32 : Operand<i32>;
- def disp20imm64 : Operand<i64>;
+ def disp20imm32 : DispOp<i32, [{ return N->getAPIntValue().isSignedIntN(20); }]>;
+ def disp20imm64 : DispOp<i64, [{ return N->getAPIntValue().isSignedIntN(20); }]>;
}
def BDAddr32Disp12 : AddressAsmOperand<"BDAddr", "32", "12">;
@@ -625,6 +634,7 @@ def BDLAddr64Disp12Len4 : AddressAsmOperand<"BDLAddr", "64", "12", "Len4">;
def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">;
def BDRAddr64Disp12 : AddressAsmOperand<"BDRAddr", "64", "12">;
def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">;
+def LXAAddr64Disp20 : AddressAsmOperand<"LXAAddr", "64", "20">;
// DAG patterns and operands for addressing modes. Each mode has
// the form <type><range><group>[<len>] where:
@@ -635,6 +645,7 @@ def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">;
// mviaddr : like bdaddr, but reject cases with a natural index
// bdxaddr : base + displacement + index
// laaddr : like bdxaddr, but used for Load Address operations
+// lxaaddr : like bdxaddr, but used for Load (Logical) Indexed Address
// dynalloc : base + displacement + index + ADJDYNALLOC
// bdladdr : base + displacement with a length field
// bdvaddr : base + displacement with a vector index
@@ -669,6 +680,7 @@ def bdxaddr20pair : BDXMode<"BDXAddr", "64", "20", "Pair">;
def dynalloc12only : BDXMode<"DynAlloc", "64", "12", "Only">;
def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">;
def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">;
+def lxaaddr20only : LXAMode< "64", "20">;
def bdladdr12onlylen4 : BDLMode<"BDLAddr", "64", "12", "Only", "4">;
def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">;
def bdraddr12only : BDRMode<"BDRAddr", "64", "12", "Only">;
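The new lxaaddr addressing mode, together with the shl1-shl4 and assertsext32/assertzext32 fragments added to SystemZOperators.td below, is meant to fold base-plus-scaled-32-bit-index address arithmetic into a single instruction of the new LXA family. A sketch of the kind of computation targeted:

define i64 @element_address(i64 %base, i32 %index) {
  %ext    = sext i32 %index to i64
  %scaled = shl i64 %ext, 2
  %addr   = add i64 %base, %scaled   ; should fold into one lxa-family instruction
  ret i64 %addr
}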
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 15b334b042d2dd..39670adaa257e9 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -154,6 +154,8 @@ def SDT_ZExtractVectorElt : SDTypeProfile<1, 2,
SDTCisVT<2, i32>]>;
def SDT_ZReplicate : SDTypeProfile<1, 1,
[SDTCisVec<0>]>;
+def SDT_ZVecUnpack : SDTypeProfile<1, 1,
+ [SDTCisVec<1>]>;
def SDT_ZVecUnaryConv : SDTypeProfile<1, 1,
[SDTCisVec<0>,
SDTCisVec<1>]>;
@@ -164,6 +166,13 @@ def SDT_ZVecUnaryCC : SDTypeProfile<2, 1,
[SDTCisVec<0>,
SDTCisVT<1, i32>,
SDTCisSameAs<0, 2>]>;
+def SDT_ZVecCompare : SDTypeProfile<1, 2,
+ [SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def SDT_ZVecCompareCC : SDTypeProfile<2, 2,
+ [SDTCisVT<1, i32>,
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 2>]>;
def SDT_ZVecBinary : SDTypeProfile<1, 2,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -345,10 +354,10 @@ def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConvCC>;
def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConvCC>;
-def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
-def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
-def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
-def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>;
+def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnpack>;
+def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnpack>;
+def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnpack>;
+def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnpack>;
def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
@@ -358,12 +367,12 @@ def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
def z_vrotl_by_scalar : SDNode<"SystemZISD::VROTL_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZBinaryConv>;
-def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
-def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
-def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
-def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinaryCC>;
-def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinaryCC>;
-def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinaryCC>;
+def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecCompare>;
+def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecCompare>;
+def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecCompare>;
+def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecCompareCC>;
+def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecCompareCC>;
+def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecCompareCC>;
def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
def z_strict_vfcmpe : SDNode<"SystemZISD::STRICT_VFCMPE",
SDT_ZVecBinaryConv, [SDNPHasChain]>;
@@ -535,6 +544,12 @@ def z_ucmp : PatFrag<(ops node:$a, node:$b), (z_icmp node:$a, node:$b, timm), [{
def z_tm_reg : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, timm)>;
def z_tm_mem : PatFrag<(ops node:$a, node:$b), (z_tm node:$a, node:$b, 0)>;
+// Shifts by small immediate amounts.
+def shl1 : PatFrag<(ops node:$src), (shl node:$src, (i32 1))>;
+def shl2 : PatFrag<(ops node:$src), (shl node:$src, (i32 2))>;
+def shl3 : PatFrag<(ops node:$src), (shl node:$src, (i32 3))>;
+def shl4 : PatFrag<(ops node:$src), (shl node:$src, (i32 4))>;
+
// Register sign-extend operations. Sub-32-bit values are represented as i32s.
def sext8 : PatFrag<(ops node:$src), (sext_inreg node:$src, i8)>;
def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
@@ -550,6 +565,15 @@ def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
+// Match a 64-bit value that is guaranteed to have been sign-
+// or zero-extended from a 32-bit value.
+def assertsext32 : PatFrag<(ops node:$src), (assertsext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+def assertzext32 : PatFrag<(ops node:$src), (assertzext node:$src), [{
+ return cast<VTSDNode>(N->getOperand(1))->getVT() == MVT::i32;
+}]>;
+
// Match a load or a non-extending atomic load.
def z_load : PatFrags<(ops node:$ptr),
[(load node:$ptr),
diff --git a/llvm/lib/Target/SystemZ/SystemZProcessors.td b/llvm/lib/Target/SystemZ/SystemZProcessors.td
index d00b94d002420e..75b6671dc77234 100644
--- a/llvm/lib/Target/SystemZ/SystemZProcessors.td
+++ b/llvm/lib/Target/SystemZ/SystemZProcessors.td
@@ -41,3 +41,4 @@ def : ProcessorModel<"z15", Z15Model, Arch13SupportedFeatures.List>;
def : ProcessorModel<"arch14", Z16Model, Arch14SupportedFeatures.List>;
def : ProcessorModel<"z16", Z16Model, Arch14SupportedFeatures.List>;
+def : ProcessorModel<"arch15", Z16Model, Arch15SupportedFeatures.List>;
diff --git a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
index e4e84460399df3..6c376e4bf622c3 100644
--- a/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZSubtarget.cpp
@@ -44,9 +44,11 @@ SystemZSubtarget &SystemZSubtarget::initializeSubtargetDependencies(
if (!HasVector) {
HasVectorEnhancements1 = false;
HasVectorEnhancements2 = false;
+ HasVectorEnhancements3 = false;
HasVectorPackedDecimal = false;
HasVectorPackedDecimalEnhancement = false;
HasVectorPackedDecimalEnhancement2 = false;
+ HasVectorPackedDecimalEnhancement3 = false;
}
return *this;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 772efcdf8f9fc1..2b948329394193 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -648,12 +648,16 @@ InstructionCost SystemZTTIImpl::getArithmeticInstrCost(
return VF * DivMulSeqCost +
BaseT::getScalarizationOverhead(VTy, Args, Tys, CostKind);
}
- if ((SignedDivRem || UnsignedDivRem) && VF > 4)
- // Temporary hack: disable high vectorization factors with integer
- // division/remainder, which will get scalarized and handled with
- // GR128 registers. The mischeduler is not clever enough to avoid
- // spilling yet.
- return 1000;
+ if (SignedDivRem || UnsignedDivRem) {
+ if (ST->hasVectorEnhancements3() && ScalarBits >= 32)
+ return NumVectors * DivInstrCost;
+ else if (VF > 4)
+ // Temporary hack: disable high vectorization factors with integer
+ // division/remainder, which will get scalarized and handled with
+ // GR128 registers. The mischeduler is not clever enough to avoid
+ // spilling yet.
+ return 1000;
+ }
// These FP operations are supported with a single vector instruction for
// double (base implementation assumes float generally costs 2). For
@@ -900,8 +904,11 @@ InstructionCost SystemZTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt)) {
if (Src->isIntegerTy(1)) {
- if (DstScalarBits == 128)
+ if (DstScalarBits == 128) {
+ if (Opcode == Instruction::SExt && ST->hasVectorEnhancements3())
+          return 0; /*VCEQQ*/
return 5 /*branch seq.*/;
+ }
if (ST->hasLoadStoreOnCond2())
return 2; // li 0; loc 1
@@ -1089,9 +1096,18 @@ InstructionCost SystemZTTIImpl::getCmpSelInstrCost(
return Cost;
}
case Instruction::Select:
- if (ValTy->isFloatingPointTy() || isInt128InVR(ValTy))
- return 4; // No LOC for FP / i128 - costs a conditional jump.
- return 1; // Load On Condition / Select Register.
+ if (ValTy->isFloatingPointTy())
+ return 4; // No LOC for FP - costs a conditional jump.
+
+ // When selecting based on an i128 comparison, LOC / VSEL is possible
+ // if i128 comparisons are directly supported.
+ if (I != nullptr)
+ if (ICmpInst *CI = dyn_cast<ICmpInst>(I->getOperand(0)))
+ if (CI->getOperand(0)->getType()->isIntegerTy(128))
+ return ST->hasVectorEnhancements3() ? 1 : 4;
+
+ // Load On Condition / Select Register available, except for i128.
+ return !isInt128InVR(ValTy) ? 1 : 4;
}
}
else if (ST->hasVector()) {
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 979b44b22338e1..ba7032025150e5 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -424,8 +424,11 @@ StringRef getCPUNameFromS390Model(unsigned int Id, bool HaveVectorSupport) {
return HaveVectorSupport? "z15" : "zEC12";
case 3931:
case 3932:
- default:
return HaveVectorSupport? "z16" : "zEC12";
+ case 9175:
+ case 9176:
+ default:
+ return HaveVectorSupport? "arch15" : "zEC12";
}
}
} // end anonymous namespace
diff --git a/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll b/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll
index 7edef36ee32f5d..2f13d7e3ef9b1c 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/divrem-reg.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s --check-prefixes=CHECK,Z13
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s --check-prefixes=CHECK,ARC15
; Check costs of divisions by register
;
@@ -8,279 +10,486 @@
; Scalar sdiv
define i64 @fun0(i64 %a, i64 %b) {
+; CHECK-LABEL: 'fun0'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i64 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
%r = sdiv i64 %a, %b
ret i64 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i64
}
define i32 @fun1(i32 %a, i32 %b) {
+; CHECK-LABEL: 'fun1'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i32 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
+;
%r = sdiv i32 %a, %b
ret i32 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i32 %a, %b
}
define i16 @fun2(i16 %a, i16 %b) {
+; CHECK-LABEL: 'fun2'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i16 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %r
+;
%r = sdiv i16 %a, %b
ret i16 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i16 %a, %b
}
define i8 @fun3(i8 %a, i8 %b) {
+; CHECK-LABEL: 'fun3'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i8 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %r
+;
%r = sdiv i8 %a, %b
ret i8 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv i8 %a, %b
}
; Vector sdiv
define <2 x i64> @fun4(<2 x i64> %a, <2 x i64> %b) {
+; Z13-LABEL: 'fun4'
+; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = sdiv <2 x i64> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
+; ARC15-LABEL: 'fun4'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i64> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
%r = sdiv <2 x i64> %a, %b
ret <2 x i64> %r
-; CHECK: Cost Model: Found an estimated cost of 47 for instruction: %r = sdiv <2 x i64>
}
define <4 x i32> @fun5(<4 x i32> %a, <4 x i32> %b) {
+; Z13-LABEL: 'fun5'
+; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = sdiv <4 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
+; ARC15-LABEL: 'fun5'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <4 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
%r = sdiv <4 x i32> %a, %b
ret <4 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = sdiv <4 x i32>
}
define <2 x i32> @fun6(<2 x i32> %a, <2 x i32> %b) {
+; Z13-LABEL: 'fun6'
+; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = sdiv <2 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
+; ARC15-LABEL: 'fun6'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = sdiv <2 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
%r = sdiv <2 x i32> %a, %b
ret <2 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %r = sdiv <2 x i32>
}
define <8 x i16> @fun7(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: 'fun7'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <8 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r
+;
%r = sdiv <8 x i16> %a, %b
ret <8 x i16> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <8 x i16>
}
define <4 x i16> @fun8(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: 'fun8'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = sdiv <4 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r
+;
%r = sdiv <4 x i16> %a, %b
ret <4 x i16> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = sdiv <4 x i16>
}
define <16 x i8> @fun9(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: 'fun9'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <16 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r
+;
%r = sdiv <16 x i8> %a, %b
ret <16 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <16 x i8>
}
define <8 x i8> @fun10(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: 'fun10'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <8 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r
+;
%r = sdiv <8 x i8> %a, %b
ret <8 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <8 x i8>
}
; Scalar udiv
define i64 @fun11(i64 %a, i64 %b) {
+; CHECK-LABEL: 'fun11'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i64 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
%r = udiv i64 %a, %b
ret i64 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i64 %a, %b
}
define i32 @fun12(i32 %a, i32 %b) {
+; CHECK-LABEL: 'fun12'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i32 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
+;
%r = udiv i32 %a, %b
ret i32 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i32
}
define i16 @fun13(i16 %a, i16 %b) {
+; CHECK-LABEL: 'fun13'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i16 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %r
+;
%r = udiv i16 %a, %b
ret i16 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i16
}
define i8 @fun14(i8 %a, i8 %b) {
+; CHECK-LABEL: 'fun14'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i8 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %r
+;
%r = udiv i8 %a, %b
ret i8 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv i8
}
; Vector udiv
define <2 x i64> @fun15(<2 x i64> %a, <2 x i64> %b) {
+; Z13-LABEL: 'fun15'
+; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = udiv <2 x i64> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
+; ARC15-LABEL: 'fun15'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i64> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
%r = udiv <2 x i64> %a, %b
ret <2 x i64> %r
-; CHECK: Cost Model: Found an estimated cost of 47 for instruction: %r = udiv <2 x i64>
}
define <4 x i32> @fun16(<4 x i32> %a, <4 x i32> %b) {
+; Z13-LABEL: 'fun16'
+; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = udiv <4 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
+; ARC15-LABEL: 'fun16'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <4 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
%r = udiv <4 x i32> %a, %b
ret <4 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = udiv <4 x i32>
}
define <2 x i32> @fun17(<2 x i32> %a, <2 x i32> %b) {
+; Z13-LABEL: 'fun17'
+; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = udiv <2 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
+; ARC15-LABEL: 'fun17'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = udiv <2 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
%r = udiv <2 x i32> %a, %b
ret <2 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %r = udiv <2 x i32>
}
define <8 x i16> @fun18(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: 'fun18'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = udiv <8 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r
+;
%r = udiv <8 x i16> %a, %b
ret <8 x i16> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = udiv <8 x i16>
}
define <4 x i16> @fun19(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: 'fun19'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = udiv <4 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r
+;
%r = udiv <4 x i16> %a, %b
ret <4 x i16> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = udiv <4 x i16>
}
define <16 x i8> @fun20(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: 'fun20'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = udiv <16 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r
+;
%r = udiv <16 x i8> %a, %b
ret <16 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = udiv <16 x i8>
}
define <8 x i8> @fun21(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: 'fun21'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = udiv <8 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r
+;
%r = udiv <8 x i8> %a, %b
ret <8 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = udiv <8 x i8>
}
; Scalar srem
define i64 @fun22(i64 %a, i64 %b) {
+; CHECK-LABEL: 'fun22'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i64 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
%r = srem i64 %a, %b
ret i64 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i64
}
define i32 @fun23(i32 %a, i32 %b) {
+; CHECK-LABEL: 'fun23'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i32 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
+;
%r = srem i32 %a, %b
ret i32 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i32
}
define i16 @fun24(i16 %a, i16 %b) {
+; CHECK-LABEL: 'fun24'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i16 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %r
+;
%r = srem i16 %a, %b
ret i16 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i16
}
define i8 @fun25(i8 %a, i8 %b) {
+; CHECK-LABEL: 'fun25'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i8 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %r
+;
%r = srem i8 %a, %b
ret i8 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = srem i8
}
; Vector srem
define <2 x i64> @fun26(<2 x i64> %a, <2 x i64> %b) {
+; Z13-LABEL: 'fun26'
+; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = srem <2 x i64> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
+; ARC15-LABEL: 'fun26'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i64> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
%r = srem <2 x i64> %a, %b
ret <2 x i64> %r
-; CHECK: Cost Model: Found an estimated cost of 47 for instruction: %r = srem <2 x i64>
}
define <4 x i32> @fun27(<4 x i32> %a, <4 x i32> %b) {
+; Z13-LABEL: 'fun27'
+; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = srem <4 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
+; ARC15-LABEL: 'fun27'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <4 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
%r = srem <4 x i32> %a, %b
ret <4 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = srem <4 x i32>
}
define <2 x i32> @fun28(<2 x i32> %a, <2 x i32> %b) {
+; Z13-LABEL: 'fun28'
+; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = srem <2 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
+; ARC15-LABEL: 'fun28'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = srem <2 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
%r = srem <2 x i32> %a, %b
ret <2 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %r = srem <2 x i32>
}
define <8 x i16> @fun29(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: 'fun29'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = srem <8 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r
+;
%r = srem <8 x i16> %a, %b
ret <8 x i16> %r
-; CHECK: ost Model: Found an estimated cost of 1000 for instruction: %r = srem <8 x i16>
}
define <4 x i16> @fun30(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: 'fun30'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = srem <4 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r
+;
%r = srem <4 x i16> %a, %b
ret <4 x i16> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = srem <4 x i16>
}
define <16 x i8> @fun31(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: 'fun31'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = srem <16 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r
+;
%r = srem <16 x i8> %a, %b
ret <16 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = srem <16 x i8>
}
define <8 x i8> @fun32(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: 'fun32'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = srem <8 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r
+;
%r = srem <8 x i8> %a, %b
ret <8 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = srem <8 x i8>
}
; Scalar urem
define i64 @fun33(i64 %a, i64 %b) {
+; CHECK-LABEL: 'fun33'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i64 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i64 %r
+;
%r = urem i64 %a, %b
ret i64 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i64
}
define i32 @fun34(i32 %a, i32 %b) {
+; CHECK-LABEL: 'fun34'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i32 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 %r
+;
%r = urem i32 %a, %b
ret i32 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i32
}
define i16 @fun35(i16 %a, i16 %b) {
+; CHECK-LABEL: 'fun35'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i16 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i16 %r
+;
%r = urem i16 %a, %b
ret i16 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i16
}
define i8 @fun36(i8 %a, i8 %b) {
+; CHECK-LABEL: 'fun36'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i8 %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i8 %r
+;
%r = urem i8 %a, %b
ret i8 %r
-; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %r = urem i8
}
; Vector urem
define <2 x i64> @fun37(<2 x i64> %a, <2 x i64> %b) {
+; Z13-LABEL: 'fun37'
+; Z13-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %r = urem <2 x i64> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
+; ARC15-LABEL: 'fun37'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i64> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i64> %r
+;
%r = urem <2 x i64> %a, %b
ret <2 x i64> %r
-; CHECK: Cost Model: Found an estimated cost of 47 for instruction: %r = urem <2 x i64>
}
define <4 x i32> @fun38(<4 x i32> %a, <4 x i32> %b) {
+; Z13-LABEL: 'fun38'
+; Z13-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = urem <4 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
+; ARC15-LABEL: 'fun38'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <4 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %r
+;
%r = urem <4 x i32> %a, %b
ret <4 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = urem <4 x i32>
}
define <2 x i32> @fun39(<2 x i32> %a, <2 x i32> %b) {
+; Z13-LABEL: 'fun39'
+; Z13-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %r = urem <2 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
+; ARC15-LABEL: 'fun39'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %r = urem <2 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %r
+;
%r = urem <2 x i32> %a, %b
ret <2 x i32> %r
-; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %r = urem <2 x i32>
}
define <8 x i16> @fun40(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: 'fun40'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <8 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i16> %r
+;
%r = urem <8 x i16> %a, %b
ret <8 x i16> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <8 x i16>
}
define <4 x i16> @fun41(<4 x i16> %a, <4 x i16> %b) {
+; CHECK-LABEL: 'fun41'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %r = urem <4 x i16> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i16> %r
+;
%r = urem <4 x i16> %a, %b
ret <4 x i16> %r
-; CHECK: Cost Model: Found an estimated cost of 94 for instruction: %r = urem <4 x i16>
}
define <16 x i8> @fun42(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: 'fun42'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <16 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x i8> %r
+;
%r = urem <16 x i8> %a, %b
ret <16 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <16 x i8>
}
define <8 x i8> @fun43(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: 'fun43'
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <8 x i8> %a, %b
+; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i8> %r
+;
%r = urem <8 x i8> %a, %b
ret <8 x i8> %r
-; CHECK: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <8 x i8>
+}
+
+; Also test some wider inputs:
+define <8 x i64> @fun44(<8 x i64> %a, <8 x i64> %b) {
+; Z13-LABEL: 'fun44'
+; Z13-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = sdiv <8 x i64> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r
+;
+; ARC15-LABEL: 'fun44'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %r = sdiv <8 x i64> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i64> %r
+;
+ %r = sdiv <8 x i64> %a, %b
+ ret <8 x i64> %r
+}
+
+define <8 x i32> @fun45(<8 x i32> %a, <8 x i32> %b) {
+; Z13-LABEL: 'fun45'
+; Z13-NEXT: Cost Model: Found an estimated cost of 1000 for instruction: %r = urem <8 x i32> %a, %b
+; Z13-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r
+;
+; ARC15-LABEL: 'fun45'
+; ARC15-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %r = urem <8 x i32> %a, %b
+; ARC15-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x i32> %r
+;
+ %r = urem <8 x i32> %a, %b
+ ret <8 x i32> %r
}
diff --git a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
index 66da6de3bc7681..105e634cea1ace 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/i128-cmp-ext-conv.ll
@@ -1,10 +1,12 @@
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s --check-prefixes=CHECK,Z13
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s --check-prefixes=CHECK,ARC15
;
define i128 @fun1(i128 %val1, i128 %val2) {
; CHECK-LABEL: 'fun1'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
-; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v128 = sext i1 %cmp to i128
+; Z13: Cost Model: Found an estimated cost of 5 for instruction: %v128 = sext i1 %cmp to i128
+; ARC15: Cost Model: Found an estimated cost of 0 for instruction: %v128 = sext i1 %cmp to i128
%cmp = icmp eq i128 %val1, %val2
%v128 = sext i1 %cmp to i128
ret i128 %v128
@@ -24,13 +26,39 @@ define i128 @fun3(i128 %val1, i128 %val2,
; CHECK-LABEL: 'fun3'
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp eq i128 %val1, %val2
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i128 %val3, %val4
-; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add
+; Z13: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add
+; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add
%cmp = icmp eq i128 %val1, %val2
%add = add i128 %val3, %val4
%sel = select i1 %cmp, i128 %val3, i128 %add
ret i128 %sel
}
+define i64 @fun3_sel64(i128 %val1, i128 %val2,
+ i64 %val3, i64 %val4) {
+; CHECK-LABEL: 'fun3_sel64'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp ugt i128 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i64 %val3, %val4
+; Z13: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add
+; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %sel = select i1 %cmp, i64 %val3, i64 %add
+ %cmp = icmp ugt i128 %val1, %val2
+ %add = add i64 %val3, %val4
+ %sel = select i1 %cmp, i64 %val3, i64 %add
+ ret i64 %sel
+}
+
+define i128 @fun3_cmp64(i64 %val1, i64 %val2,
+ i128 %val3, i128 %val4) {
+; CHECK-LABEL: 'fun3_cmp64'
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %cmp = icmp slt i64 %val1, %val2
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %add = add i128 %val3, %val4
+; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %sel = select i1 %cmp, i128 %val3, i128 %add
+ %cmp = icmp slt i64 %val1, %val2
+ %add = add i128 %val3, %val4
+ %sel = select i1 %cmp, i128 %val3, i128 %add
+ ret i128 %sel
+}
+
define i128 @fun4(ptr %src) {
; CHECK-LABEL: 'fun4'
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res = sext i64 %v to i128
diff --git a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
index fc4d19c5cdf9e5..bf5cbfb48a77bd 100644
--- a/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
+++ b/llvm/test/Analysis/CostModel/SystemZ/int-arith.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=systemz-unknown -mcpu=arch15 | FileCheck %s -check-prefix=ARC15
;
; Note: The scalarized vector instructions costs are not including any
; extracts, due to the undef operands.
@@ -131,18 +132,22 @@ define void @mul() {
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res5 = mul <2 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res6 = mul <2 x i32> undef, undef
; CHECK: Cost Model: Found an estimated cost of 3 for instruction: %res7 = mul <2 x i64> undef, undef
+; ARC15: Cost Model: Found an estimated cost of 1 for instruction: %res7 = mul <2 x i64> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res8 = mul <4 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res9 = mul <4 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res10 = mul <4 x i32> undef, undef
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %res11 = mul <4 x i64> undef, undef
+; ARC15: Cost Model: Found an estimated cost of 2 for instruction: %res11 = mul <4 x i64> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res12 = mul <8 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res13 = mul <8 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res14 = mul <8 x i32> undef, undef
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %res15 = mul <8 x i64> undef, undef
+; ARC15: Cost Model: Found an estimated cost of 4 for instruction: %res15 = mul <8 x i64> undef, undef
; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %res16 = mul <16 x i8> undef, undef
; CHECK: Cost Model: Found an estimated cost of 2 for instruction: %res17 = mul <16 x i16> undef, undef
; CHECK: Cost Model: Found an estimated cost of 4 for instruction: %res18 = mul <16 x i32> undef, undef
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %res19 = mul <16 x i64> undef, undef
+; ARC15: Cost Model: Found an estimated cost of 8 for instruction: %res19 = mul <16 x i64> undef, undef
ret void;
}
diff --git a/llvm/test/CodeGen/SystemZ/args-12.ll b/llvm/test/CodeGen/SystemZ/args-12.ll
index d6d533f22d3a38..f8954eee550f50 100644
--- a/llvm/test/CodeGen/SystemZ/args-12.ll
+++ b/llvm/test/CodeGen/SystemZ/args-12.ll
@@ -2,6 +2,7 @@
; Test the handling of i128 argument values
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
declare void @bar(i64, i64, i64, i64, i128,
i64, i64, i64, i64, i128)
diff --git a/llvm/test/CodeGen/SystemZ/args-13.ll b/llvm/test/CodeGen/SystemZ/args-13.ll
index 50636f23e859d3..d9e986cbb6a4b1 100644
--- a/llvm/test/CodeGen/SystemZ/args-13.ll
+++ b/llvm/test/CodeGen/SystemZ/args-13.ll
@@ -2,6 +2,7 @@
; Test incoming i128 arguments.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
; Do some arithmetic so that we can see the register being used.
define void @f1(ptr %r2, i16 %r3, i32 %r4, i64 %r5, i128 %r6) {
diff --git a/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll b/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll
new file mode 100644
index 00000000000000..f5b0aaa243a793
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/bitop-intrinsics.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test bit deposit / extract intrinsics
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+declare i64 @llvm.s390.bdepg(i64, i64)
+declare i64 @llvm.s390.bextg(i64, i64)
+
+; BDEPG.
+define i64 @test_bdepg(i64 %a, i64 %b) {
+; CHECK-LABEL: test_bdepg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bdepg %r2, %r2, %r3
+; CHECK-NEXT: br %r14
+ %res = call i64 @llvm.s390.bdepg(i64 %a, i64 %b)
+ ret i64 %res
+}
+
+; BEXTG.
+define i64 @test_bextg(i64 %a, i64 %b) {
+; CHECK-LABEL: test_bextg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: bextg %r2, %r2, %r3
+; CHECK-NEXT: br %r14
+ %res = call i64 @llvm.s390.bextg(i64 %a, i64 %b)
+ ret i64 %res
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/int-abs-03.ll b/llvm/test/CodeGen/SystemZ/int-abs-03.ll
new file mode 100644
index 00000000000000..238b2431c9b30b
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-abs-03.ll
@@ -0,0 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test 128-bit absolute value in vector registers on arch15
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+define i128 @f1(i128 %src) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vlpq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt i128 %src, 0
+ %neg = sub i128 0, %src
+ %res = select i1 %cmp, i128 %neg, i128 %src
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-add-19.ll b/llvm/test/CodeGen/SystemZ/int-add-19.ll
index 6a4eea1027db8e..a9bce2c827ff96 100644
--- a/llvm/test/CodeGen/SystemZ/int-add-19.ll
+++ b/llvm/test/CodeGen/SystemZ/int-add-19.ll
@@ -2,6 +2,7 @@
; Test 128-bit addition in vector registers on z13 and later
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
define i128 @f1(i128 %a, i128 %b) {
; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/int-cmp-64.ll b/llvm/test/CodeGen/SystemZ/int-cmp-64.ll
new file mode 100644
index 00000000000000..be212ef2a72118
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-cmp-64.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test 128-bit comparisons in vector registers on arch15
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 -verify-machineinstrs | FileCheck %s
+
+; Equality comparison.
+define i64 @f1(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vecq %v1, %v0
+; CHECK-NEXT: selgre %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp eq i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Inequality comparison.
+define i64 @f2(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vecq %v1, %v0
+; CHECK-NEXT: selgrlh %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp ne i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Signed greater-than comparison.
+define i64 @f3(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vecq %v1, %v0
+; CHECK-NEXT: selgrh %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp sgt i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Signed less-than comparison.
+define i64 @f4(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vecq %v1, %v0
+; CHECK-NEXT: selgrl %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp slt i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Signed greater-or-equal comparison.
+define i64 @f5(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vecq %v1, %v0
+; CHECK-NEXT: selgrhe %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp sge i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Signed less-or-equal comparison.
+define i64 @f6(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vecq %v1, %v0
+; CHECK-NEXT: selgrle %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp sle i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Unsigned greater-than comparison.
+define i64 @f7(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: veclq %v1, %v0
+; CHECK-NEXT: selgrh %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp ugt i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Unsigned less-than comparison.
+define i64 @f8(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: veclq %v1, %v0
+; CHECK-NEXT: selgrl %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp ult i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Unsigned greater-or-equal comparison.
+define i64 @f9(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: veclq %v1, %v0
+; CHECK-NEXT: selgrhe %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp uge i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Unsigned less-or-equal comparison.
+define i64 @f10(i128 %value1, i128 %value2, i64 %a, i64 %b) {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: veclq %v1, %v0
+; CHECK-NEXT: selgrle %r2, %r4, %r5
+; CHECK-NEXT: br %r14
+ %cond = icmp ule i128 %value1, %value2
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Select between i128 values.
+define i128 @f11(i64 %value1, i64 %value2, i128 %a, i128 %b) {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r5), 3
+; CHECK-NEXT: cgrje %r3, %r4, .LBB10_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vl %v1, 0(%r6), 3
+; CHECK-NEXT: vaq %v0, %v0, %v1
+; CHECK-NEXT: .LBB10_2:
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cond = icmp eq i64 %value1, %value2
+ %sum = add i128 %a, %b
+ %res = select i1 %cond, i128 %a, i128 %sum
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-conv-15.ll b/llvm/test/CodeGen/SystemZ/int-conv-15.ll
new file mode 100644
index 00000000000000..bea0bb8890315c
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-conv-15.ll
@@ -0,0 +1,414 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test conversions to and from 128-bit integers in vector registers on arch15
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Sign extension from i64.
+define i128 @f1(i64 %a) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlvgp %v0, %r3, %r3
+; CHECK-NEXT: vuplg %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = sext i64 %a to i128
+ ret i128 %res
+}
+
+; Sign extension from i64 from memory.
+define i128 @f2(ptr %ptr) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepg %v0, 0(%r3)
+; CHECK-NEXT: vrepib %v1, 64
+; CHECK-NEXT: vsrab %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i64, ptr %ptr
+ %res = sext i64 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i64.
+define i128 @f3(i64 %a) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vlvgg %v0, %r3, 1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = zext i64 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i64 from memory.
+define i128 @f4(ptr %ptr) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vleg %v0, 0(%r3), 1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i64, ptr %ptr
+ %res = zext i64 %a to i128
+ ret i128 %res
+}
+
+; Truncation to i64.
+define i64 @f5(i128 %a) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vlgvg %r2, %v0, 1
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i64
+ ret i64 %res
+}
+
+; Truncation to i64 in memory.
+define void @f6(ptr %ptr, i128 %a) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vsteg %v0, 0(%r2), 1
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i64
+ store i64 %res, ptr %ptr
+ ret void
+}
+
+; Sign extension from i32.
+define i128 @f7(i32 %a) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgfr %r0, %r3
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vuplg %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = sext i32 %a to i128
+ ret i128 %res
+}
+
+; Sign extension from i32 from memory.
+define i128 @f8(ptr %ptr) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepf %v0, 0(%r3)
+; CHECK-NEXT: vrepib %v1, 96
+; CHECK-NEXT: vsrab %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i32, ptr %ptr
+ %res = sext i32 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i32.
+define i128 @f9(i32 %a) {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vlvgf %v0, %r3, 3
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = zext i32 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i32 from memory.
+define i128 @f10(ptr %ptr) {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vlef %v0, 0(%r3), 3
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i32, ptr %ptr
+ %res = zext i32 %a to i128
+ ret i128 %res
+}
+
+; Truncation to i32.
+define i32 @f11(i128 %a) {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vlgvf %r2, %v0, 3
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i32
+ ret i32 %res
+}
+
+; Truncation to i32 in memory.
+define void @f12(ptr %ptr, i128 %a) {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vstef %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i32
+ store i32 %res, ptr %ptr
+ ret void
+}
+
+; Sign extension from i16.
+define i128 @f13(i16 %a) {
+; CHECK-LABEL: f13:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3l killed $r3l def $r3d
+; CHECK-NEXT: lghr %r0, %r3
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vuplg %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = sext i16 %a to i128
+ ret i128 %res
+}
+
+; Sign extension from i16 from memory.
+define i128 @f14(ptr %ptr) {
+; CHECK-LABEL: f14:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlreph %v0, 0(%r3)
+; CHECK-NEXT: vrepib %v1, 112
+; CHECK-NEXT: vsrab %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i16, ptr %ptr
+ %res = sext i16 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i16.
+define i128 @f15(i16 %a) {
+; CHECK-LABEL: f15:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vlvgh %v0, %r3, 7
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = zext i16 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i16 from memory.
+define i128 @f16(ptr %ptr) {
+; CHECK-LABEL: f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vleh %v0, 0(%r3), 7
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i16, ptr %ptr
+ %res = zext i16 %a to i128
+ ret i128 %res
+}
+
+; Truncation to i16.
+define i16 @f17(i128 %a) {
+; CHECK-LABEL: f17:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vlgvf %r2, %v0, 3
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i16
+ ret i16 %res
+}
+
+; Truncation to i16 in memory.
+define void @f18(ptr %ptr, i128 %a) {
+; CHECK-LABEL: f18:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vsteh %v0, 0(%r2), 7
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i16
+ store i16 %res, ptr %ptr
+ ret void
+}
+
+; Sign extension from i8.
+define i128 @f19(i8 %a) {
+; CHECK-LABEL: f19:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3l killed $r3l def $r3d
+; CHECK-NEXT: lgbr %r0, %r3
+; CHECK-NEXT: vlvgp %v0, %r0, %r0
+; CHECK-NEXT: vuplg %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = sext i8 %a to i128
+ ret i128 %res
+}
+
+; Sign extension from i8 from memory.
+define i128 @f20(ptr %ptr) {
+; CHECK-LABEL: f20:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vlrepb %v0, 0(%r3)
+; CHECK-NEXT: vrepib %v1, 120
+; CHECK-NEXT: vsrab %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i8, ptr %ptr
+ %res = sext i8 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i8.
+define i128 @f21(i8 %a) {
+; CHECK-LABEL: f21:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vlvgb %v0, %r3, 15
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = zext i8 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i8 from memory.
+define i128 @f22(ptr %ptr) {
+; CHECK-LABEL: f22:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vleb %v0, 0(%r3), 15
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i8, ptr %ptr
+ %res = zext i8 %a to i128
+ ret i128 %res
+}
+
+; Truncation to i8.
+define i8 @f23(i128 %a) {
+; CHECK-LABEL: f23:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vlgvf %r2, %v0, 3
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i8
+ ret i8 %res
+}
+
+; Truncation to i8 in memory.
+define void @f24(ptr %ptr, i128 %a) {
+; CHECK-LABEL: f24:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vsteb %v0, 0(%r2), 15
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i8
+ store i8 %res, ptr %ptr
+ ret void
+}
+
+; Sign extension from i1.
+define i128 @f25(i1 %a) {
+; CHECK-LABEL: f25:
+; CHECK: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI24_0
+; CHECK-NEXT: vl %v1, 0(%r1), 3
+; CHECK-NEXT: vlvgp %v0, %r3, %r3
+; CHECK-NEXT: vn %v0, %v0, %v1
+; CHECK-NEXT: vlcq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = sext i1 %a to i128
+ ret i128 %res
+}
+
+; Sign extension from i1 from memory.
+define i128 @f26(ptr %ptr) {
+; CHECK-LABEL: f26:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vleb %v0, 0(%r3), 15
+; CHECK-NEXT: larl %r1, .LCPI25_0
+; CHECK-NEXT: vl %v1, 0(%r1), 3
+; CHECK-NEXT: vn %v0, %v0, %v1
+; CHECK-NEXT: vlcq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i1, ptr %ptr
+ %res = sext i1 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i1.
+define i128 @f27(i1 %a) {
+; CHECK-LABEL: f27:
+; CHECK: # %bb.0:
+; CHECK-NEXT: larl %r1, .LCPI26_0
+; CHECK-NEXT: vl %v1, 0(%r1), 3
+; CHECK-NEXT: vlvgp %v0, %r3, %r3
+; CHECK-NEXT: vn %v0, %v0, %v1
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = zext i1 %a to i128
+ ret i128 %res
+}
+
+; Zero extension from i1 from memory.
+define i128 @f28(ptr %ptr) {
+; CHECK-LABEL: f28:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vleb %v0, 0(%r3), 15
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %a = load i1, ptr %ptr
+ %res = zext i1 %a to i128
+ ret i128 %res
+}
+
+; Truncation to i1.
+define i1 @f29(i128 %a) {
+; CHECK-LABEL: f29:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r2), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vlgvf %r2, %v0, 3
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i1
+ ret i1 %res
+}
+
+; Truncation to i1 in memory.
+define void @f30(ptr %ptr, i128 %a) {
+; CHECK-LABEL: f30:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: larl %r1, .LCPI29_0
+; CHECK-NEXT: vl %v1, 0(%r1), 3
+; CHECK-NEXT: vaq %v0, %v0, %v0
+; CHECK-NEXT: vn %v0, %v0, %v1
+; CHECK-NEXT: vsteb %v0, 0(%r2), 15
+; CHECK-NEXT: br %r14
+ %op = add i128 %a, %a
+ %res = trunc i128 %op to i1
+ store i1 %res, ptr %ptr
+ ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-div-08.ll b/llvm/test/CodeGen/SystemZ/int-div-08.ll
new file mode 100644
index 00000000000000..a3723c1257974c
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-div-08.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test 128-bit division and remainder in vector registers on arch15
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Divide signed.
+define i128 @f1(i128 %a, i128 %b) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vdq %v0, %v1, %v0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = sdiv i128 %a, %b
+ ret i128 %res
+}
+
+; Divide unsigned.
+define i128 @f2(i128 %a, i128 %b) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vdlq %v0, %v1, %v0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = udiv i128 %a, %b
+ ret i128 %res
+}
+
+; Remainder signed.
+define i128 @f3(i128 %a, i128 %b) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vrq %v0, %v1, %v0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = srem i128 %a, %b
+ ret i128 %res
+}
+
+; Remainder unsigned.
+define i128 @f4(i128 %a, i128 %b) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vrlq %v0, %v1, %v0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = urem i128 %a, %b
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-max-02.ll b/llvm/test/CodeGen/SystemZ/int-max-02.ll
new file mode 100644
index 00000000000000..bd5e9593e25e99
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-max-02.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test i128 maximum on arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Test with slt.
+define i128 @f1(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmxq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with sle.
+define i128 @f2(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmxq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with sgt.
+define i128 @f3(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmxq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
+
+; Test with sge.
+define i128 @f4(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmxq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
+
+; Test with ult.
+define i128 @f5(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmxlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with ule.
+define i128 @f6(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmxlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with ugt.
+define i128 @f7(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmxlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
+
+; Test with uge.
+define i128 @f8(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmxlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge i128 %val1, %val2
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-min-02.ll b/llvm/test/CodeGen/SystemZ/int-min-02.ll
new file mode 100644
index 00000000000000..e4cdd25fbc0066
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-min-02.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test i128 minimum on arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Test with slt.
+define i128 @f1(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmnq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with sle.
+define i128 @f2(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmnq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp sle i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with sgt.
+define i128 @f3(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmnq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp sgt i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
+
+; Test with sge.
+define i128 @f4(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmnq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
+
+; Test with ult.
+define i128 @f5(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmnlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp ult i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with ule.
+define i128 @f6(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmnlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp ule i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val2, i128 %val1
+ ret i128 %ret
+}
+
+; Test with ugt.
+define i128 @f7(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vmnlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp ugt i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
+
+; Test with uge.
+define i128 @f8(i128 %val1, i128 %val2) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmnlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %cmp = icmp uge i128 %val2, %val1
+ %ret = select i1 %cmp, i128 %val1, i128 %val2
+ ret i128 %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-14.ll b/llvm/test/CodeGen/SystemZ/int-mul-14.ll
new file mode 100644
index 00000000000000..e7e0889634d104
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-mul-14.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test 128-bit multiplication in vector registers on arch15
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Multiplication.
+define i128 @f1(i128 %a, i128 %b) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmlq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = mul i128 %a, %b
+ ret i128 %res
+}
+
+; Multiply-and-add.
+define i128 @f2(i128 %a, i128 %b, i128 %add) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r5), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vl %v2, 0(%r3), 3
+; CHECK-NEXT: vmalq %v0, %v2, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %mul = mul i128 %a, %b
+ %res = add i128 %mul, %add
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-15.ll b/llvm/test/CodeGen/SystemZ/int-mul-15.ll
new file mode 100644
index 00000000000000..a4a0faa0cb0c8f
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-mul-15.ll
@@ -0,0 +1,229 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
+; Test high-part i64->i128 multiplications on arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Check zero-extended multiplication in which only the high part is used.
+define i64 @f1(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlgr %r2, %r4
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check sign-extended multiplication in which only the high part is used.
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mgrk %r2, %r3, %r4
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %ax = sext i64 %a to i128
+ %bx = sext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check zero-extended multiplication in which only part of the high half
+; is used. FIXME: Should use MLGR as well.
+define i64 @f3(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vgbm %v1, 0
+; CHECK-NEXT: vlvgg %v1, %r3, 1
+; CHECK-NEXT: vlvgg %v0, %r4, 1
+; CHECK-NEXT: vmlq %v0, %v1, %v0
+; CHECK-NEXT: vrepib %v1, 67
+; CHECK-NEXT: vsrlb %v0, %v0, %v1
+; CHECK-NEXT: vsrl %v0, %v0, %v1
+; CHECK-NEXT: vlgvg %r2, %v0, 1
+; CHECK-NEXT: br %r14
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 67
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check zero-extended multiplication in which the result is split into
+; high and low halves. FIXME: Should use MLGR as well.
+define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgbm %v0, 0
+; CHECK-NEXT: vgbm %v1, 0
+; CHECK-NEXT: vlvgg %v1, %r3, 1
+; CHECK-NEXT: vlvgg %v0, %r4, 1
+; CHECK-NEXT: vmlq %v0, %v1, %v0
+; CHECK-NEXT: vrepib %v1, 64
+; CHECK-NEXT: vsrlb %v1, %v0, %v1
+; CHECK-NEXT: vo %v0, %v1, %v0
+; CHECK-NEXT: vlgvg %r2, %v0, 1
+; CHECK-NEXT: br %r14
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ %low = trunc i128 %mulx to i64
+ %or = or i64 %high, %low
+ ret i64 %or
+}
+
+; Check division by a constant, which should use multiplication instead.
+define i64 @f5(i64 %dummy, i64 %a) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llihf %r0, 1782028570
+; CHECK-NEXT: oilf %r0, 598650223
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlgr %r2, %r0
+; CHECK-NEXT: srlg %r2, %r2, 9
+; CHECK-NEXT: br %r14
+ %res = udiv i64 %a, 1234
+ ret i64 %res
+}
+
+; Check MLG with no displacement.
+define i64 @f6(i64 %dummy, i64 %a, ptr %src) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlg %r2, 0(%r4)
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %b = load i64, ptr %src
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the high end of the aligned MLG range.
+define i64 @f7(i64 %dummy, i64 %a, ptr %src) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlg %r2, 524280(%r4)
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i64, ptr %src, i64 65535
+ %b = load i64, ptr %ptr
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the next doubleword up, which requires separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f8(i64 %dummy, i64 %a, ptr %src) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: agfi %r4, 524288
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlg %r2, 0(%r4)
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i64, ptr %src, i64 65536
+ %b = load i64, ptr %ptr
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the high end of the negative aligned MLG range.
+define i64 @f9(i64 %dummy, i64 %a, ptr %src) {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlg %r2, -8(%r4)
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i64, ptr %src, i64 -1
+ %b = load i64, ptr %ptr
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the low end of the MLG range.
+define i64 @f10(i64 %dummy, i64 %a, ptr %src) {
+; CHECK-LABEL: f10:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlg %r2, -524288(%r4)
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i64, ptr %src, i64 -65536
+ %b = load i64, ptr %ptr
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check the next doubleword down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define i64 @f11(ptr %dest, i64 %a, ptr %src) {
+; CHECK-LABEL: f11:
+; CHECK: # %bb.0:
+; CHECK-NEXT: agfi %r4, -524296
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlg %r2, 0(%r4)
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %ptr = getelementptr i64, ptr %src, i64 -65537
+ %b = load i64, ptr %ptr
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
+; Check that MLG allows an index.
+define i64 @f12(ptr %dest, i64 %a, i64 %src, i64 %index) {
+; CHECK-LABEL: f12:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r3d killed $r3d def $r2q
+; CHECK-NEXT: mlg %r2, 524287(%r5,%r4)
+; CHECK-NEXT: # kill: def $r2d killed $r2d killed $r2q
+; CHECK-NEXT: br %r14
+ %add1 = add i64 %src, %index
+ %add2 = add i64 %add1, 524287
+ %ptr = inttoptr i64 %add2 to ptr
+ %b = load i64, ptr %ptr
+ %ax = zext i64 %a to i128
+ %bx = zext i64 %b to i128
+ %mulx = mul i128 %ax, %bx
+ %highx = lshr i128 %mulx, 64
+ %high = trunc i128 %highx to i64
+ ret i64 %high
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/int-mul-16.ll b/llvm/test/CodeGen/SystemZ/int-mul-16.ll
new file mode 100644
index 00000000000000..d84ca93e3b12cd
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-mul-16.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test high-part i128->i256 multiplications on arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Multiply high signed.
+define i128 @f1(i128 %a, i128 %b) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmhq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %exta = sext i128 %a to i256
+ %extb = sext i128 %b to i256
+ %extres = mul i256 %exta, %extb
+ %shiftres = lshr i256 %extres, 128
+ %res = trunc i256 %shiftres to i128
+ ret i128 %res
+}
+
+; Multiply high unsigned.
+define i128 @f2(i128 %a, i128 %b) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmlhq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %exta = zext i128 %a to i256
+ %extb = zext i128 %b to i256
+ %extres = mul i256 %exta, %extb
+ %shiftres = lshr i256 %extres, 128
+ %res = trunc i256 %shiftres to i128
+ ret i128 %res
+}
+
+;; ; Multiply-and-add high signed.
+;; define i128 @f3(i128 %a, i128 %b, i128 %add) {
+;; ; CHECX-LABEL: f3:
+;; ; CHECX: # %bb.0:
+;; ; CHECX-NEXT: vl %v0, 0(%r3), 3
+;; ; CHECX-NEXT: vl %v1, 0(%r4), 3
+;; ; CHECX-NEXT: vl %v2, 0(%r5), 3
+;; ; CHECX-NEXT: vmhq %v0, %v0, %v1, %v2
+;; ; CHECX-NEXT: vst %v0, 0(%r2), 3
+;; ; CHECX-NEXT: br %r14
+;; %exta = sext i128 %a to i256
+;; %extb = sext i128 %b to i256
+;; %extadd = sext i128 %add to i256
+;; %extmul = mul i256 %exta, %extb
+;; %extres = add i256 %extmul, %extadd
+;; %shiftres = lshr i256 %extres, 128
+;; %res = trunc i256 %shiftres to i128
+;; ret i128 %res
+;; }
diff --git a/llvm/test/CodeGen/SystemZ/int-neg-04.ll b/llvm/test/CodeGen/SystemZ/int-neg-04.ll
new file mode 100644
index 00000000000000..05b7b397e735d9
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/int-neg-04.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test 128-bit negation in vector registers on arch15
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+define i128 @f1(i128 %src) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vlcq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = sub i128 0, %src
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/int-sub-12.ll b/llvm/test/CodeGen/SystemZ/int-sub-12.ll
index 2e30f1c33aebe6..8f7d816d5cbd2a 100644
--- a/llvm/test/CodeGen/SystemZ/int-sub-12.ll
+++ b/llvm/test/CodeGen/SystemZ/int-sub-12.ll
@@ -2,6 +2,7 @@
; Test 128-bit subtraction in vector registers on z13 and later
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
define i128 @f1(i128 %a, i128 %b) {
; CHECK-LABEL: f1:
diff --git a/llvm/test/CodeGen/SystemZ/llxa-01.ll b/llvm/test/CodeGen/SystemZ/llxa-01.ll
new file mode 100644
index 00000000000000..19bc6ef31a2866
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/llxa-01.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD LOGICAL INDEXED ADDRESS byte instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; DO NOT USE: LLXAB with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: algfr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAB with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 zeroext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: agr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAB with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxab %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAB with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxab %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAB with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: algfr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAB with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: algfr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAB with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgfr %r2, %r2
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ ret i64 %idxprom
+}
+
+; LLXAB with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxab %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = zext i32 %idxoff to i64
+ ret i64 %idxprom
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/llxa-02.ll b/llvm/test/CodeGen/SystemZ/llxa-02.ll
new file mode 100644
index 00000000000000..0ca2527dcb25e2
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/llxa-02.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD LOGICAL INDEXED ADDRESS halfword instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LLXAH with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAH with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 zeroext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAH with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxah %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAH with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxah %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAH with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: llxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAH with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: llxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAH with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: risbgn %r2, %r2, 31, 190, 1
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 1
+ ret i64 %idxshift
+}
+
+; LLXAH with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxah %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = zext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 1
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/llxa-03.ll b/llvm/test/CodeGen/SystemZ/llxa-03.ll
new file mode 100644
index 00000000000000..b6c9406785188e
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/llxa-03.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD LOGICAL INDEXED ADDRESS word instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LLXAF with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAF with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 zeroext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAF with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaf %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAF with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaf %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAF with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: llxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAF with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: llxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAF with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: risbgn %r2, %r2, 30, 189, 2
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 2
+ ret i64 %idxshift
+}
+
+; LLXAF with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaf %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = zext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 2
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/llxa-04.ll b/llvm/test/CodeGen/SystemZ/llxa-04.ll
new file mode 100644
index 00000000000000..9c5cd2f54bc67c
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/llxa-04.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD LOGICAL INDEXED ADDRESS doubleword instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LLXAG with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAG with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 zeroext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAG with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxag %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAG with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxag %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAG with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: llxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAG with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: llxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAG with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: risbgn %r2, %r2, 29, 188, 3
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 3
+ ret i64 %idxshift
+}
+
+; LLXAG with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxag %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = zext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 3
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/llxa-05.ll b/llvm/test/CodeGen/SystemZ/llxa-05.ll
new file mode 100644
index 00000000000000..eba400f6d2564b
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/llxa-05.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD LOGICAL INDEXED ADDRESS quadword instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LLXAQ with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAQ with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 zeroext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAQ with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaq %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAQ with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaq %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAQ with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: llxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LLXAQ with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: llxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = zext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LLXAQ with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: risbgn %r2, %r2, 28, 187, 4
+; CHECK-NEXT: br %r14
+ %idxprom = zext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 4
+ ret i64 %idxshift
+}
+
+; LLXAQ with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llxaq %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = zext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 4
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/lxa-01.ll b/llvm/test/CodeGen/SystemZ/lxa-01.ll
new file mode 100644
index 00000000000000..fb3edeaaeb3812
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/lxa-01.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD INDEXED ADDRESS byte instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
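+; The LXA* forms sign-extend the 32-bit index; for the byte variant with no
+; displacement a plain (possibly extending) add is just as good, so LXAB is
+; only expected where a displacement has to be folded in.
+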
+; DO NOT USE: LXAB with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: agfr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LXAB with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 signext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: agr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAB with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxab %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAB with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxab %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LXAB with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: agfr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LXAB with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: agfr %r2, %r3
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i8, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; DO NOT USE: LXAB with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lgfr %r2, %r2
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ ret i64 %idxprom
+}
+
+; LXAB with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxab %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = sext i32 %idxoff to i64
+ ret i64 %idxprom
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/lxa-02.ll b/llvm/test/CodeGen/SystemZ/lxa-02.ll
new file mode 100644
index 00000000000000..64816fa24838ec
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/lxa-02.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD INDEXED ADDRESS halfword instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LXAH with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAH with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 signext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAH with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxah %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAH with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxah %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAH with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: lxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAH with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: lxah %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i16, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAH with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxah %r2, 0(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 1
+ ret i64 %idxshift
+}
+
+; LXAH with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxah %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = sext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 1
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/lxa-03.ll b/llvm/test/CodeGen/SystemZ/lxa-03.ll
new file mode 100644
index 00000000000000..e73d43a48ebd8a
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/lxa-03.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD INDEXED ADDRESS word instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LXAF with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAF with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 signext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAF with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaf %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAF with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaf %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAF with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: lxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAF with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: lxaf %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i32, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAF with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaf %r2, 0(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 2
+ ret i64 %idxshift
+}
+
+; LXAF with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaf %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = sext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 2
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/lxa-04.ll b/llvm/test/CodeGen/SystemZ/lxa-04.ll
new file mode 100644
index 00000000000000..7b6764cf22fafc
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/lxa-04.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD INDEXED ADDRESS doubleword instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LXAG with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAG with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 signext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAG with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxag %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAG with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxag %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAG with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: lxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAG with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: lxag %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i64, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAG with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxag %r2, 0(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 3
+ ret i64 %idxshift
+}
+
+; LXAG with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxag %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = sext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 3
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/lxa-05.ll b/llvm/test/CodeGen/SystemZ/lxa-05.ll
new file mode 100644
index 00000000000000..0a45cba0b3f83a
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/lxa-05.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of LOAD INDEXED ADDRESS quadword instructions.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; LXAQ with base and index.
+define dso_local ptr @f0(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAQ with base and index (implied extension).
+define dso_local ptr @f1(ptr %ptr, i32 signext %idx) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAQ with base and index and max displacement.
+define dso_local ptr @f2(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaq %r2, 524287(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524287
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAQ with base and index and min displacement.
+define dso_local ptr @f3(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaq %r2, -524288(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAQ with base and index and max displacement overflow.
+define dso_local ptr @f4(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, 524288
+; CHECK-NEXT: lxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 524288
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAQ with base and index and min displacement overflow.
+define dso_local ptr @f5(ptr %ptr, i32 %idx) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: afi %r3, -524289
+; CHECK-NEXT: lxaq %r2, 0(%r3,%r2)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, -524289
+ %idxprom = sext i32 %idxoff to i64
+ %arrayidx = getelementptr inbounds i128, ptr %ptr, i64 %idxprom
+ ret ptr %arrayidx
+}
+
+; LXAQ with index.
+define dso_local i64 @f6(i32 %idx) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaq %r2, 0(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxprom = sext i32 %idx to i64
+ %idxshift = shl i64 %idxprom, 4
+ ret i64 %idxshift
+}
+
+; LXAQ with index and displacement.
+define dso_local i64 @f7(i32 %idx) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lxaq %r2, 1(%r2,0)
+; CHECK-NEXT: br %r14
+ %idxoff = add i32 %idx, 1
+ %idxprom = sext i32 %idxoff to i64
+ %idxshift = shl i64 %idxprom, 4
+ ret i64 %idxshift
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll
new file mode 100644
index 00000000000000..f18ee2418383c0
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-03.ll
@@ -0,0 +1,101 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+;
+; FIXME: two consecutive immediate adds not fused in i16/i8 functions.
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+
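+; The i64 cases map directly to CLZG; the narrower cases are expected to be
+; widened to 64 bits, with the count corrected by a constant (zero-defined
+; variants) or the value pre-shifted into the high bits (undef-on-zero
+; variants).
+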
+define i64 @f0(i64 %arg) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clzg %r2, %r2
+; CHECK-NEXT: br %r14
+ %1 = tail call i64 @llvm.ctlz.i64(i64 %arg, i1 false)
+ ret i64 %1
+}
+
+define i64 @f1(i64 %arg) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: clzg %r2, %r2
+; CHECK-NEXT: br %r14
+ %1 = tail call i64 @llvm.ctlz.i64(i64 %arg, i1 true)
+ ret i64 %1
+}
+
+define i32 @f2(i32 %arg) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: llgfr %r0, %r2
+; CHECK-NEXT: clzg %r2, %r0
+; CHECK-NEXT: aghi %r2, -32
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i32 @llvm.ctlz.i32(i32 %arg, i1 false)
+ ret i32 %1
+}
+
+define i32 @f3(i32 %arg) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: sllg %r0, %r2, 32
+; CHECK-NEXT: clzg %r2, %r0
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i32 @llvm.ctlz.i32(i32 %arg, i1 true)
+ ret i32 %1
+}
+
+define i16 @f4(i16 %arg) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: llghr %r0, %r2
+; CHECK-NEXT: clzg %r0, %r0
+; CHECK-NEXT: aghi %r0, -32
+; CHECK-NEXT: ahik %r2, %r0, -16
+; CHECK-NEXT: br %r14
+ %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 false)
+ ret i16 %1
+}
+
+define i16 @f5(i16 %arg) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: sllg %r0, %r2, 48
+; CHECK-NEXT: clzg %r2, %r0
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i16 @llvm.ctlz.i16(i16 %arg, i1 true)
+ ret i16 %1
+}
+
+define i8 @f6(i8 %arg) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: llgcr %r0, %r2
+; CHECK-NEXT: clzg %r0, %r0
+; CHECK-NEXT: aghi %r0, -32
+; CHECK-NEXT: ahik %r2, %r0, -24
+; CHECK-NEXT: br %r14
+ %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 false)
+ ret i8 %1
+}
+
+define i8 @f7(i8 %arg) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: sllg %r0, %r2, 56
+; CHECK-NEXT: clzg %r2, %r0
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i8 @llvm.ctlz.i8(i8 %arg, i1 true)
+ ret i8 %1
+}
diff --git a/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll b/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll
new file mode 100644
index 00000000000000..bb50e6f417c425
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/scalar-ctlz-04.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
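+; 128-bit count-leading-zeros is expected to be performed in vector registers
+; via VCLZQ.
+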
+declare i128 @llvm.ctlz.i128(i128, i1)
+
+define i128 @f1(i128 %a) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vclzq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = tail call i128 @llvm.ctlz.i128(i128 %a, i1 false)
+ ret i128 %res
+}
+
+define i128 @f2(i128 %a) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vclzq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = tail call i128 @llvm.ctlz.i128(i128 %a, i1 true)
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll b/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll
new file mode 100644
index 00000000000000..2f3a72160ae27c
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/scalar-cttz-03.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i8 @llvm.cttz.i8(i8, i1)
+
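+; The i64 cases map directly to CTZG; for the zero-defined narrower cases a
+; bit just above the value's width is ORed in first so that the count cannot
+; run past it.
+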
+define i64 @f0(i64 %arg) {
+; CHECK-LABEL: f0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: br %r14
+ %1 = tail call i64 @llvm.cttz.i64(i64 %arg, i1 false)
+ ret i64 %1
+}
+
+define i64 @f1(i64 %arg) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: br %r14
+ %1 = tail call i64 @llvm.cttz.i64(i64 %arg, i1 true)
+ ret i64 %1
+}
+
+define i32 @f2(i32 %arg) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: oihl %r2, 1
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i32 @llvm.cttz.i32(i32 %arg, i1 false)
+ ret i32 %1
+}
+
+define i32 @f3(i32 %arg) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i32 @llvm.cttz.i32(i32 %arg, i1 true)
+ ret i32 %1
+}
+
+define i16 @f4(i16 %arg) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: oilh %r2, 1
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i16 @llvm.cttz.i16(i16 %arg, i1 false)
+ ret i16 %1
+}
+
+define i16 @f5(i16 %arg) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i16 @llvm.cttz.i16(i16 %arg, i1 true)
+ ret i16 %1
+}
+
+define i8 @f6(i8 %arg) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: oill %r2, 256
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i8 @llvm.cttz.i8(i8 %arg, i1 false)
+ ret i8 %1
+}
+
+define i8 @f7(i8 %arg) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: # kill: def $r2l killed $r2l def $r2d
+; CHECK-NEXT: ctzg %r2, %r2
+; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
+; CHECK-NEXT: br %r14
+ %1 = tail call i8 @llvm.cttz.i8(i8 %arg, i1 true)
+ ret i8 %1
+}
diff --git a/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll b/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll
new file mode 100644
index 00000000000000..f440871fd4ff0b
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/scalar-cttz-04.ll
@@ -0,0 +1,28 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test 128-bit count-trailing-zeros in vector registers on arch15
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+declare i128 @llvm.cttz.i128(i128, i1)
+
+define i128 @f1(i128 %a) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vctzq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = tail call i128 @llvm.cttz.i128(i128 %a, i1 false)
+ ret i128 %res
+}
+
+define i128 @f2(i128 %a) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vctzq %v0, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = tail call i128 @llvm.cttz.i128(i128 %a, i1 true)
+ ret i128 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll b/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll
new file mode 100644
index 00000000000000..3f6c86e685ea10
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-cmp-09.ll
@@ -0,0 +1,248 @@
+; Test usage of VBLEND on arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
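+; Each function selects between %val2 and %val3 under a condition that depends
+; only on the most significant bit of each %val1 element (a signed comparison
+; against zero or an explicit sign-bit mask); all of them are expected to
+; lower to a single VBLEND, with the selected operands swapped for the
+; inverted conditions.
+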
+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
+; CHECK-LABEL: f1:
+; CHECK: vblendb %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <16 x i8> %val1, zeroinitializer
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val3
+ ret <16 x i8> %ret
+}
+
+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vblendb %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <16 x i8> %val1, zeroinitializer
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val3
+ ret <16 x i8> %ret
+}
+
+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
+; CHECK-LABEL: f3:
+; CHECK: vblendb %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <16 x i8> %val1, <i8 128, i8 128, i8 128, i8 128,
+ i8 128, i8 128, i8 128, i8 128,
+ i8 128, i8 128, i8 128, i8 128,
+ i8 128, i8 128, i8 128, i8 128>;
+ %cmp = icmp ne <16 x i8> %mask, zeroinitializer
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val3
+ ret <16 x i8> %ret
+}
+
+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
+; CHECK-LABEL: f4:
+; CHECK: vblendb %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <16 x i8> %val1, <i8 128, i8 128, i8 128, i8 128,
+ i8 128, i8 128, i8 128, i8 128,
+ i8 128, i8 128, i8 128, i8 128,
+ i8 128, i8 128, i8 128, i8 128>;
+ %cmp = icmp eq <16 x i8> %mask, zeroinitializer
+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val3
+ ret <16 x i8> %ret
+}
+
+define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
+; CHECK-LABEL: f5:
+; CHECK: vblendh %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <8 x i16> %val1, zeroinitializer
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val3
+ ret <8 x i16> %ret
+}
+
+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
+; CHECK-LABEL: f6:
+; CHECK: vblendh %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <8 x i16> %val1, zeroinitializer
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val3
+ ret <8 x i16> %ret
+}
+
+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
+; CHECK-LABEL: f7:
+; CHECK: vblendh %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <8 x i16> %val1, <i16 32768, i16 32768, i16 32768, i16 32768,
+ i16 32768, i16 32768, i16 32768, i16 32768>;
+ %cmp = icmp ne <8 x i16> %mask, zeroinitializer
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val3
+ ret <8 x i16> %ret
+}
+
+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
+; CHECK-LABEL: f8:
+; CHECK: vblendh %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <8 x i16> %val1, <i16 32768, i16 32768, i16 32768, i16 32768,
+ i16 32768, i16 32768, i16 32768, i16 32768>;
+ %cmp = icmp eq <8 x i16> %mask, zeroinitializer
+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val3
+ ret <8 x i16> %ret
+}
+
+define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f9:
+; CHECK: vblendf %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <4 x i32> %val1, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val3
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f10:
+; CHECK: vblendf %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <4 x i32> %val1, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val3
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f11:
+; CHECK: vblendf %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <4 x i32> %val1, <i32 2147483648, i32 2147483648,
+ i32 2147483648, i32 2147483648>;
+ %cmp = icmp ne <4 x i32> %mask, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val3
+ ret <4 x i32> %ret
+}
+
+define <4 x i32> @f12(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
+; CHECK-LABEL: f12:
+; CHECK: vblendf %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <4 x i32> %val1, <i32 2147483648, i32 2147483648,
+ i32 2147483648, i32 2147483648>;
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val3
+ ret <4 x i32> %ret
+}
+
+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
+; CHECK-LABEL: f13:
+; CHECK: vblendg %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <2 x i64> %val1, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val3
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @f14(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
+; CHECK-LABEL: f14:
+; CHECK: vblendg %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <2 x i64> %val1, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val3
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @f15(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
+; CHECK-LABEL: f15:
+; CHECK: vblendg %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <2 x i64> %val1, <i64 9223372036854775808,
+ i64 9223372036854775808>;
+ %cmp = icmp ne <2 x i64> %mask, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val3
+ ret <2 x i64> %ret
+}
+
+define <2 x i64> @f16(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
+; CHECK-LABEL: f16:
+; CHECK: vblendg %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <2 x i64> %val1, <i64 9223372036854775808,
+ i64 9223372036854775808>;
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val3
+ ret <2 x i64> %ret
+}
+
+define <4 x float> @f17(<4 x i32> %val1, <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f17:
+; CHECK: vblendf %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <4 x i32> %val1, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x float> %val2, <4 x float> %val3
+ ret <4 x float> %ret
+}
+
+define <4 x float> @f18(<4 x i32> %val1, <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f18:
+; CHECK: vblendf %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <4 x i32> %val1, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x float> %val2, <4 x float> %val3
+ ret <4 x float> %ret
+}
+
+define <4 x float> @f19(<4 x i32> %val1, <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f19:
+; CHECK: vblendf %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <4 x i32> %val1, <i32 2147483648, i32 2147483648,
+ i32 2147483648, i32 2147483648>;
+ %cmp = icmp ne <4 x i32> %mask, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x float> %val2, <4 x float> %val3
+ ret <4 x float> %ret
+}
+
+define <4 x float> @f20(<4 x i32> %val1, <4 x float> %val2, <4 x float> %val3) {
+; CHECK-LABEL: f20:
+; CHECK: vblendf %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <4 x i32> %val1, <i32 2147483648, i32 2147483648,
+ i32 2147483648, i32 2147483648>;
+ %cmp = icmp eq <4 x i32> %mask, zeroinitializer
+ %ret = select <4 x i1> %cmp, <4 x float> %val2, <4 x float> %val3
+ ret <4 x float> %ret
+}
+
+define <2 x double> @f21(<2 x i64> %val1, <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f21:
+; CHECK: vblendg %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp slt <2 x i64> %val1, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x double> %val2, <2 x double> %val3
+ ret <2 x double> %ret
+}
+
+define <2 x double> @f22(<2 x i64> %val1, <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f22:
+; CHECK: vblendg %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %cmp = icmp sge <2 x i64> %val1, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x double> %val2, <2 x double> %val3
+ ret <2 x double> %ret
+}
+
+define <2 x double> @f23(<2 x i64> %val1, <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f23:
+; CHECK: vblendg %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <2 x i64> %val1, <i64 9223372036854775808,
+ i64 9223372036854775808>;
+ %cmp = icmp ne <2 x i64> %mask, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x double> %val2, <2 x double> %val3
+ ret <2 x double> %ret
+}
+
+define <2 x double> @f24(<2 x i64> %val1, <2 x double> %val2, <2 x double> %val3) {
+; CHECK-LABEL: f24:
+; CHECK: vblendg %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+ %mask = and <2 x i64> %val1, <i64 9223372036854775808,
+ i64 9223372036854775808>;
+ %cmp = icmp eq <2 x i64> %mask, zeroinitializer
+ %ret = select <2 x i1> %cmp, <2 x double> %val2, <2 x double> %val3
+ ret <2 x double> %ret
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-div-03.ll b/llvm/test/CodeGen/SystemZ/vec-div-03.ll
new file mode 100644
index 00000000000000..96b161948e39b9
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-div-03.ll
@@ -0,0 +1,76 @@
+; Test vector division on arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Test a v4i32 signed division.
+define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vdf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sdiv <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 unsigned division.
+define <4 x i32> @f2(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f2:
+; CHECK: vdlf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = udiv <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 signed remainder.
+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f3:
+; CHECK: vrf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = srem <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 unsigned remainder.
+define <4 x i32> @f4(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
+; CHECK-LABEL: f4:
+; CHECK: vrlf %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = urem <4 x i32> %val1, %val2
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 signed division.
+define <2 x i64> @f5(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f5:
+; CHECK: vdg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = sdiv <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 unsigned division.
+define <2 x i64> @f6(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: vdlg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = udiv <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 signed remainder.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: vrg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = srem <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 unsigned remainder.
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f8:
+; CHECK: vrlg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = urem <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-eval.ll b/llvm/test/CodeGen/SystemZ/vec-eval.ll
new file mode 100644
index 00000000000000..262ab0ea8bb2be
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-eval.ll
@@ -0,0 +1,3896 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test use of VECTOR EVALUATE for combined boolean operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
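+; The eval<N> functions enumerate three-operand boolean functions of %src1,
+; %src2 and %src3 built from and/or/xor; the checks verify that they lower to
+; a single VEVAL with a suitable function code where possible, to a cheaper
+; pre-existing instruction, or otherwise to a short VEVAL sequence.
+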
+define <16 x i8> @eval0(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval0:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v24, 0
+; CHECK-NEXT: br %r14
+entry:
+ ret <16 x i8> zeroinitializer
+}
+
+define <16 x i8> @eval1(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 1
+; CHECK-NEXT: br %r14
+entry:
+ %and = and <16 x i8> %src2, %src1
+ %and1 = and <16 x i8> %and, %src3
+ ret <16 x i8> %and1
+}
+
+define <16 x i8> @eval2(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 2
+; CHECK-NEXT: br %r14
+entry:
+ %and = and <16 x i8> %src2, %src1
+ %not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1 = and <16 x i8> %and, %not
+ ret <16 x i8> %and1
+}
+
+define <16 x i8> @eval3(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v24, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %and = and <16 x i8> %src2, %src1
+ ret <16 x i8> %and
+}
+
+define <16 x i8> @eval4(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 2
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not, %src1
+ %and1 = and <16 x i8> %and, %src3
+ ret <16 x i8> %and1
+}
+
+define <16 x i8> @eval5(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v24, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %or4 = and <16 x i8> %src3, %src1
+ ret <16 x i8> %or4
+}
+
+define <16 x i8> @eval6(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 6
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or5 = and <16 x i8> %0, %src1
+ ret <16 x i8> %or5
+}
+
+define <16 x i8> @eval7(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval7:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 7
+; CHECK-NEXT: br %r14
+entry:
+ %and118 = or <16 x i8> %src3, %src2
+ %or8 = and <16 x i8> %and118, %src1
+ ret <16 x i8> %or8
+}
+
+define <16 x i8> @eval8(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 8
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src1
+ ret <16 x i8> %and2
+}
+
+define <16 x i8> @eval9(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval9:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v26, %v28, 9
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src3
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %or5 = and <16 x i8> %1, %src1
+ ret <16 x i8> %or5
+}
+
+define <16 x i8> @eval10(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval10:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v24, %v24, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src3, splat(i8 -1)
+ %or6 = and <16 x i8> %not1, %src1
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval11(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval11:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v26, %v28, 11
+; CHECK-NEXT: br %r14
+entry:
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and219 = or <16 x i8> %src3.not, %src2
+ %or9 = and <16 x i8> %and219, %src1
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval12(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval12:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v24, %v24, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not, %src1
+ ret <16 x i8> %and
+}
+
+define <16 x i8> @eval13(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval13:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 11
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src2, splat(i8 -1)
+ %and19 = or <16 x i8> %not, %src3
+ %or9 = and <16 x i8> %and19, %src1
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval14(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval14:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 14
+; CHECK-NEXT: br %r14
+entry:
+ %and20.demorgan = and <16 x i8> %src3, %src2
+ %and20 = xor <16 x i8> %and20.demorgan, splat(i8 -1)
+ %or10 = and <16 x i8> %and20, %src1
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval15(<16 x i8> returned %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval15:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: br %r14
+entry:
+ ret <16 x i8> %src1
+}
+
+define <16 x i8> @eval16(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 2
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %and1 = and <16 x i8> %and, %src3
+ ret <16 x i8> %and1
+}
+
+define <16 x i8> @eval17(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval17:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %or4 = and <16 x i8> %src3, %src2
+ ret <16 x i8> %or4
+}
+
+define <16 x i8> @eval18(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval18:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 6
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %or5 = and <16 x i8> %0, %src2
+ ret <16 x i8> %or5
+}
+
+define <16 x i8> @eval19(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval19:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 7
+; CHECK-NEXT: br %r14
+entry:
+ %and118 = or <16 x i8> %src3, %src1
+ %or8 = and <16 x i8> %and118, %src2
+ ret <16 x i8> %or8
+}
+
+define <16 x i8> @eval20(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval20:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 6
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %or5 = and <16 x i8> %0, %src3
+ ret <16 x i8> %or5
+}
+
+define <16 x i8> @eval21(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval21:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 7
+; CHECK-NEXT: br %r14
+entry:
+ %or518 = or <16 x i8> %src2, %src1
+ %or8 = and <16 x i8> %or518, %src3
+ ret <16 x i8> %or8
+}
+
+define <16 x i8> @eval22(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval22:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 31
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %or5 = and <16 x i8> %0, %src3
+ %not7 = xor <16 x i8> %src3, splat(i8 -1)
+ %1 = and <16 x i8> %not7, %src1
+ %and8 = and <16 x i8> %1, %src2
+ %or9 = or <16 x i8> %and8, %or5
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval23(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval23:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v28, %v26, %v24, 6
+; CHECK-NEXT: veval %v24, %v0, %v26, %v24, 31
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %or5 = and <16 x i8> %0, %src3
+ %and6 = and <16 x i8> %src2, %src1
+ %or12 = or <16 x i8> %or5, %and6
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval24(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval24:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 2
+; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %and1 = and <16 x i8> %and, %src3
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and5 = and <16 x i8> %1, %src1
+ %or6 = or <16 x i8> %and5, %and1
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval25(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval25:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v26
+; CHECK-NEXT: vn %v1, %v28, %v26
+; CHECK-NEXT: veval %v24, %v1, %v24, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and5 = and <16 x i8> %1, %src1
+ %2 = and <16 x i8> %src3, %src2
+ %or9 = or <16 x i8> %and5, %2
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval26(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval26:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v26, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v24, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %and1 = and <16 x i8> %and, %src3
+ %not4 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not4, %src1
+ %or10 = or <16 x i8> %and1, %0
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval27(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval27:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 11
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %and1 = and <16 x i8> %and, %src3
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and527 = or <16 x i8> %src3.not, %src2
+ %0 = and <16 x i8> %and527, %src1
+ %or13 = or <16 x i8> %0, %and1
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval28(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval28:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v28, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v24, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %0 = and <16 x i8> %not, %src3
+ %and1 = and <16 x i8> %0, %src2
+ %not2 = xor <16 x i8> %src2, splat(i8 -1)
+ %and3 = and <16 x i8> %not2, %src1
+ %or10 = or <16 x i8> %and1, %and3
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval29(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval29:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsel %v24, %v28, %v24, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src2, splat(i8 -1)
+ %and3 = and <16 x i8> %not2, %src1
+ %0 = and <16 x i8> %src3, %src2
+ %or13 = or <16 x i8> %0, %and3
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval30(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval30:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 2
+; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %0 = and <16 x i8> %not, %src2
+ %and1 = and <16 x i8> %0, %src3
+ %and328.demorgan = and <16 x i8> %src3, %src2
+ %and328 = xor <16 x i8> %and328.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %and328, %src1
+ %or14 = or <16 x i8> %1, %and1
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval31(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval31:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v26, %v24
+; CHECK-NEXT: veval %v24, %v24, %v0, %v28, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %and1 = and <16 x i8> %and, %src3
+ %or17 = or <16 x i8> %and1, %src1
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval32(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 8
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src2
+ ret <16 x i8> %and2
+}
+
+define <16 x i8> @eval33(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval33:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 9
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src1, %src3
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %or5 = and <16 x i8> %1, %src2
+ ret <16 x i8> %or5
+}
+
+define <16 x i8> @eval34(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval34:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src3, splat(i8 -1)
+ %or6 = and <16 x i8> %not1, %src2
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval35(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval35:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 11
+; CHECK-NEXT: br %r14
+entry:
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and219 = or <16 x i8> %src3.not, %src1
+ %or9 = and <16 x i8> %and219, %src2
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval36(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval36:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src2
+ %not3 = xor <16 x i8> %src2, splat(i8 -1)
+ %and4 = and <16 x i8> %not3, %src1
+ %and5 = and <16 x i8> %and4, %src3
+ %or6 = or <16 x i8> %and2, %and5
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval37(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval37:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: vn %v1, %v28, %v24
+; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src2
+ %2 = and <16 x i8> %src3, %src1
+ %or9 = or <16 x i8> %and2, %2
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval38(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval38:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v24, %v26
+; CHECK-NEXT: vsel %v24, %v0, %v26, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src3, splat(i8 -1)
+ %not3 = xor <16 x i8> %src2, splat(i8 -1)
+ %and4 = and <16 x i8> %not3, %src1
+ %and5 = and <16 x i8> %and4, %src3
+ %0 = and <16 x i8> %not1, %src2
+ %or10 = or <16 x i8> %0, %and5
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval39(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval39:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v24, %v26
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 11
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not3 = xor <16 x i8> %src2, splat(i8 -1)
+ %and4 = and <16 x i8> %not3, %src1
+ %and5 = and <16 x i8> %and4, %src3
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and227 = or <16 x i8> %src3.not, %src1
+ %0 = and <16 x i8> %and227, %src2
+ %or13 = or <16 x i8> %0, %and5
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval40(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval40:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 40
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %src3, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ ret <16 x i8> %or7
+}
+
+define <16 x i8> @eval41(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval41:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v26, %v24
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %src3, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %and8 = and <16 x i8> %src2, %src1
+ %and9 = and <16 x i8> %and8, %src3
+ %or10 = or <16 x i8> %or7, %and9
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval42(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval42:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 42
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src3, splat(i8 -1)
+ %and1021 = or <16 x i8> %src2, %src1
+ %or11 = and <16 x i8> %and1021, %not1
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval43(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval43:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v26, %v24
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %src3, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %and8 = and <16 x i8> %src2, %src1
+ %or14 = or <16 x i8> %or7, %and8
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval44(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval44:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v24, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src2
+ %not3 = xor <16 x i8> %src2, splat(i8 -1)
+ %and4 = and <16 x i8> %not3, %src1
+ %or11 = or <16 x i8> %and2, %and4
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval45(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval45:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47
+; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src2
+ %not3 = xor <16 x i8> %src2, splat(i8 -1)
+ %and4 = and <16 x i8> %not3, %src1
+ %and12 = and <16 x i8> %src2, %src1
+ %and13 = and <16 x i8> %and12, %src3
+ %or11 = or <16 x i8> %and13, %and4
+ %or14 = or <16 x i8> %or11, %and2
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval46(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval46:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 8
+; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src2
+ %and429.demorgan = and <16 x i8> %src3, %src2
+ %and429 = xor <16 x i8> %and429.demorgan, splat(i8 -1)
+ %2 = and <16 x i8> %and429, %src1
+ %or15 = or <16 x i8> %and2, %2
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval47(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval47:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src2
+ %or18 = or <16 x i8> %and2, %src1
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval48(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval48:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v24, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ ret <16 x i8> %and
+}
+
+define <16 x i8> @eval49(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval49:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 11
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and819 = or <16 x i8> %not, %src3
+ %or9 = and <16 x i8> %and819, %src2
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval50(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval50:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 14
+; CHECK-NEXT: br %r14
+entry:
+ %and920.demorgan = and <16 x i8> %src3, %src1
+ %and920 = xor <16 x i8> %and920.demorgan, splat(i8 -1)
+ %or10 = and <16 x i8> %and920, %src2
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval51(<16 x i8> %src1, <16 x i8> returned %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval51:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vlr %v24, %v26
+; CHECK-NEXT: br %r14
+entry:
+ ret <16 x i8> %src2
+}
+
+define <16 x i8> @eval52(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval52:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v0, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %not7 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = and <16 x i8> %not7, %src3
+ %and9 = and <16 x i8> %0, %src1
+ %or10 = or <16 x i8> %and9, %and
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval53(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval53:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsel %v24, %v28, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %0 = and <16 x i8> %src3, %src1
+ %or13 = or <16 x i8> %0, %and
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval54(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval54:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v1, %v28, %v24
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not7 = xor <16 x i8> %src2, splat(i8 -1)
+ %and8 = and <16 x i8> %not7, %src1
+ %and9 = and <16 x i8> %and8, %src3
+ %and28.demorgan = and <16 x i8> %src3, %src1
+ %and28 = xor <16 x i8> %and28.demorgan, splat(i8 -1)
+ %0 = and <16 x i8> %and28, %src2
+ %or14 = or <16 x i8> %and9, %0
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval55(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval55:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v24, %v26
+; CHECK-NEXT: veval %v24, %v26, %v0, %v28, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not7 = xor <16 x i8> %src2, splat(i8 -1)
+ %and8 = and <16 x i8> %not7, %src1
+ %and9 = and <16 x i8> %and8, %src3
+ %or17 = or <16 x i8> %and9, %src2
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval56(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval56:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v0, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and10 = and <16 x i8> %1, %src1
+ %or11 = or <16 x i8> %and10, %and
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval57(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval57:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 47
+; CHECK-NEXT: veval %v24, %v1, %v24, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and10 = and <16 x i8> %1, %src1
+ %and12 = and <16 x i8> %src2, %src1
+ %and13 = and <16 x i8> %and12, %src3
+ %or11 = or <16 x i8> %and13, %and
+ %or14 = or <16 x i8> %or11, %and10
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval58(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval58:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %not1 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not1, %src1
+ %or15 = or <16 x i8> %0, %and
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval59(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval59:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and = and <16 x i8> %not, %src2
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1036 = or <16 x i8> %src3.not, %src2
+ %0 = and <16 x i8> %and1036, %src1
+ %or18 = or <16 x i8> %0, %and
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval60(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval60:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v24, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %or15 = xor <16 x i8> %src2, %src1
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval61(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval61:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v24, %v28
+; CHECK-NEXT: veval %v24, %v26, %v24, %v0, 61
+; CHECK-NEXT: br %r14
+entry:
+ %or15 = xor <16 x i8> %src2, %src1
+ %0 = and <16 x i8> %src1, %src3
+ %and17 = and <16 x i8> %0, %src2
+ %or18 = or <16 x i8> %and17, %or15
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval62(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval62:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v24, %v28
+; CHECK-NEXT: veval %v24, %v26, %v24, %v0, 61
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src3, splat(i8 -1)
+ %or15 = xor <16 x i8> %src2, %src1
+ %0 = and <16 x i8> %not1, %src1
+ %and18 = and <16 x i8> %0, %src2
+ %or19 = or <16 x i8> %and18, %or15
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval63(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval63:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v24, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %or22 = or <16 x i8> %src2, %src1
+ ret <16 x i8> %or22
+}
+
+define <16 x i8> @eval64(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 8
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %and2 = and <16 x i8> %and, %src3
+ ret <16 x i8> %and2
+}
+
+define <16 x i8> @eval65(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval65:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 9
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src1, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %or5 = and <16 x i8> %1, %src3
+ ret <16 x i8> %or5
+}
+
+define <16 x i8> @eval66(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval66:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v28, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %and2 = and <16 x i8> %and, %src3
+ %not4 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not4, %src1
+ %and5 = and <16 x i8> %0, %src2
+ %or6 = or <16 x i8> %and5, %and2
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval67(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval67:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: veval %v24, %v1, %v28, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %and2 = and <16 x i8> %and, %src3
+ %and3 = and <16 x i8> %src2, %src1
+ %or9 = or <16 x i8> %and2, %and3
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval68(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval68:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or6 = and <16 x i8> %not1, %src3
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval69(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval69:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 11
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or620 = or <16 x i8> %not1, %src1
+ %or9 = and <16 x i8> %or620, %src3
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval70(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval70:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v24, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or6 = and <16 x i8> %not1, %src3
+ %not8 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not8, %src1
+ %and9 = and <16 x i8> %0, %src2
+ %or10 = or <16 x i8> %and9, %or6
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval71(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval71:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsel %v24, %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or6 = and <16 x i8> %not1, %src3
+ %and7 = and <16 x i8> %src2, %src1
+ %or13 = or <16 x i8> %or6, %and7
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval72(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval72:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 40
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ ret <16 x i8> %or7
+}
+
+define <16 x i8> @eval73(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval73:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v24
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v1, %v0, %v26, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %and8 = and <16 x i8> %src2, %src1
+ %and9 = and <16 x i8> %and8, %src3
+ %or10 = or <16 x i8> %or7, %and9
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval74(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval74:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v24
+; CHECK-NEXT: vnc %v1, %v24, %v28
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %2 = and <16 x i8> %not5, %src1
+ %and10 = and <16 x i8> %2, %src2
+ %or11 = or <16 x i8> %or7, %and10
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval75(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval75:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v24
+; CHECK-NEXT: vsel %v24, %v24, %v0, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %and8 = and <16 x i8> %src2, %src1
+ %or14 = or <16 x i8> %or7, %and8
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval76(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval76:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v28, %v26, %v24, 8
+; CHECK-NEXT: veval %v24, %v0, %v24, %v26, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src3
+ %and4 = and <16 x i8> %not1, %src1
+ %or11 = or <16 x i8> %and2, %and4
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval77(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval77:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7
+; CHECK-NEXT: veval %v24, %v0, %v24, %v26, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and4 = and <16 x i8> %not1, %src1
+ %and12 = and <16 x i8> %src2, %src1
+ %and228 = or <16 x i8> %and, %and12
+ %0 = and <16 x i8> %and228, %src3
+ %or14 = or <16 x i8> %0, %and4
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval78(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval78:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: vno %v1, %v28, %v28
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT: veval %v1, %v24, %v1, %v26, 11
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %and429 = or <16 x i8> %not5, %not1
+ %0 = and <16 x i8> %and429, %src1
+ %or15 = or <16 x i8> %0, %and2
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval79(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval79:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v24, %v24, %v28, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %and2 = and <16 x i8> %and, %src3
+ %or18 = or <16 x i8> %and2, %src1
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval80(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval80:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v24, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or6 = and <16 x i8> %not, %src3
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval81(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval81:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 11
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or620 = or <16 x i8> %not, %src2
+ %or9 = and <16 x i8> %or620, %src3
+ ret <16 x i8> %or9
+}
+
+define <16 x i8> @eval82(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval82:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or6 = and <16 x i8> %not, %src3
+ %not8 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not8, %src2
+ %and9 = and <16 x i8> %0, %src1
+ %or10 = or <16 x i8> %and9, %or6
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval83(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval83:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsel %v24, %v26, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or6 = and <16 x i8> %not, %src3
+ %and7 = and <16 x i8> %src2, %src1
+ %or13 = or <16 x i8> %or6, %and7
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval84(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval84:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 14
+; CHECK-NEXT: br %r14
+entry:
+ %or621.demorgan = and <16 x i8> %src2, %src1
+ %or621 = xor <16 x i8> %or621.demorgan, splat(i8 -1)
+ %or10 = and <16 x i8> %or621, %src3
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval85(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> returned %src3) {
+; CHECK-LABEL: eval85:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vlr %v24, %v28
+; CHECK-NEXT: br %r14
+entry:
+ ret <16 x i8> %src3
+}
+
+define <16 x i8> @eval86(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval86:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v28, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %or629.demorgan = and <16 x i8> %src2, %src1
+ %or629 = xor <16 x i8> %or629.demorgan, splat(i8 -1)
+ %or10 = and <16 x i8> %or629, %src3
+ %not12 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not12, %src1
+ %and13 = and <16 x i8> %0, %src2
+ %or14 = or <16 x i8> %and13, %or10
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval87(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval87:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 31
+; CHECK-NEXT: br %r14
+entry:
+ %and11 = and <16 x i8> %src2, %src1
+ %or17 = or <16 x i8> %and11, %src3
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval88(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval88:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or6 = and <16 x i8> %not, %src3
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and10 = and <16 x i8> %1, %src1
+ %or11 = or <16 x i8> %and10, %or6
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval89(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval89:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v28, %v26, %v24, 11
+; CHECK-NEXT: veval %v24, %v1, %v24, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and10 = and <16 x i8> %1, %src1
+ %or629 = or <16 x i8> %not, %src2
+ %2 = and <16 x i8> %or629, %src3
+ %or14 = or <16 x i8> %and10, %2
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval90(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval90:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v24, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %or15 = xor <16 x i8> %src3, %src1
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval91(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval91:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or6 = and <16 x i8> %not, %src3
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1037 = or <16 x i8> %src3.not, %src2
+ %0 = and <16 x i8> %and1037, %src1
+ %or18 = or <16 x i8> %0, %or6
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval92(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval92:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or6 = and <16 x i8> %not, %src3
+ %and8 = and <16 x i8> %not1, %src1
+ %or15 = or <16 x i8> %or6, %and8
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval93(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval93:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and8 = and <16 x i8> %not1, %src1
+ %or18 = or <16 x i8> %and8, %src3
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval94(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval94:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnn %v0, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or6 = and <16 x i8> %not, %src3
+ %and838.demorgan = and <16 x i8> %src3, %src2
+ %and838 = xor <16 x i8> %and838.demorgan, splat(i8 -1)
+ %0 = and <16 x i8> %and838, %src1
+ %or19 = or <16 x i8> %0, %or6
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval95(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval95:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v24, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %or22 = or <16 x i8> %src3, %src1
+ ret <16 x i8> %or22
+}
+
+define <16 x i8> @eval96(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval96:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 40
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %src1, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ ret <16 x i8> %or7
+}
+
+define <16 x i8> @eval97(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval97:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %src1, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %and8 = and <16 x i8> %src2, %src1
+ %and9 = and <16 x i8> %and8, %src3
+ %or10 = or <16 x i8> %or7, %and9
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval98(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval98:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %src1, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %2 = and <16 x i8> %not5, %src1
+ %and10 = and <16 x i8> %2, %src2
+ %or11 = or <16 x i8> %or7, %and10
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval99(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval99:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v26, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %src1, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %and8 = and <16 x i8> %src2, %src1
+ %or14 = or <16 x i8> %or7, %and8
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval100(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval100:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and6 = and <16 x i8> %1, %src2
+ %2 = and <16 x i8> %not1, %src3
+ %or11 = or <16 x i8> %and6, %2
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval101(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval101:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v1, %v28, %v24, %v26, 11
+; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and6 = and <16 x i8> %1, %src2
+ %2 = or <16 x i8> %not1, %src1
+ %3 = and <16 x i8> %2, %src3
+ %or14 = or <16 x i8> %and6, %3
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval102(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval102:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %or15 = xor <16 x i8> %src2, %src3
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval103(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval103:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v24, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = and <16 x i8> %not1, %src3
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and637 = or <16 x i8> %src3.not, %src1
+ %1 = and <16 x i8> %and637, %src2
+ %or18 = or <16 x i8> %1, %0
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval104(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval104:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: vno %v1, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %src1, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %2 = or <16 x i8> %src3, %src2
+ %3 = xor <16 x i8> %2, splat(i8 -1)
+ %and11 = and <16 x i8> %3, %src1
+ %or12 = or <16 x i8> %or7, %and11
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval105(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval105:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 105
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or15 = xor <16 x i8> %0, %src1
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval106(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval106:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v28
+; CHECK-NEXT: vx %v1, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v0, %v1, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %src1, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %2 = and <16 x i8> %not5, %src1
+ %or16 = or <16 x i8> %or7, %2
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval107(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval107:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: voc %v1, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %src1, splat(i8 -1)
+ %or7 = and <16 x i8> %0, %1
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1137 = or <16 x i8> %src3.not, %src2
+ %2 = and <16 x i8> %and1137, %src1
+ %or19 = or <16 x i8> %or7, %2
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval108(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval108:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2
+; CHECK-NEXT: vo %v1, %v28, %v24
+; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47
+; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and6 = and <16 x i8> %1, %src2
+ %and9 = and <16 x i8> %not1, %src1
+ %or7 = or <16 x i8> %and2, %and9
+ %or16 = or <16 x i8> %or7, %and6
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval109(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval109:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: vn %v2, %v26, %v24
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT: vo %v1, %v28, %v24
+; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7
+; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47
+; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and6 = and <16 x i8> %1, %src2
+ %and9 = and <16 x i8> %not1, %src1
+ %and17 = and <16 x i8> %src2, %src1
+ %and237 = or <16 x i8> %and, %and17
+ %2 = and <16 x i8> %and237, %src3
+ %or16 = or <16 x i8> %2, %and9
+ %or19 = or <16 x i8> %or16, %and6
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval110(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval110:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: vgbm %v1, 65535
+; CHECK-NEXT: vo %v2, %v28, %v24
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v0, %v26, %v2, 47
+; CHECK-NEXT: veval %v1, %v26, %v1, %v28, 190
+; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and6 = and <16 x i8> %1, %src2
+ %or7 = or <16 x i8> %and6, %and2
+ %and938 = or <16 x i8> %not5, %not1
+ %2 = and <16 x i8> %and938, %src1
+ %or20 = or <16 x i8> %or7, %2
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval111(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval111:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 111
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or23 = or <16 x i8> %0, %src1
+ ret <16 x i8> %or23
+}
+
+define <16 x i8> @eval112(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval112:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v28, %v26, %v24, 8
+; CHECK-NEXT: veval %v24, %v0, %v26, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %0 = or <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src3
+ %and4 = and <16 x i8> %not, %src2
+ %or11 = or <16 x i8> %and2, %and4
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval113(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval113:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT: veval %v0, %v28, %v0, %v1, 7
+; CHECK-NEXT: veval %v24, %v0, %v26, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and4 = and <16 x i8> %not, %src2
+ %and12 = and <16 x i8> %src2, %src1
+ %and228 = or <16 x i8> %and, %and12
+ %0 = and <16 x i8> %and228, %src3
+ %or14 = or <16 x i8> %0, %and4
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval114(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval114:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: vgbm %v1, 65535
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2
+; CHECK-NEXT: veval %v1, %v24, %v1, %v28, 190
+; CHECK-NEXT: veval %v24, %v0, %v1, %v26, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %and429 = or <16 x i8> %not5, %not
+ %0 = and <16 x i8> %and429, %src2
+ %or15 = or <16 x i8> %and2, %0
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval115(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval115:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v24, %v26, %v28, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %and2 = and <16 x i8> %and, %src3
+ %or18 = or <16 x i8> %and2, %src2
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval116(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval116:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and4 = and <16 x i8> %not, %src2
+ %0 = and <16 x i8> %not1, %src3
+ %or15 = or <16 x i8> %0, %and4
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval117(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval117:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %and4 = and <16 x i8> %not, %src2
+ %or18 = or <16 x i8> %and4, %src3
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval118(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval118:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnn %v0, %v28, %v24
+; CHECK-NEXT: vsel %v24, %v0, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = and <16 x i8> %not1, %src3
+ %and438.demorgan = and <16 x i8> %src3, %src1
+ %and438 = xor <16 x i8> %and438.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %and438, %src2
+ %or19 = or <16 x i8> %0, %1
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval119(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval119:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %or22 = or <16 x i8> %src3, %src2
+ ret <16 x i8> %or22
+}
+
+define <16 x i8> @eval120(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval120:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47
+; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %and4 = and <16 x i8> %not, %src2
+ %or11 = or <16 x i8> %and2, %and4
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and15 = and <16 x i8> %1, %src1
+ %or16 = or <16 x i8> %or11, %and15
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval121(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval121:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v0, 65535
+; CHECK-NEXT: vn %v2, %v26, %v24
+; CHECK-NEXT: veval %v0, %v24, %v0, %v26, 40
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v28, %v0, %v2, 7
+; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47
+; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and4 = and <16 x i8> %not, %src2
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and15 = and <16 x i8> %1, %src1
+ %and17 = and <16 x i8> %src2, %src1
+ %and237 = or <16 x i8> %and, %and17
+ %2 = and <16 x i8> %and237, %src3
+ %or16 = or <16 x i8> %2, %and4
+ %or19 = or <16 x i8> %or16, %and15
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval122(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval122:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47
+; CHECK-NEXT: veval %v24, %v0, %v24, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %and4 = and <16 x i8> %not, %src2
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %or11 = or <16 x i8> %and2, %and4
+ %0 = and <16 x i8> %not5, %src1
+ %or20 = or <16 x i8> %or11, %0
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval123(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval123:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2
+; CHECK-NEXT: voc %v1, %v26, %v28
+; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47
+; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %and4 = and <16 x i8> %not, %src2
+ %or11 = or <16 x i8> %and2, %and4
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1545 = or <16 x i8> %src3.not, %src2
+ %0 = and <16 x i8> %and1545, %src1
+ %or23 = or <16 x i8> %or11, %0
+ ret <16 x i8> %or23
+}
+
+define <16 x i8> @eval124(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval124:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: vx %v1, %v24, %v26
+; CHECK-NEXT: veval %v24, %v1, %v28, %v0, 47
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src3
+ %or11 = xor <16 x i8> %src1, %src2
+ %or20 = or <16 x i8> %or11, %and2
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval125(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval125:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 111
+; CHECK-NEXT: br %r14
+entry:
+ %or11 = xor <16 x i8> %src1, %src2
+ %or23 = or <16 x i8> %or11, %src3
+ ret <16 x i8> %or23
+}
+
+define <16 x i8> @eval126(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval126:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v24, %v24
+; CHECK-NEXT: vgbm %v1, 65535
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 47
+; CHECK-NEXT: veval %v1, %v26, %v1, %v28, 190
+; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and2 = and <16 x i8> %and, %src3
+ %and4 = and <16 x i8> %not, %src2
+ %not5 = xor <16 x i8> %src3, splat(i8 -1)
+ %or11 = or <16 x i8> %and2, %and4
+ %and1346 = or <16 x i8> %not5, %not1
+ %0 = and <16 x i8> %and1346, %src1
+ %or24 = or <16 x i8> %or11, %0
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval127(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval127:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v28, %v26, %v24, 8
+; CHECK-NEXT: veval %v24, %v0, %v24, %v26, 127
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and2 = and <16 x i8> %1, %src3
+ %2 = or <16 x i8> %and2, %src1
+ %or27 = or <16 x i8> %2, %src2
+ ret <16 x i8> %or27
+}
+
+define <16 x i8> @eval128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval128:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 128
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and3.demorgan = or <16 x i8> %and.demorgan, %src3
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ ret <16 x i8> %and3
+}
+
+define <16 x i8> @eval129(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval129:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: veval %v24, %v1, %v28, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and3.demorgan = or <16 x i8> %and.demorgan, %src3
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %and4 = and <16 x i8> %src2, %src1
+ %and5 = and <16 x i8> %and4, %src3
+ %or6 = or <16 x i8> %and5, %and3
+ ret <16 x i8> %or6
+}
+
+define <16 x i8> @eval130(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval130:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 130
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %or7.demorgan = or <16 x i8> %0, %src3
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ ret <16 x i8> %or7
+}
+
+define <16 x i8> @eval131(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval131:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v26, %v24
+; CHECK-NEXT: vx %v1, %v26, %v24
+; CHECK-NEXT: veval %v24, %v0, %v28, %v1, 139
+; CHECK-NEXT: br %r14
+entry:
+ %and4 = and <16 x i8> %src2, %src1
+ %0 = xor <16 x i8> %src2, %src1
+ %or7.demorgan = or <16 x i8> %0, %src3
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and9 = and <16 x i8> %and4, %src3
+ %or10 = or <16 x i8> %and9, %or7
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval132(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval132:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 130
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = or <16 x i8> %0, %src2
+ %or7 = xor <16 x i8> %1, splat(i8 -1)
+ ret <16 x i8> %or7
+}
+
+define <16 x i8> @eval133(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval133:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v24
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v1, %v0, %v26, 143
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = or <16 x i8> %0, %src2
+ %or7 = xor <16 x i8> %1, splat(i8 -1)
+ %and8 = and <16 x i8> %src2, %src1
+ %and9 = and <16 x i8> %and8, %src3
+ %or10 = or <16 x i8> %and9, %or7
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval134(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval134:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v24
+; CHECK-NEXT: vnc %v1, %v24, %v28
+; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = or <16 x i8> %0, %src2
+ %or7 = xor <16 x i8> %1, splat(i8 -1)
+ %2 = and <16 x i8> %not2, %src1
+ %and10 = and <16 x i8> %2, %src2
+ %or11 = or <16 x i8> %and10, %or7
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval135(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval135:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v24
+; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %1 = or <16 x i8> %0, %src2
+ %or7 = xor <16 x i8> %1, splat(i8 -1)
+ %and8 = and <16 x i8> %src2, %src1
+ %or14 = or <16 x i8> %and8, %or7
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval136(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval136:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @eval137(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval137:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v26, %v28, 137
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and9 = and <16 x i8> %src2, %src1
+ %and10 = and <16 x i8> %and9, %src3
+ %or11 = or <16 x i8> %and10, %1
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval138(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval138:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127
+; CHECK-NEXT: veval %v24, %v24, %v28, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = or <16 x i8> %src2, %src1
+ %and3.demorgan = or <16 x i8> %0, %src3
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %not2, %src1
+ %or12 = or <16 x i8> %1, %and3
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval139(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval139:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 11
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src2, %src1
+ %and3.demorgan = or <16 x i8> %0, %src3
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and729 = or <16 x i8> %src3.not, %src2
+ %1 = and <16 x i8> %and729, %src1
+ %or15 = or <16 x i8> %1, %and3
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval140(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval140:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 127
+; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src1, %src3
+ %and3.demorgan = or <16 x i8> %0, %src2
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %and5 = and <16 x i8> %not1, %src1
+ %or12 = or <16 x i8> %and5, %and3
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval141(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval141:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src2, %src1
+ %and3.demorgan = or <16 x i8> %0, %src3
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %and5 = and <16 x i8> %not1, %src1
+ %and13 = and <16 x i8> %src2, %src1
+ %and14 = and <16 x i8> %and13, %src3
+ %or12 = or <16 x i8> %and14, %and5
+ %or15 = or <16 x i8> %or12, %and3
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval142(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval142:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 127
+; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src2, %src1
+ %and3.demorgan = or <16 x i8> %0, %src3
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %and530.demorgan = and <16 x i8> %src3, %src2
+ %and530 = xor <16 x i8> %and530.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %and530, %src1
+ %or16 = or <16 x i8> %1, %and3
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval143(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval143:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 143
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %.not = xor <16 x i8> %0, splat(i8 -1)
+ %or19 = or <16 x i8> %.not, %src1
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval144(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval144:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 130
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ ret <16 x i8> %or7
+}
+
+define <16 x i8> @eval145(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval145:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and8 = and <16 x i8> %src2, %src1
+ %and9 = and <16 x i8> %and8, %src3
+ %or10 = or <16 x i8> %and9, %or7
+ ret <16 x i8> %or10
+}
+
+define <16 x i8> @eval146(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval146:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %not2, %src1
+ %and10 = and <16 x i8> %1, %src2
+ %or11 = or <16 x i8> %and10, %or7
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval147(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval147:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v24, %v26, %v24, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and8 = and <16 x i8> %src2, %src1
+ %or14 = or <16 x i8> %and8, %or7
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval148(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval148:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and9 = and <16 x i8> %not1, %src1
+ %and10 = and <16 x i8> %and9, %src3
+ %or11 = or <16 x i8> %and10, %or7
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval149(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval149:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v24, %v28, %v24, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %src3, %src1
+ %or14 = or <16 x i8> %1, %or7
+ ret <16 x i8> %or14
+}
+
+define <16 x i8> @eval150(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval150:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v2, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v2, %v0, 191
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and9 = and <16 x i8> %not1, %src1
+ %and10 = and <16 x i8> %and9, %src3
+ %or11 = or <16 x i8> %and10, %or7
+ %1 = and <16 x i8> %not2, %src1
+ %and14 = and <16 x i8> %1, %src2
+ %or15 = or <16 x i8> %or11, %and14
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval151(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval151:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and9 = and <16 x i8> %not1, %src1
+ %and10 = and <16 x i8> %and9, %src3
+ %and12 = and <16 x i8> %src2, %src1
+ %or11 = or <16 x i8> %and10, %and12
+ %or18 = or <16 x i8> %or11, %or7
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval152(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval152:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111
+; CHECK-NEXT: veval %v24, %v28, %v26, %v0, 234
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %1 = or <16 x i8> %src3, %src2
+ %2 = and <16 x i8> %or7.demorgan, %1
+ %or12 = xor <16 x i8> %2, splat(i8 -1)
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval153(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval153:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111
+; CHECK-NEXT: veval %v2, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %1 = or <16 x i8> %src3, %src2
+ %2 = and <16 x i8> %or7.demorgan, %1
+ %or12 = xor <16 x i8> %2, splat(i8 -1)
+ %and13 = and <16 x i8> %src2, %src1
+ %and14 = and <16 x i8> %and13, %src3
+ %or15 = or <16 x i8> %and14, %or12
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval154(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval154:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111
+; CHECK-NEXT: veval %v2, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %1 = or <16 x i8> %src3, %src2
+ %2 = and <16 x i8> %or7.demorgan, %1
+ %or12 = xor <16 x i8> %2, splat(i8 -1)
+ %3 = and <16 x i8> %not2, %src1
+ %and15 = and <16 x i8> %3, %src2
+ %or16 = or <16 x i8> %and15, %or12
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval155(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval155:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: vn %v2, %v26, %v24
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111
+; CHECK-NEXT: veval %v24, %v2, %v0, %v1, 239
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %1 = or <16 x i8> %src3, %src2
+ %2 = and <16 x i8> %or7.demorgan, %1
+ %or12 = xor <16 x i8> %2, splat(i8 -1)
+ %and13 = and <16 x i8> %src2, %src1
+ %or19 = or <16 x i8> %and13, %or12
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval156(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval156:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111
+; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and9 = and <16 x i8> %not1, %src1
+ %or16 = or <16 x i8> %and9, %or7
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval157(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval157:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and9 = and <16 x i8> %not1, %src1
+ %and17 = and <16 x i8> %src2, %src1
+ %and18 = and <16 x i8> %and17, %src3
+ %or16 = or <16 x i8> %and18, %and9
+ %or19 = or <16 x i8> %or16, %or7
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval158(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval158:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 111
+; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src2
+ %or7.demorgan = or <16 x i8> %0, %src1
+ %or7 = xor <16 x i8> %or7.demorgan, splat(i8 -1)
+ %and938.demorgan = and <16 x i8> %src3, %src2
+ %and938 = xor <16 x i8> %and938.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %and938, %src1
+ %or20 = or <16 x i8> %1, %or7
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval159(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval159:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v26, %v28, 159
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src3
+ %.not = xor <16 x i8> %0, splat(i8 -1)
+ %or23 = or <16 x i8> %.not, %src1
+ ret <16 x i8> %or23
+}
+
+define <16 x i8> @eval160(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval160:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v24, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ ret <16 x i8> %1
+}
+
+define <16 x i8> @eval161(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval161:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 137
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and9 = and <16 x i8> %src2, %src1
+ %and10 = and <16 x i8> %and9, %src3
+ %or11 = or <16 x i8> %and10, %1
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval162(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval162:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v28, %v24, %v26, 127
+; CHECK-NEXT: veval %v24, %v26, %v28, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %and3.demorgan = or <16 x i8> %0, %src2
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %not2, %src2
+ %or12 = or <16 x i8> %1, %and3
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval163(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval163:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 11
+; CHECK-NEXT: veval %v24, %v1, %v0, %v28, 143
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src2, %src1
+ %and3.demorgan = or <16 x i8> %0, %src3
+ %and3 = xor <16 x i8> %and3.demorgan, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and729 = or <16 x i8> %src3.not, %src1
+ %1 = and <16 x i8> %and729, %src2
+ %or15 = or <16 x i8> %1, %and3
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval164(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval164:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: vx %v1, %v28, %v24
+; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 234
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %src3, %src1
+ %2 = or <16 x i8> %1, %src2
+ %3 = and <16 x i8> %0, %2
+ %or12 = xor <16 x i8> %3, splat(i8 -1)
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval165(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval165:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnx %v24, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src3, %src1
+ %or15 = xor <16 x i8> %0, splat(i8 -1)
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval166(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval166:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v2, %v26, %v28, %v24, 111
+; CHECK-NEXT: veval %v24, %v1, %v0, %v2, 239
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %1 = and <16 x i8> %not2, %src1
+ %and15 = and <16 x i8> %1, %src2
+ %2 = xor <16 x i8> %src3, %src1
+ %3 = or <16 x i8> %2, %src2
+ %4 = and <16 x i8> %0, %3
+ %or12 = xor <16 x i8> %4, splat(i8 -1)
+ %or16 = or <16 x i8> %and15, %or12
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval167(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval167:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v0, %v26, %v24, 31
+; CHECK-NEXT: veval %v24, %v0, %v28, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and10 = and <16 x i8> %not1, %src1
+ %and11 = and <16 x i8> %and10, %src3
+ %and13 = and <16 x i8> %src2, %src1
+ %or8 = or <16 x i8> %and11, %and13
+ %or19 = or <16 x i8> %or8, %1
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval168(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval168:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 168
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src2, %src1
+ %1 = or <16 x i8> %0, %src3
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval169(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval169:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 169
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src2, %src1
+ %1 = xor <16 x i8> %0, %src3
+ %or16 = xor <16 x i8> %1, splat(i8 -1)
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval170(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval170:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v24, %v28, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ ret <16 x i8> %not2
+}
+
+define <16 x i8> @eval171(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval171:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: veval %v24, %v0, %v24, %v28, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1238 = or <16 x i8> %src3.not, %src2
+ %2 = and <16 x i8> %and1238, %src1
+ %or20 = or <16 x i8> %2, %1
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval172(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval172:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and10 = and <16 x i8> %not1, %src1
+ %or17 = or <16 x i8> %and10, %1
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval173(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval173:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v0, %v0, %v24, %v26, 47
+; CHECK-NEXT: veval %v24, %v0, %v28, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and10 = and <16 x i8> %not1, %src1
+ %and18 = and <16 x i8> %src2, %src1
+ %and19 = and <16 x i8> %and18, %src3
+ %or8 = or <16 x i8> %and19, %and10
+ %or20 = or <16 x i8> %or8, %1
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval174(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval174:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: vn %v1, %v28, %v26
+; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and1039.demorgan = and <16 x i8> %src3, %src2
+ %and1039 = xor <16 x i8> %and1039.demorgan, splat(i8 -1)
+ %2 = and <16 x i8> %and1039, %src1
+ %or21 = or <16 x i8> %2, %1
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval175(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval175:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v24, %v24, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %or24 = or <16 x i8> %src3.not, %src1
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval176(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval176:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 138
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and522 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and522, %not
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval177(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval177:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and529 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and529, %not
+ %and13 = and <16 x i8> %src2, %src1
+ %and14 = and <16 x i8> %and13, %src3
+ %or15 = or <16 x i8> %or12, %and14
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval178(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval178:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 138
+; CHECK-NEXT: veval %v24, %v0, %v1, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and530 = or <16 x i8> %not2, %src2
+ %or12 = and <16 x i8> %and530, %not
+ %0 = and <16 x i8> %src2, %src1
+ %and15 = and <16 x i8> %0, %not2
+ %or16 = or <16 x i8> %or12, %and15
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval179(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval179:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v26, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and537 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and537, %not
+ %and13 = and <16 x i8> %src2, %src1
+ %or19 = or <16 x i8> %or12, %and13
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval180(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval180:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and530 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and530, %not
+ %and14 = and <16 x i8> %not1, %src1
+ %and15 = and <16 x i8> %and14, %src3
+ %or16 = or <16 x i8> %or12, %and15
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval181(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval181:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v28, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and537 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and537, %not
+ %0 = and <16 x i8> %src3, %src1
+ %or19 = or <16 x i8> %or12, %0
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval182(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval182:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47
+; CHECK-NEXT: vn %v1, %v26, %v24
+; CHECK-NEXT: veval %v24, %v0, %v1, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and538 = or <16 x i8> %not2, %src2
+ %or12 = and <16 x i8> %and538, %not
+ %and14 = and <16 x i8> %not1, %src1
+ %and15 = and <16 x i8> %and14, %src3
+ %or16 = or <16 x i8> %or12, %and15
+ %0 = and <16 x i8> %src2, %src1
+ %and19 = and <16 x i8> %0, %not2
+ %or20 = or <16 x i8> %or16, %and19
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval183(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval183:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v1, %v1, %v26, %v24, 31
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and545 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and545, %not
+ %and14 = and <16 x i8> %not1, %src1
+ %and15 = and <16 x i8> %and14, %src3
+ %and17 = and <16 x i8> %src2, %src1
+ %or16 = or <16 x i8> %and15, %and17
+ %or23 = or <16 x i8> %or16, %or12
+ ret <16 x i8> %or23
+}
+
+define <16 x i8> @eval184(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval184:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: vno %v1, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and531 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and531, %not
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and16 = and <16 x i8> %1, %src1
+ %or17 = or <16 x i8> %or12, %and16
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval185(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval185:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: vnx %v1, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and538 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and538, %not
+ %0 = xor <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %or17 = and <16 x i8> %1, %src1
+ %or20 = or <16 x i8> %or17, %or12
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval186(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval186:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v28
+; CHECK-NEXT: voc %v1, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v1, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and539 = or <16 x i8> %not2, %src2
+ %or12 = and <16 x i8> %and539, %not
+ %0 = and <16 x i8> %not2, %src1
+ %or21 = or <16 x i8> %or12, %0
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval187(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval187:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+entry:
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and546 = or <16 x i8> %src3.not, %src2
+ ret <16 x i8> %and546
+}
+
+define <16 x i8> @eval188(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval188:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: voc %v1, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v0, %v1, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and539 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and539, %not
+ %and14 = and <16 x i8> %not1, %src1
+ %or21 = or <16 x i8> %or12, %and14
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval189(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval189:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v1, %v1, %v24, %v26, 47
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and546 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and546, %not
+ %and14 = and <16 x i8> %not1, %src1
+ %and22 = and <16 x i8> %src2, %src1
+ %and23 = and <16 x i8> %and22, %src3
+ %or21 = or <16 x i8> %and23, %and14
+ %or24 = or <16 x i8> %or21, %or12
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval190(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval190:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v26, %v28
+; CHECK-NEXT: vnn %v1, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and547 = or <16 x i8> %src3.not, %src2
+ %or12 = and <16 x i8> %and547, %not
+ %and1448.demorgan = and <16 x i8> %src3, %src2
+ %and1448 = xor <16 x i8> %and1448.demorgan, splat(i8 -1)
+ %0 = and <16 x i8> %and1448, %src1
+ %or25 = or <16 x i8> %or12, %0
+ ret <16 x i8> %or25
+}
+
+define <16 x i8> @eval191(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval191:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 191
+; CHECK-NEXT: br %r14
+entry:
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and554 = or <16 x i8> %src2, %src1
+ %or28 = or <16 x i8> %and554, %src3.not
+ ret <16 x i8> %or28
+}
+
+define <16 x i8> @eval192(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval192:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v24, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ ret <16 x i8> %and
+}
+
+define <16 x i8> @eval193(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval193:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 137
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %0 = and <16 x i8> %src1, %src3
+ %and10 = and <16 x i8> %0, %src2
+ %or11 = or <16 x i8> %and10, %and
+ ret <16 x i8> %or11
+}
+
+define <16 x i8> @eval194(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval194:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v28, %v28
+; CHECK-NEXT: veval %v24, %v0, %v24, %v26, 137
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not2, %src1
+ %and11 = and <16 x i8> %0, %src2
+ %or12 = or <16 x i8> %and11, %and
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval195(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval195:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnx %v24, %v24, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src1, %src2
+ %or15 = xor <16 x i8> %0, splat(i8 -1)
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval196(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval196:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 138
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and22 = or <16 x i8> %not, %src3
+ %or12 = and <16 x i8> %and22, %not1
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval197(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval197:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 139
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %0 = and <16 x i8> %src3, %src1
+ %or15 = or <16 x i8> %0, %and
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval198(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval198:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47
+; CHECK-NEXT: vnc %v1, %v24, %v28
+; CHECK-NEXT: veval %v24, %v0, %v1, %v26, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and10 = and <16 x i8> %not1, %src1
+ %and11 = and <16 x i8> %and10, %src3
+ %or12 = or <16 x i8> %and11, %and
+ %0 = and <16 x i8> %not2, %src1
+ %and15 = and <16 x i8> %0, %src2
+ %or16 = or <16 x i8> %or12, %and15
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval199(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval199:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v28, %v24
+; CHECK-NEXT: vsel %v24, %v24, %v0, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and37 = or <16 x i8> %not, %src3
+ %or12 = and <16 x i8> %and37, %not1
+ %and13 = and <16 x i8> %src2, %src1
+ %or19 = or <16 x i8> %or12, %and13
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval200(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval200:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 168
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src1
+ %1 = or <16 x i8> %0, %src2
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval201(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval201:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 169
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, %src2
+ %or16 = xor <16 x i8> %1, splat(i8 -1)
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval202(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval202:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v28, %v24
+; CHECK-NEXT: vnc %v1, %v24, %v28
+; CHECK-NEXT: veval %v24, %v1, %v26, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %src3, %src1
+ %1 = or <16 x i8> %0, %src2
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %2 = and <16 x i8> %not2, %src1
+ %and16 = and <16 x i8> %2, %src2
+ %or17 = or <16 x i8> %and16, %or13
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval203(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval203:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v28, %v24
+; CHECK-NEXT: veval %v24, %v24, %v26, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src1
+ %1 = or <16 x i8> %0, %src2
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %and14 = and <16 x i8> %src2, %src1
+ %or20 = or <16 x i8> %and14, %or13
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval204(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval204:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v24, %v26, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ ret <16 x i8> %not1
+}
+
+define <16 x i8> @eval205(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval205:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 171
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = and <16 x i8> %src3, %src1
+ %or20 = or <16 x i8> %0, %not1
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval206(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval206:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not2, %src1
+ %or21 = or <16 x i8> %0, %not1
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval207(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval207:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v24, %v24, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or24 = or <16 x i8> %not1, %src1
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval208(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval208:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 138
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and1122 = or <16 x i8> %not1, %src3
+ %or12 = and <16 x i8> %and1122, %not
+ ret <16 x i8> %or12
+}
+
+define <16 x i8> @eval209(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval209:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 139
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %0 = and <16 x i8> %src3, %src2
+ %or15 = or <16 x i8> %0, %and
+ ret <16 x i8> %or15
+}
+
+define <16 x i8> @eval210(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval210:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: veval %v1, %v26, %v28, %v24, 2
+; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47
+; CHECK-NEXT: vnc %v1, %v24, %v28
+; CHECK-NEXT: veval %v24, %v0, %v1, %v26, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and10 = and <16 x i8> %not, %src2
+ %and11 = and <16 x i8> %and10, %src3
+ %or12 = or <16 x i8> %and11, %and
+ %0 = and <16 x i8> %not2, %src1
+ %and15 = and <16 x i8> %0, %src2
+ %or16 = or <16 x i8> %or12, %and15
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval211(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval211:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v28, %v26
+; CHECK-NEXT: vsel %v24, %v26, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and1137 = or <16 x i8> %not1, %src3
+ %or12 = and <16 x i8> %and1137, %not
+ %and13 = and <16 x i8> %src2, %src1
+ %or19 = or <16 x i8> %or12, %and13
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval212(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval212:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 142
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %0 = xor <16 x i8> %src2, %src1
+ %1 = and <16 x i8> %0, %src3
+ %or16 = or <16 x i8> %1, %and
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval213(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval213:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %or19 = or <16 x i8> %and, %src3
+ ret <16 x i8> %or19
+}
+
+define <16 x i8> @eval214(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval214:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v26, %v24
+; CHECK-NEXT: veval %v1, %v28, %v26, %v24, 6
+; CHECK-NEXT: veval %v2, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v2, %v0, 191
+; CHECK-NEXT: br %r14
+entry:
+ %and.demorgan = or <16 x i8> %src2, %src1
+ %and = xor <16 x i8> %and.demorgan, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = xor <16 x i8> %src2, %src1
+ %1 = and <16 x i8> %0, %src3
+ %or16 = or <16 x i8> %1, %and
+ %2 = and <16 x i8> %not2, %src1
+ %and19 = and <16 x i8> %2, %src2
+ %or20 = or <16 x i8> %or16, %and19
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval215(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval215:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 159
+; CHECK-NEXT: br %r14
+entry:
+ %0 = xor <16 x i8> %src2, %src1
+ %or16 = xor <16 x i8> %0, splat(i8 -1)
+ %or23 = or <16 x i8> %or16, %src3
+ ret <16 x i8> %or23
+}
+
+define <16 x i8> @eval216(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval216:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: veval %v1, %v26, %v28, %v24, 2
+; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: veval %v24, %v0, %v24, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and10 = and <16 x i8> %not, %src2
+ %and11 = and <16 x i8> %and10, %src3
+ %or12 = or <16 x i8> %and11, %and
+ %0 = or <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and16 = and <16 x i8> %1, %src1
+ %or17 = or <16 x i8> %or12, %and16
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval217(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval217:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v0, %v28, %v26
+; CHECK-NEXT: vnx %v1, %v26, %v28
+; CHECK-NEXT: vsel %v24, %v1, %v0, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and1138 = or <16 x i8> %not1, %src3
+ %or12 = and <16 x i8> %and1138, %not
+ %0 = xor <16 x i8> %src3, %src2
+ %and1939 = xor <16 x i8> %0, splat(i8 -1)
+ %1 = and <16 x i8> %and1939, %src1
+ %or20 = or <16 x i8> %or12, %1
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval218(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval218:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: veval %v1, %v26, %v28, %v24, 2
+; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47
+; CHECK-NEXT: veval %v24, %v0, %v24, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and10 = and <16 x i8> %not, %src2
+ %and11 = and <16 x i8> %and10, %src3
+ %or12 = or <16 x i8> %and11, %and
+ %0 = and <16 x i8> %not2, %src1
+ %or21 = or <16 x i8> %or12, %0
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval219(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval219:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: veval %v1, %v26, %v28, %v24, 2
+; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47
+; CHECK-NEXT: voc %v1, %v26, %v28
+; CHECK-NEXT: veval %v24, %v0, %v1, %v24, 31
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %and10 = and <16 x i8> %not, %src2
+ %and11 = and <16 x i8> %and10, %src3
+ %or12 = or <16 x i8> %and11, %and
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1646 = or <16 x i8> %src3.not, %src2
+ %0 = and <16 x i8> %and1646, %src1
+ %or24 = or <16 x i8> %or12, %0
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval220(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval220:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = and <16 x i8> %not, %src3
+ %or21 = or <16 x i8> %0, %not1
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval221(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval221:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v24, %v28, %v26
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or24 = or <16 x i8> %not1, %src3
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval222(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval222:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v26, %v24
+; CHECK-NEXT: veval %v0, %v0, %v28, %v26, 171
+; CHECK-NEXT: veval %v24, %v0, %v24, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and10 = and <16 x i8> %not, %src2
+ %and11 = and <16 x i8> %and10, %src3
+ %or21 = or <16 x i8> %and11, %not1
+ %0 = and <16 x i8> %not2, %src1
+ %or25 = or <16 x i8> %or21, %0
+ ret <16 x i8> %or25
+}
+
+define <16 x i8> @eval223(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval223:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v24, %v28, %v26, 191
+; CHECK-NEXT: br %r14
+entry:
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %or21 = or <16 x i8> %not1, %src1
+ %or28 = or <16 x i8> %or21, %src3
+ ret <16 x i8> %or28
+}
+
+define <16 x i8> @eval224(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval224:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 168
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ ret <16 x i8> %or13
+}
+
+define <16 x i8> @eval225(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval225:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 169
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src2
+ %1 = xor <16 x i8> %0, %src1
+ %or16 = xor <16 x i8> %1, splat(i8 -1)
+ ret <16 x i8> %or16
+}
+
+define <16 x i8> @eval226(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval226:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v24, %v26, %v28, 2
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 143
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %2 = and <16 x i8> %not2, %src1
+ %and16 = and <16 x i8> %2, %src2
+ %or17 = or <16 x i8> %and16, %or13
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval227(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval227:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v28, %v26
+; CHECK-NEXT: veval %v24, %v26, %v24, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %and14 = and <16 x i8> %src2, %src1
+ %or20 = or <16 x i8> %and14, %or13
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval228(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval228:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: vo %v1, %v28, %v24
+; CHECK-NEXT: veval %v2, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v2, %v0, %v24, 47
+; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and12 = and <16 x i8> %1, %src2
+ %and15 = and <16 x i8> %not1, %src1
+ %and16 = and <16 x i8> %and15, %src3
+ %or13 = or <16 x i8> %and16, %and
+ %or17 = or <16 x i8> %or13, %and12
+ ret <16 x i8> %or17
+}
+
+define <16 x i8> @eval229(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval229:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v28, %v26
+; CHECK-NEXT: veval %v24, %v28, %v24, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %2 = and <16 x i8> %src3, %src1
+ %or20 = or <16 x i8> %2, %or13
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval230(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval230:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: veval %v1, %v24, %v28, %v26, 2
+; CHECK-NEXT: veval %v0, %v1, %v0, %v24, 47
+; CHECK-NEXT: veval %v24, %v0, %v26, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and15 = and <16 x i8> %not1, %src1
+ %and16 = and <16 x i8> %and15, %src3
+ %or13 = or <16 x i8> %and16, %and
+ %0 = and <16 x i8> %not2, %src2
+ %or21 = or <16 x i8> %or13, %0
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval231(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval231:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v0, %v26, %v26
+; CHECK-NEXT: vnc %v2, %v24, %v26
+; CHECK-NEXT: vo %v1, %v28, %v24
+; CHECK-NEXT: vsel %v0, %v26, %v0, %v24
+; CHECK-NEXT: veval %v0, %v0, %v2, %v28, 31
+; CHECK-NEXT: veval %v24, %v0, %v26, %v1, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %and = and <16 x i8> %not1, %not
+ %0 = or <16 x i8> %src3, %src1
+ %1 = xor <16 x i8> %0, splat(i8 -1)
+ %and12 = and <16 x i8> %1, %src2
+ %and15 = and <16 x i8> %not1, %src1
+ %and16 = and <16 x i8> %and15, %src3
+ %and18 = and <16 x i8> %src2, %src1
+ %or13 = or <16 x i8> %and18, %and
+ %or17 = or <16 x i8> %or13, %and16
+ %or24 = or <16 x i8> %or17, %and12
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval232(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval232:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 31
+; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %2 = or <16 x i8> %src3, %src2
+ %3 = xor <16 x i8> %2, splat(i8 -1)
+ %and17 = and <16 x i8> %3, %src1
+ %or18 = or <16 x i8> %and17, %or13
+ ret <16 x i8> %or18
+}
+
+define <16 x i8> @eval233(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval233:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vx %v1, %v28, %v26
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 31
+; CHECK-NEXT: veval %v24, %v24, %v1, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %2 = xor <16 x i8> %src3, %src2
+ %and2039 = xor <16 x i8> %2, splat(i8 -1)
+ %3 = and <16 x i8> %and2039, %src1
+ %or21 = or <16 x i8> %3, %or13
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval234(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval234:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 31
+; CHECK-NEXT: veval %v24, %v24, %v28, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %2 = and <16 x i8> %not2, %src1
+ %or22 = or <16 x i8> %2, %or13
+ ret <16 x i8> %or22
+}
+
+define <16 x i8> @eval235(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval235:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vn %v0, %v28, %v26
+; CHECK-NEXT: voc %v1, %v26, %v28
+; CHECK-NEXT: veval %v24, %v1, %v24, %v0, 139
+; CHECK-NEXT: br %r14
+entry:
+ %0 = and <16 x i8> %src3, %src2
+ %1 = or <16 x i8> %0, %src1
+ %or13 = xor <16 x i8> %1, splat(i8 -1)
+ %src3.not = xor <16 x i8> %src3, splat(i8 -1)
+ %and1747 = or <16 x i8> %src3.not, %src2
+ %2 = and <16 x i8> %and1747, %src1
+ %or25 = or <16 x i8> %2, %or13
+ ret <16 x i8> %or25
+}
+
+define <16 x i8> @eval236(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval236:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v24, %v26, 234
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %or22.demorgan = and <16 x i8> %0, %src2
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ ret <16 x i8> %or22
+}
+
+define <16 x i8> @eval237(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval237:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v1, %v0, %v26, 239
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %or22.demorgan = and <16 x i8> %0, %src2
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ %and23 = and <16 x i8> %src2, %src1
+ %and24 = and <16 x i8> %and23, %src3
+ %or25 = or <16 x i8> %and24, %or22
+ ret <16 x i8> %or25
+}
+
+define <16 x i8> @eval238(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval238:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v26, %v28, %v24, 7
+; CHECK-NEXT: veval %v24, %v24, %v28, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src1
+ %or22.demorgan = and <16 x i8> %0, %src2
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %not2, %src1
+ %or26 = or <16 x i8> %1, %or22
+ ret <16 x i8> %or26
+}
+
+define <16 x i8> @eval239(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval239:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v24
+; CHECK-NEXT: veval %v24, %v24, %v0, %v26, 239
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src1
+ %or22.demorgan = and <16 x i8> %0, %src2
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ %or29 = or <16 x i8> %or22, %src1
+ ret <16 x i8> %or29
+}
+
+define <16 x i8> @eval240(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval240:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vno %v24, %v24, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ ret <16 x i8> %not
+}
+
+define <16 x i8> @eval241(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval241:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 171
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %0 = and <16 x i8> %src3, %src2
+ %or20 = or <16 x i8> %0, %not
+ ret <16 x i8> %or20
+}
+
+define <16 x i8> @eval242(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval242:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = and <16 x i8> %not2, %src2
+ %or21 = or <16 x i8> %0, %not
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval243(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval243:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v24, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or24 = or <16 x i8> %not, %src2
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval244(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval244:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %0 = and <16 x i8> %not1, %src3
+ %or21 = or <16 x i8> %0, %not
+ ret <16 x i8> %or21
+}
+
+define <16 x i8> @eval245(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval245:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: voc %v24, %v28, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or24 = or <16 x i8> %not, %src3
+ ret <16 x i8> %or24
+}
+
+define <16 x i8> @eval246(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval246:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnc %v0, %v24, %v26
+; CHECK-NEXT: veval %v0, %v0, %v28, %v24, 171
+; CHECK-NEXT: veval %v24, %v0, %v26, %v28, 47
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %not1 = xor <16 x i8> %src2, splat(i8 -1)
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %and19 = and <16 x i8> %not1, %src1
+ %and20 = and <16 x i8> %and19, %src3
+ %or21 = or <16 x i8> %and20, %not
+ %0 = and <16 x i8> %not2, %src2
+ %or25 = or <16 x i8> %or21, %0
+ ret <16 x i8> %or25
+}
+
+define <16 x i8> @eval247(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval247:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v28, %v24, 191
+; CHECK-NEXT: br %r14
+entry:
+ %not = xor <16 x i8> %src1, splat(i8 -1)
+ %or21 = or <16 x i8> %not, %src2
+ %or28 = or <16 x i8> %or21, %src3
+ ret <16 x i8> %or28
+}
+
+define <16 x i8> @eval248(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval248:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 234
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %or22.demorgan = and <16 x i8> %0, %src1
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ ret <16 x i8> %or22
+}
+
+define <16 x i8> @eval249(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval249:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v26
+; CHECK-NEXT: veval %v1, %v26, %v24, %v28, 1
+; CHECK-NEXT: veval %v24, %v1, %v0, %v24, 239
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %or22.demorgan = and <16 x i8> %0, %src1
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ %and23 = and <16 x i8> %src2, %src1
+ %and24 = and <16 x i8> %and23, %src3
+ %or25 = or <16 x i8> %and24, %or22
+ ret <16 x i8> %or25
+}
+
+define <16 x i8> @eval250(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval250:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v0, %v24, %v28, %v26, 7
+; CHECK-NEXT: veval %v24, %v26, %v28, %v0, 174
+; CHECK-NEXT: br %r14
+entry:
+ %not2 = xor <16 x i8> %src3, splat(i8 -1)
+ %0 = or <16 x i8> %src3, %src2
+ %or22.demorgan = and <16 x i8> %0, %src1
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ %1 = and <16 x i8> %not2, %src2
+ %or26 = or <16 x i8> %1, %or22
+ ret <16 x i8> %or26
+}
+
+define <16 x i8> @eval251(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval251:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vo %v0, %v28, %v26
+; CHECK-NEXT: veval %v24, %v26, %v0, %v24, 239
+; CHECK-NEXT: br %r14
+entry:
+ %0 = or <16 x i8> %src3, %src2
+ %or22.demorgan = and <16 x i8> %0, %src1
+ %or22 = xor <16 x i8> %or22.demorgan, splat(i8 -1)
+ %or29 = or <16 x i8> %or22, %src2
+ ret <16 x i8> %or29
+}
+
+define <16 x i8> @eval252(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval252:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vnn %v24, %v26, %v24
+; CHECK-NEXT: br %r14
+entry:
+ %or26.demorgan = and <16 x i8> %src2, %src1
+ %or26 = xor <16 x i8> %or26.demorgan, splat(i8 -1)
+ ret <16 x i8> %or26
+}
+
+define <16 x i8> @eval253(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval253:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v28, %v26, %v24, 239
+; CHECK-NEXT: br %r14
+entry:
+ %or26.demorgan = and <16 x i8> %src2, %src1
+ %or26 = xor <16 x i8> %or26.demorgan, splat(i8 -1)
+ %or29 = or <16 x i8> %or26, %src3
+ ret <16 x i8> %or29
+}
+
+define <16 x i8> @eval254(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval254:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: veval %v24, %v26, %v24, %v28, 254
+; CHECK-NEXT: br %r14
+entry:
+ %or26.demorgan = and <16 x i8> %src2, %src1
+ %or30.demorgan = and <16 x i8> %or26.demorgan, %src3
+ %or30 = xor <16 x i8> %or30.demorgan, splat(i8 -1)
+ ret <16 x i8> %or30
+}
+
+define <16 x i8> @eval255(<16 x i8> %src1, <16 x i8> %src2, <16 x i8> %src3) {
+; CHECK-LABEL: eval255:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vgbm %v24, 65535
+; CHECK-NEXT: br %r14
+entry:
+ ret <16 x i8> splat(i8 -1)
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll b/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll
new file mode 100644
index 00000000000000..e750f1e3e7b47f
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-intrinsics-05.ll
@@ -0,0 +1,541 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; Test vector intrinsics added with arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+declare <16 x i8> @llvm.s390.vgemb(<8 x i16>)
+declare <8 x i16> @llvm.s390.vgemh(<16 x i8>)
+declare <4 x i32> @llvm.s390.vgemf(<16 x i8>)
+declare <2 x i64> @llvm.s390.vgemg(<16 x i8>)
+declare i128 @llvm.s390.vgemq(<16 x i8>)
+
+declare i128 @llvm.s390.vuphg(<2 x i64>)
+declare i128 @llvm.s390.vuplhg(<2 x i64>)
+declare i128 @llvm.s390.vuplg(<2 x i64>)
+declare i128 @llvm.s390.vupllg(<2 x i64>)
+
+declare i128 @llvm.s390.vavgq(i128, i128)
+declare i128 @llvm.s390.vavglq(i128, i128)
+
+declare <16 x i8> @llvm.s390.veval(<16 x i8>, <16 x i8>, <16 x i8>, i32)
+
+declare <2 x i64> @llvm.s390.vmahg(<2 x i64>, <2 x i64>, <2 x i64>)
+declare i128 @llvm.s390.vmahq(i128, i128, i128)
+declare <2 x i64> @llvm.s390.vmalhg(<2 x i64>, <2 x i64>, <2 x i64>)
+declare i128 @llvm.s390.vmalhq(i128, i128, i128)
+
+declare i128 @llvm.s390.vmaeg(<2 x i64>, <2 x i64>, i128)
+declare i128 @llvm.s390.vmaleg(<2 x i64>, <2 x i64>, i128)
+declare i128 @llvm.s390.vmaog(<2 x i64>, <2 x i64>, i128)
+declare i128 @llvm.s390.vmalog(<2 x i64>, <2 x i64>, i128)
+
+declare <2 x i64> @llvm.s390.vmhg(<2 x i64>, <2 x i64>)
+declare i128 @llvm.s390.vmhq(i128, i128)
+declare <2 x i64> @llvm.s390.vmlhg(<2 x i64>, <2 x i64>)
+declare i128 @llvm.s390.vmlhq(i128, i128)
+
+declare i128 @llvm.s390.vmeg(<2 x i64>, <2 x i64>)
+declare i128 @llvm.s390.vmleg(<2 x i64>, <2 x i64>)
+declare i128 @llvm.s390.vmog(<2 x i64>, <2 x i64>)
+declare i128 @llvm.s390.vmlog(<2 x i64>, <2 x i64>)
+
+declare {i128, i32} @llvm.s390.vceqqs(i128, i128)
+declare {i128, i32} @llvm.s390.vchqs(i128, i128)
+declare {i128, i32} @llvm.s390.vchlqs(i128, i128)
+
+; VGEMB.
+define <16 x i8> @test_vgemb(<8 x i16> %a) {
+; CHECK-LABEL: test_vgemb:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgemb %v24, %v24
+; CHECK-NEXT: br %r14
+ %res = call <16 x i8> @llvm.s390.vgemb(<8 x i16> %a)
+ ret <16 x i8> %res
+}
+
+; VGEMH.
+define <8 x i16> @test_vgemh(<16 x i8> %a) {
+; CHECK-LABEL: test_vgemh:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgemh %v24, %v24
+; CHECK-NEXT: br %r14
+ %res = call <8 x i16> @llvm.s390.vgemh(<16 x i8> %a)
+ ret <8 x i16> %res
+}
+
+; VGEMF.
+define <4 x i32> @test_vgemf(<16 x i8> %a) {
+; CHECK-LABEL: test_vgemf:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgemf %v24, %v24
+; CHECK-NEXT: br %r14
+ %res = call <4 x i32> @llvm.s390.vgemf(<16 x i8> %a)
+ ret <4 x i32> %res
+}
+
+; VGEMG.
+define <2 x i64> @test_vgemg(<16 x i8> %a) {
+; CHECK-LABEL: test_vgemg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgemg %v24, %v24
+; CHECK-NEXT: br %r14
+ %res = call <2 x i64> @llvm.s390.vgemg(<16 x i8> %a)
+ ret <2 x i64> %res
+}
+
+; VGEMQ.
+define i128 @test_vgemq(<16 x i8> %a) {
+; CHECK-LABEL: test_vgemq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vgemq %v0, %v24
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vgemq(<16 x i8> %a)
+ ret i128 %res
+}
+
+; VUPHG.
+define i128 @test_vuphg(<2 x i64> %a) {
+; CHECK-LABEL: test_vuphg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vuphg %v0, %v24
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vuphg(<2 x i64> %a)
+ ret i128 %res
+}
+
+; VUPLHG.
+define i128 @test_vuplhg(<2 x i64> %a) {
+; CHECK-LABEL: test_vuplhg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vuplhg %v0, %v24
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vuplhg(<2 x i64> %a)
+ ret i128 %res
+}
+
+; VUPLG.
+define i128 @test_vuplg(<2 x i64> %a) {
+; CHECK-LABEL: test_vuplg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vuplg %v0, %v24
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vuplg(<2 x i64> %a)
+ ret i128 %res
+}
+
+; VUPLLG.
+define i128 @test_vupllg(<2 x i64> %a) {
+; CHECK-LABEL: test_vupllg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vupllg %v0, %v24
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vupllg(<2 x i64> %a)
+ ret i128 %res
+}
+
+; VAVGQ.
+define i128 @test_vavgq(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vavgq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vavgq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vavgq(i128 %a, i128 %b)
+ ret i128 %res
+}
+
+; VAVGLQ.
+define i128 @test_vavglq(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vavglq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vavglq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vavglq(i128 %a, i128 %b)
+ ret i128 %res
+}
+
+; VEVAL.
+define <16 x i8> @test_veval(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
+; CHECK-LABEL: test_veval:
+; CHECK: # %bb.0:
+; CHECK-NEXT: veval %v24, %v24, %v26, %v28, 123
+; CHECK-NEXT: br %r14
+ %res = call <16 x i8> @llvm.s390.veval(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 123)
+ ret <16 x i8> %res
+}
+
+; VMAHG.
+define <2 x i64> @test_vmahg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmahg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmahg %v24, %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %res = call <2 x i64> @llvm.s390.vmahg(<2 x i64> %a, <2 x i64> %b,
+ <2 x i64> %c)
+ ret <2 x i64> %res
+}
+
+; VMAHQ.
+define i128 @test_vmahq(i128 %a, i128 %b, i128 %c) {
+; CHECK-LABEL: test_vmahq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r5), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vl %v2, 0(%r3), 3
+; CHECK-NEXT: vmahq %v0, %v2, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmahq(i128 %a, i128 %b, i128 %c)
+ ret i128 %res
+}
+
+; VMALHG.
+define <2 x i64> @test_vmalhg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
+; CHECK-LABEL: test_vmalhg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmalhg %v24, %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+ %res = call <2 x i64> @llvm.s390.vmalhg(<2 x i64> %a, <2 x i64> %b,
+ <2 x i64> %c)
+ ret <2 x i64> %res
+}
+
+; VMALHQ.
+define i128 @test_vmalhq(i128 %a, i128 %b, i128 %c) {
+; CHECK-LABEL: test_vmalhq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r5), 3
+; CHECK-NEXT: vl %v1, 0(%r4), 3
+; CHECK-NEXT: vl %v2, 0(%r3), 3
+; CHECK-NEXT: vmalhq %v0, %v2, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmalhq(i128 %a, i128 %b, i128 %c)
+ ret i128 %res
+}
+
+; VMAEG.
+define i128 @test_vmaeg(<2 x i64> %a, <2 x i64> %b, i128 %c) {
+; CHECK-LABEL: test_vmaeg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vmaeg %v0, %v24, %v26, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmaeg(<2 x i64> %a, <2 x i64> %b, i128 %c)
+ ret i128 %res
+}
+
+; VMALEG.
+define i128 @test_vmaleg(<2 x i64> %a, <2 x i64> %b, i128 %c) {
+; CHECK-LABEL: test_vmaleg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vmaleg %v0, %v24, %v26, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmaleg(<2 x i64> %a, <2 x i64> %b, i128 %c)
+ ret i128 %res
+}
+
+; VMAOG.
+define i128 @test_vmaog(<2 x i64> %a, <2 x i64> %b, i128 %c) {
+; CHECK-LABEL: test_vmaog:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vmaog %v0, %v24, %v26, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmaog(<2 x i64> %a, <2 x i64> %b, i128 %c)
+ ret i128 %res
+}
+
+; VMALOG.
+define i128 @test_vmalog(<2 x i64> %a, <2 x i64> %b, i128 %c) {
+; CHECK-LABEL: test_vmalog:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vmalog %v0, %v24, %v26, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmalog(<2 x i64> %a, <2 x i64> %b, i128 %c)
+ ret i128 %res
+}
+
+; VMHG.
+define <2 x i64> @test_vmhg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmhg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmhg %v24, %v24, %v26
+; CHECK-NEXT: br %r14
+ %res = call <2 x i64> @llvm.s390.vmhg(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %res
+}
+
+; VMHQ.
+define i128 @test_vmhq(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vmhq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmhq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmhq(i128 %a, i128 %b)
+ ret i128 %res
+}
+
+; VMLHG.
+define <2 x i64> @test_vmlhg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmlhg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmlhg %v24, %v24, %v26
+; CHECK-NEXT: br %r14
+ %res = call <2 x i64> @llvm.s390.vmlhg(<2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %res
+}
+
+; VMLHQ.
+define i128 @test_vmlhq(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vmlhq:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vmlhq %v0, %v1, %v0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmlhq(i128 %a, i128 %b)
+ ret i128 %res
+}
+
+; VMEG.
+define i128 @test_vmeg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmeg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmeg %v0, %v24, %v26
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmeg(<2 x i64> %a, <2 x i64> %b)
+ ret i128 %res
+}
+
+; VMLEG.
+define i128 @test_vmleg(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmleg:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmleg %v0, %v24, %v26
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmleg(<2 x i64> %a, <2 x i64> %b)
+ ret i128 %res
+}
+
+; VMOG.
+define i128 @test_vmog(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmog:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmog %v0, %v24, %v26
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmog(<2 x i64> %a, <2 x i64> %b)
+ ret i128 %res
+}
+
+; VMLOG.
+define i128 @test_vmlog(<2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: test_vmlog:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmlog %v0, %v24, %v26
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %res = call i128 @llvm.s390.vmlog(<2 x i64> %a, <2 x i64> %b)
+ ret i128 %res
+}
+
+; VCEQQS with no processing of the result.
+define i32 @test_vceqqs(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vceqqs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vceqqs %v0, %v1, %v0
+; CHECK-NEXT: ipm %r2
+; CHECK-NEXT: srl %r2, 28
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vceqqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 1
+ ret i32 %res
+}
+
+; VCEQQS returning 1 if all elements are equal (CC == 0).
+define i32 @test_vceqqs_all_bool(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vceqqs_all_bool:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vceqqs %v0, %v1, %v0
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochie %r2, 1
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vceqqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 1
+ %cmp = icmp ult i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCEQQS, storing to %ptr if all elements are equal.
+define i128 @test_vceqqs_all_store(i128 %a, i128 %b, ptr %ptr) {
+; CHECK-LABEL: test_vceqqs_all_store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vceqqs %v0, %v1, %v0
+; CHECK-NEXT: jnhe .LBB30_2
+; CHECK-NEXT: # %bb.1: # %store
+; CHECK-NEXT: mvhi 0(%r5), 0
+; CHECK-NEXT: .LBB30_2: # %exit
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vceqqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 0
+ %cc = extractvalue {i128, i32} %call, 1
+ %cmp = icmp sle i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, ptr %ptr
+ br label %exit
+
+exit:
+ ret i128 %res
+}
+
+; VCHQS with no processing of the result.
+define i32 @test_vchqs(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vchqs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vchqs %v0, %v1, %v0
+; CHECK-NEXT: ipm %r2
+; CHECK-NEXT: srl %r2, 28
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vchqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHQS returning 1 if all elements are higher (CC == 0).
+define i32 @test_vchqs_all_bool(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vchqs_all_bool:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vchqs %v0, %v1, %v0
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochie %r2, 1
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vchqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 1
+ %cmp = icmp ult i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHQS, storing to %ptr if all elements are higher.
+define i128 @test_vchqs_all_store(i128 %a, i128 %b, ptr %ptr) {
+; CHECK-LABEL: test_vchqs_all_store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vchqs %v0, %v1, %v0
+; CHECK-NEXT: jnhe .LBB33_2
+; CHECK-NEXT: # %bb.1: # %store
+; CHECK-NEXT: mvhi 0(%r5), 0
+; CHECK-NEXT: .LBB33_2: # %exit
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vchqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 0
+ %cc = extractvalue {i128, i32} %call, 1
+ %cmp = icmp sle i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, ptr %ptr
+ br label %exit
+
+exit:
+ ret i128 %res
+}
+
+; VCHLQS with no processing of the result.
+define i32 @test_vchlqs(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vchlqs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vchlqs %v0, %v1, %v0
+; CHECK-NEXT: ipm %r2
+; CHECK-NEXT: srl %r2, 28
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vchlqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 1
+ ret i32 %res
+}
+
+; VCHLQS returning 1 if all elements are higher (CC == 0).
+define i32 @test_vchlqs_all_bool(i128 %a, i128 %b) {
+; CHECK-LABEL: test_vchlqs_all_bool:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r3), 3
+; CHECK-NEXT: vl %v1, 0(%r2), 3
+; CHECK-NEXT: vchlqs %v0, %v1, %v0
+; CHECK-NEXT: lhi %r2, 0
+; CHECK-NEXT: lochie %r2, 1
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vchlqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 1
+ %cmp = icmp slt i32 %res, 1
+ %ext = zext i1 %cmp to i32
+ ret i32 %ext
+}
+
+; VCHLQS, storing to %ptr if all elements are higher.
+define i128 @test_vchlqs_all_store(i128 %a, i128 %b, ptr %ptr) {
+; CHECK-LABEL: test_vchlqs_all_store:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vl %v0, 0(%r4), 3
+; CHECK-NEXT: vl %v1, 0(%r3), 3
+; CHECK-NEXT: vchlqs %v0, %v1, %v0
+; CHECK-NEXT: jnhe .LBB36_2
+; CHECK-NEXT: # %bb.1: # %store
+; CHECK-NEXT: mvhi 0(%r5), 0
+; CHECK-NEXT: .LBB36_2: # %exit
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %call = call {i128, i32} @llvm.s390.vchlqs(i128 %a, i128 %b)
+ %res = extractvalue {i128, i32} %call, 0
+ %cc = extractvalue {i128, i32} %call, 1
+ %cmp = icmp ule i32 %cc, 0
+ br i1 %cmp, label %store, label %exit
+
+store:
+ store i32 0, ptr %ptr
+ br label %exit
+
+exit:
+ ret i128 %res
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-06.ll b/llvm/test/CodeGen/SystemZ/vec-mul-06.ll
new file mode 100644
index 00000000000000..22b1b5de62c57f
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-mul-06.ll
@@ -0,0 +1,24 @@
+; Test vector multiplication on arch15.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch15 | FileCheck %s
+
+; Test a v2i64 multiplication.
+define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
+; CHECK-LABEL: f1:
+; CHECK: vmlg %v24, %v26, %v28
+; CHECK: br %r14
+ %ret = mul <2 x i64> %val1, %val2
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 multiply-and-add.
+define <2 x i64> @f2(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2,
+ <2 x i64> %val3) {
+; CHECK-LABEL: f2:
+; CHECK: vmalg %v24, %v26, %v28, %v30
+; CHECK: br %r14
+ %mul = mul <2 x i64> %val1, %val2
+ %ret = add <2 x i64> %mul, %val3
+ ret <2 x i64> %ret
+}
+
diff --git a/llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt b/llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt
new file mode 100644
index 00000000000000..93274e6659801d
--- /dev/null
+++ b/llvm/test/MC/Disassembler/SystemZ/insns-arch15.txt
@@ -0,0 +1,1753 @@
+# Test arch15 instructions that don't have PC-relative operands.
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=arch15 \
+# RUN: | FileCheck %s
+
+# CHECK: bdepg %r0, %r0, %r0
+0xb9 0x6d 0x00 0x00
+
+# CHECK: bdepg %r0, %r0, %r15
+0xb9 0x6d 0xf0 0x00
+
+# CHECK: bdepg %r0, %r15, %r0
+0xb9 0x6d 0x00 0x0f
+
+# CHECK: bdepg %r15, %r0, %r0
+0xb9 0x6d 0x00 0xf0
+
+# CHECK: bdepg %r7, %r8, %r9
+0xb9 0x6d 0x90 0x78
+
+# CHECK: bextg %r0, %r0, %r0
+0xb9 0x6c 0x00 0x00
+
+# CHECK: bextg %r0, %r0, %r15
+0xb9 0x6c 0xf0 0x00
+
+# CHECK: bextg %r0, %r15, %r0
+0xb9 0x6c 0x00 0x0f
+
+# CHECK: bextg %r15, %r0, %r0
+0xb9 0x6c 0x00 0xf0
+
+# CHECK: bextg %r7, %r8, %r9
+0xb9 0x6c 0x90 0x78
+
+# CHECK: cal %r0, 0, 0
+0xc8 0x06 0x00 0x00 0x00 0x00
+
+# CHECK: cal %r2, 0, 4095
+0xc8 0x26 0x00 0x00 0x0f 0xff
+
+# CHECK: cal %r2, 0, 0(%r1)
+0xc8 0x26 0x00 0x00 0x10 0x00
+
+# CHECK: cal %r2, 0, 0(%r15)
+0xc8 0x26 0x00 0x00 0xf0 0x00
+
+# CHECK: cal %r2, 0(%r1), 4095(%r15)
+0xc8 0x26 0x10 0x00 0xff 0xff
+
+# CHECK: cal %r2, 0(%r1), 0(%r15)
+0xc8 0x26 0x10 0x00 0xf0 0x00
+
+# CHECK: cal %r2, 4095(%r1), 0(%r15)
+0xc8 0x26 0x1f 0xff 0xf0 0x00
+
+# CHECK: calg %r0, 0, 0
+0xc8 0x07 0x00 0x00 0x00 0x00
+
+# CHECK: calg %r2, 0, 4095
+0xc8 0x27 0x00 0x00 0x0f 0xff
+
+# CHECK: calg %r2, 0, 0(%r1)
+0xc8 0x27 0x00 0x00 0x10 0x00
+
+# CHECK: calg %r2, 0, 0(%r15)
+0xc8 0x27 0x00 0x00 0xf0 0x00
+
+# CHECK: calg %r2, 0(%r1), 4095(%r15)
+0xc8 0x27 0x10 0x00 0xff 0xff
+
+# CHECK: calg %r2, 0(%r1), 0(%r15)
+0xc8 0x27 0x10 0x00 0xf0 0x00
+
+# CHECK: calg %r2, 4095(%r1), 0(%r15)
+0xc8 0x27 0x1f 0xff 0xf0 0x00
+
+# CHECK: calgf %r0, 0, 0
+0xc8 0x0f 0x00 0x00 0x00 0x00
+
+# CHECK: calgf %r2, 0, 4095
+0xc8 0x2f 0x00 0x00 0x0f 0xff
+
+# CHECK: calgf %r2, 0, 0(%r1)
+0xc8 0x2f 0x00 0x00 0x10 0x00
+
+# CHECK: calgf %r2, 0, 0(%r15)
+0xc8 0x2f 0x00 0x00 0xf0 0x00
+
+# CHECK: calgf %r2, 0(%r1), 4095(%r15)
+0xc8 0x2f 0x10 0x00 0xff 0xff
+
+# CHECK: calgf %r2, 0(%r1), 0(%r15)
+0xc8 0x2f 0x10 0x00 0xf0 0x00
+
+# CHECK: calgf %r2, 4095(%r1), 0(%r15)
+0xc8 0x2f 0x1f 0xff 0xf0 0x00
+
+# CHECK: clzg %r0, %r15
+0xb9 0x68 0x00 0x0f
+
+# CHECK: clzg %r7, %r8
+0xb9 0x68 0x00 0x78
+
+# CHECK: clzg %r15, %r0
+0xb9 0x68 0x00 0xf0
+
+# CHECK: ctzg %r0, %r15
+0xb9 0x69 0x00 0x0f
+
+# CHECK: ctzg %r7, %r8
+0xb9 0x69 0x00 0x78
+
+# CHECK: ctzg %r15, %r0
+0xb9 0x69 0x00 0xf0
+
+# CHECK: kimd %r0, %r0
+0xb9 0x3e 0x00 0x00
+
+# CHECK: kimd %r0, %r0, 15
+0xb9 0x3e 0xf0 0x00
+
+# CHECK: kimd %r0, %r14
+0xb9 0x3e 0x00 0x0e
+
+# CHECK: kimd %r15, %r0
+0xb9 0x3e 0x00 0xf0
+
+# CHECK: kimd %r7, %r10, 13
+0xb9 0x3e 0xd0 0x7a
+
+# CHECK: klmd %r0, %r0
+0xb9 0x3f 0x00 0x00
+
+# CHECK: klmd %r0, %r0, 15
+0xb9 0x3f 0xf0 0x00
+
+# CHECK: klmd %r0, %r14
+0xb9 0x3f 0x00 0x0e
+
+# CHECK: klmd %r15, %r0
+0xb9 0x3f 0x00 0xf0
+
+# CHECK: klmd %r7, %r10, 13
+0xb9 0x3f 0xd0 0x7a
+
+# CHECK: lxab %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x60
+
+# CHECK: lxab %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x60
+
+# CHECK: lxab %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x60
+
+# CHECK: lxab %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x60
+
+# CHECK: lxab %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x60
+
+# CHECK: lxab %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x60
+
+# CHECK: lxab %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x60
+
+# CHECK: lxab %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x60
+
+# CHECK: lxab %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x60
+
+# CHECK: lxab %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x60
+
+# CHECK: lxah %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x62
+
+# CHECK: lxah %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x62
+
+# CHECK: lxah %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x62
+
+# CHECK: lxah %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x62
+
+# CHECK: lxah %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x62
+
+# CHECK: lxah %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x62
+
+# CHECK: lxah %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x62
+
+# CHECK: lxah %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x62
+
+# CHECK: lxah %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x62
+
+# CHECK: lxah %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x62
+
+# CHECK: lxaf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x64
+
+# CHECK: lxaf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x64
+
+# CHECK: lxaf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x64
+
+# CHECK: lxaf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x64
+
+# CHECK: lxaf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x64
+
+# CHECK: lxaf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x64
+
+# CHECK: lxaf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x64
+
+# CHECK: lxaf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x64
+
+# CHECK: lxaf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x64
+
+# CHECK: lxaf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x64
+
+# CHECK: lxag %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x66
+
+# CHECK: lxag %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x66
+
+# CHECK: lxag %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x66
+
+# CHECK: lxag %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x66
+
+# CHECK: lxag %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x66
+
+# CHECK: lxag %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x66
+
+# CHECK: lxag %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x66
+
+# CHECK: lxag %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x66
+
+# CHECK: lxag %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x66
+
+# CHECK: lxag %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x66
+
+# CHECK: lxaq %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x68
+
+# CHECK: lxaq %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x68
+
+# CHECK: lxaq %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x68
+
+# CHECK: lxaq %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x68
+
+# CHECK: lxaq %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x68
+
+# CHECK: lxaq %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x68
+
+# CHECK: lxaq %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x68
+
+# CHECK: lxaq %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x68
+
+# CHECK: lxaq %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x68
+
+# CHECK: lxaq %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x68
+
+# CHECK: llxab %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x61
+
+# CHECK: llxab %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x61
+
+# CHECK: llxab %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x61
+
+# CHECK: llxab %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x61
+
+# CHECK: llxab %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x61
+
+# CHECK: llxab %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x61
+
+# CHECK: llxab %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x61
+
+# CHECK: llxab %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x61
+
+# CHECK: llxab %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x61
+
+# CHECK: llxab %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x61
+
+# CHECK: llxah %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x63
+
+# CHECK: llxah %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x63
+
+# CHECK: llxah %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x63
+
+# CHECK: llxah %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x63
+
+# CHECK: llxah %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x63
+
+# CHECK: llxah %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x63
+
+# CHECK: llxah %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x63
+
+# CHECK: llxah %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x63
+
+# CHECK: llxah %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x63
+
+# CHECK: llxah %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x63
+
+# CHECK: llxaf %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x65
+
+# CHECK: llxaf %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x65
+
+# CHECK: llxaf %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x65
+
+# CHECK: llxaf %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x65
+
+# CHECK: llxaf %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x65
+
+# CHECK: llxaf %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x65
+
+# CHECK: llxaf %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x65
+
+# CHECK: llxaf %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x65
+
+# CHECK: llxaf %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x65
+
+# CHECK: llxaf %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x65
+
+# CHECK: llxag %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x67
+
+# CHECK: llxag %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x67
+
+# CHECK: llxag %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x67
+
+# CHECK: llxag %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x67
+
+# CHECK: llxag %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x67
+
+# CHECK: llxag %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x67
+
+# CHECK: llxag %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x67
+
+# CHECK: llxag %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x67
+
+# CHECK: llxag %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x67
+
+# CHECK: llxag %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x67
+
+# CHECK: llxaq %r0, -524288
+0xe3 0x00 0x00 0x00 0x80 0x69
+
+# CHECK: llxaq %r0, -1
+0xe3 0x00 0x0f 0xff 0xff 0x69
+
+# CHECK: llxaq %r0, 0
+0xe3 0x00 0x00 0x00 0x00 0x69
+
+# CHECK: llxaq %r0, 1
+0xe3 0x00 0x00 0x01 0x00 0x69
+
+# CHECK: llxaq %r0, 524287
+0xe3 0x00 0x0f 0xff 0x7f 0x69
+
+# CHECK: llxaq %r0, 0(%r1)
+0xe3 0x00 0x10 0x00 0x00 0x69
+
+# CHECK: llxaq %r0, 0(%r15)
+0xe3 0x00 0xf0 0x00 0x00 0x69
+
+# CHECK: llxaq %r0, 524287(%r1,%r15)
+0xe3 0x01 0xff 0xff 0x7f 0x69
+
+# CHECK: llxaq %r0, 524287(%r15,%r1)
+0xe3 0x0f 0x1f 0xff 0x7f 0x69
+
+# CHECK: llxaq %r15, 0
+0xe3 0xf0 0x00 0x00 0x00 0x69
+
+# CHECK: pfcr %r0, %r0, 0
+0xeb 0x00 0x00 0x00 0x00 0x16
+
+# CHECK: pfcr %r15, %r1, 0
+0xeb 0xf1 0x00 0x00 0x00 0x16
+
+# CHECK: pfcr %r1, %r15, 0
+0xeb 0x1f 0x00 0x00 0x00 0x16
+
+# CHECK: pfcr %r15, %r15, 0
+0xeb 0xff 0x00 0x00 0x00 0x16
+
+# CHECK: pfcr %r0, %r0, -524288
+0xeb 0x00 0x00 0x00 0x80 0x16
+
+# CHECK: pfcr %r0, %r0, -1
+0xeb 0x00 0x0f 0xff 0xff 0x16
+
+# CHECK: pfcr %r0, %r0, 1
+0xeb 0x00 0x00 0x01 0x00 0x16
+
+# CHECK: pfcr %r0, %r0, 524287
+0xeb 0x00 0x0f 0xff 0x7f 0x16
+
+# CHECK: pfcr %r0, %r0, 0(%r1)
+0xeb 0x00 0x10 0x00 0x00 0x16
+
+# CHECK: pfcr %r0, %r0, 0(%r15)
+0xeb 0x00 0xf0 0x00 0x00 0x16
+
+# CHECK: pfcr %r0, %r0, 524287(%r1)
+0xeb 0x00 0x1f 0xff 0x7f 0x16
+
+# CHECK: pfcr %r0, %r0, 524287(%r15)
+0xeb 0x00 0xff 0xff 0x7f 0x16
+
+# CHECK: vavgq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xf2
+
+# CHECK: vavgq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xf2
+
+# CHECK: vavgq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xf2
+
+# CHECK: vavgq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xf2
+
+# CHECK: vavgq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xf2
+
+# CHECK: vavglq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xf0
+
+# CHECK: vavglq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xf0
+
+# CHECK: vavglq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xf0
+
+# CHECK: vavglq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xf0
+
+# CHECK: vavglq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xf0
+
+# CHECK: vblendb %v0, %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x00 0x89
+
+# CHECK: vblend %v0, %v0, %v0, %v0, 15
+0xe7 0x00 0x0f 0x00 0x00 0x89
+
+# CHECK: vblendb %v0, %v0, %v0, %v31
+0xe7 0x00 0x00 0x00 0xf1 0x89
+
+# CHECK: vblendb %v0, %v0, %v31, %v0
+0xe7 0x00 0xf0 0x00 0x02 0x89
+
+# CHECK: vblendb %v0, %v31, %v0, %v0
+0xe7 0x0f 0x00 0x00 0x04 0x89
+
+# CHECK: vblendb %v31, %v0, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x08 0x89
+
+# CHECK: vblend %v13, %v17, %v21, %v25, 11
+0xe7 0xd1 0x5b 0x00 0x97 0x89
+
+# CHECK: vblendb %v0, %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x00 0x89
+
+# CHECK: vblendb %v0, %v0, %v0, %v31
+0xe7 0x00 0x00 0x00 0xf1 0x89
+
+# CHECK: vblendb %v0, %v0, %v31, %v0
+0xe7 0x00 0xf0 0x00 0x02 0x89
+
+# CHECK: vblendb %v0, %v31, %v0, %v0
+0xe7 0x0f 0x00 0x00 0x04 0x89
+
+# CHECK: vblendb %v31, %v0, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x08 0x89
+
+# CHECK: vblendb %v13, %v17, %v21, %v25
+0xe7 0xd1 0x50 0x00 0x97 0x89
+
+# CHECK: vblendh %v0, %v0, %v0, %v0
+0xe7 0x00 0x01 0x00 0x00 0x89
+
+# CHECK: vblendh %v0, %v0, %v0, %v31
+0xe7 0x00 0x01 0x00 0xf1 0x89
+
+# CHECK: vblendh %v0, %v0, %v31, %v0
+0xe7 0x00 0xf1 0x00 0x02 0x89
+
+# CHECK: vblendh %v0, %v31, %v0, %v0
+0xe7 0x0f 0x01 0x00 0x04 0x89
+
+# CHECK: vblendh %v31, %v0, %v0, %v0
+0xe7 0xf0 0x01 0x00 0x08 0x89
+
+# CHECK: vblendh %v13, %v17, %v21, %v25
+0xe7 0xd1 0x51 0x00 0x97 0x89
+
+# CHECK: vblendf %v0, %v0, %v0, %v0
+0xe7 0x00 0x02 0x00 0x00 0x89
+
+# CHECK: vblendf %v0, %v0, %v0, %v31
+0xe7 0x00 0x02 0x00 0xf1 0x89
+
+# CHECK: vblendf %v0, %v0, %v31, %v0
+0xe7 0x00 0xf2 0x00 0x02 0x89
+
+# CHECK: vblendf %v0, %v31, %v0, %v0
+0xe7 0x0f 0x02 0x00 0x04 0x89
+
+# CHECK: vblendf %v31, %v0, %v0, %v0
+0xe7 0xf0 0x02 0x00 0x08 0x89
+
+# CHECK: vblendf %v13, %v17, %v21, %v25
+0xe7 0xd1 0x52 0x00 0x97 0x89
+
+# CHECK: vblendg %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0x89
+
+# CHECK: vblendg %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0x89
+
+# CHECK: vblendg %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0x89
+
+# CHECK: vblendg %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0x89
+
+# CHECK: vblendg %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0x89
+
+# CHECK: vblendg %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0x89
+
+# CHECK: vblendq %v0, %v0, %v0, %v0
+0xe7 0x00 0x04 0x00 0x00 0x89
+
+# CHECK: vblendq %v0, %v0, %v0, %v31
+0xe7 0x00 0x04 0x00 0xf1 0x89
+
+# CHECK: vblendq %v0, %v0, %v31, %v0
+0xe7 0x00 0xf4 0x00 0x02 0x89
+
+# CHECK: vblendq %v0, %v31, %v0, %v0
+0xe7 0x0f 0x04 0x00 0x04 0x89
+
+# CHECK: vblendq %v31, %v0, %v0, %v0
+0xe7 0xf0 0x04 0x00 0x08 0x89
+
+# CHECK: vblendq %v13, %v17, %v21, %v25
+0xe7 0xd1 0x54 0x00 0x97 0x89
+
+# CHECK: vceqq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xf8
+
+# CHECK: vceqq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xf8
+
+# CHECK: vceqq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xf8
+
+# CHECK: vceqq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xf8
+
+# CHECK: vceqq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xf8
+
+# CHECK: vceqqs %v5, %v22, %v7
+0xe7 0x56 0x70 0x10 0x44 0xf8
+
+# CHECK: vchq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xfb
+
+# CHECK: vchq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xfb
+
+# CHECK: vchq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xfb
+
+# CHECK: vchq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xfb
+
+# CHECK: vchq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xfb
+
+# CHECK: vchqs %v5, %v22, %v7
+0xe7 0x56 0x70 0x10 0x44 0xfb
+
+# CHECK: vchlq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xf9
+
+# CHECK: vchlq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xf9
+
+# CHECK: vchlq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xf9
+
+# CHECK: vchlq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xf9
+
+# CHECK: vchlq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xf9
+
+# CHECK: vchlqs %v5, %v22, %v7
+0xe7 0x56 0x70 0x10 0x44 0xf9
+
+# CHECK: vclzq %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0x53
+
+# CHECK: vclzq %v0, %v15
+0xe7 0x0f 0x00 0x00 0x40 0x53
+
+# CHECK: vclzq %v0, %v31
+0xe7 0x0f 0x00 0x00 0x44 0x53
+
+# CHECK: vclzq %v15, %v0
+0xe7 0xf0 0x00 0x00 0x40 0x53
+
+# CHECK: vclzq %v31, %v0
+0xe7 0xf0 0x00 0x00 0x48 0x53
+
+# CHECK: vclzq %v14, %v17
+0xe7 0xe1 0x00 0x00 0x44 0x53
+
+# CHECK: vctzq %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0x52
+
+# CHECK: vctzq %v0, %v15
+0xe7 0x0f 0x00 0x00 0x40 0x52
+
+# CHECK: vctzq %v0, %v31
+0xe7 0x0f 0x00 0x00 0x44 0x52
+
+# CHECK: vctzq %v15, %v0
+0xe7 0xf0 0x00 0x00 0x40 0x52
+
+# CHECK: vctzq %v31, %v0
+0xe7 0xf0 0x00 0x00 0x48 0x52
+
+# CHECK: vctzq %v14, %v17
+0xe7 0xe1 0x00 0x00 0x44 0x52
+
+# CHECK: vcvbq %v0, %v0, 0
+0xe6 0x00 0x00 0x00 0x00 0x4e
+
+# CHECK: vcvbq %v0, %v0, 15
+0xe6 0x00 0x00 0xf0 0x00 0x4e
+
+# CHECK: vcvbq %v31, %v0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x4e
+
+# CHECK: vcvbq %v0, %v15, 0
+0xe6 0x0f 0x00 0x00 0x00 0x4e
+
+# CHECK: vcvbq %v0, %v31, 0
+0xe6 0x0f 0x00 0x00 0x04 0x4e
+
+# CHECK: vcvbq %v3, %v18, 4
+0xe6 0x32 0x00 0x40 0x04 0x4e
+
+# CHECK: vcvdq %v0, %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x4a
+
+# CHECK: vcvdq %v0, %v0, 0, 15
+0xe6 0x00 0x00 0xf0 0x00 0x4a
+
+# CHECK: vcvdq %v0, %v0, 255, 0
+0xe6 0x00 0x00 0x0f 0xf0 0x4a
+
+# CHECK: vcvdq %v0, %v31, 0, 0
+0xe6 0x0f 0x00 0x00 0x04 0x4a
+
+# CHECK: vcvdq %v15, %v0, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x4a
+
+# CHECK: vcvdq %v31, %v0, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x4a
+
+# CHECK: vcvdq %v18, %v9, 52, 11
+0xe6 0x29 0x00 0xb3 0x48 0x4a
+
+# CHECK: vd %v0, %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x00 0xb2
+
+# CHECK: vd %v0, %v0, %v0, 0, 15
+0xe7 0x00 0x00 0x0f 0x00 0xb2
+
+# CHECK: vd %v0, %v0, %v0, 15, 0
+0xe7 0x00 0x00 0x00 0xf0 0xb2
+
+# CHECK: vd %v0, %v0, %v31, 0, 0
+0xe7 0x00 0xf0 0x00 0x02 0xb2
+
+# CHECK: vd %v0, %v31, %v0, 0, 0
+0xe7 0x0f 0x00 0x00 0x04 0xb2
+
+# CHECK: vd %v31, %v0, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x08 0xb2
+
+# CHECK: vd %v13, %v17, %v21, 8, 4
+0xe7 0xd1 0x50 0x04 0x86 0xb2
+
+# CHECK: vdf %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x20 0xb2
+
+# CHECK: vdf %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x20 0xb2
+
+# CHECK: vdf %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x22 0xb2
+
+# CHECK: vdf %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xb2
+
+# CHECK: vdf %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xb2
+
+# CHECK: vdf %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x26 0xb2
+
+# CHECK: vdg %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x30 0xb2
+
+# CHECK: vdg %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x30 0xb2
+
+# CHECK: vdg %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x32 0xb2
+
+# CHECK: vdg %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x34 0xb2
+
+# CHECK: vdg %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x38 0xb2
+
+# CHECK: vdg %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x36 0xb2
+
+# CHECK: vdq %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x40 0xb2
+
+# CHECK: vdq %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x40 0xb2
+
+# CHECK: vdq %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x42 0xb2
+
+# CHECK: vdq %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x44 0xb2
+
+# CHECK: vdq %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x48 0xb2
+
+# CHECK: vdq %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x46 0xb2
+
+# CHECK: vdl %v0, %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x00 0xb0
+
+# CHECK: vdl %v0, %v0, %v0, 0, 15
+0xe7 0x00 0x00 0x0f 0x00 0xb0
+
+# CHECK: vdl %v0, %v0, %v0, 15, 0
+0xe7 0x00 0x00 0x00 0xf0 0xb0
+
+# CHECK: vdl %v0, %v0, %v31, 0, 0
+0xe7 0x00 0xf0 0x00 0x02 0xb0
+
+# CHECK: vdl %v0, %v31, %v0, 0, 0
+0xe7 0x0f 0x00 0x00 0x04 0xb0
+
+# CHECK: vdl %v31, %v0, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x08 0xb0
+
+# CHECK: vdl %v13, %v17, %v21, 8, 4
+0xe7 0xd1 0x50 0x04 0x86 0xb0
+
+# CHECK: vdlf %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x20 0xb0
+
+# CHECK: vdlf %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x20 0xb0
+
+# CHECK: vdlf %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x22 0xb0
+
+# CHECK: vdlf %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xb0
+
+# CHECK: vdlf %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xb0
+
+# CHECK: vdlf %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x26 0xb0
+
+# CHECK: vdlg %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x30 0xb0
+
+# CHECK: vdlg %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x30 0xb0
+
+# CHECK: vdlg %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x32 0xb0
+
+# CHECK: vdlg %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x34 0xb0
+
+# CHECK: vdlg %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x38 0xb0
+
+# CHECK: vdlg %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x36 0xb0
+
+# CHECK: vdlq %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x40 0xb0
+
+# CHECK: vdlq %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x40 0xb0
+
+# CHECK: vdlq %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x42 0xb0
+
+# CHECK: vdlq %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x44 0xb0
+
+# CHECK: vdlq %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x48 0xb0
+
+# CHECK: vdlq %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x46 0xb0
+
+# CHECK: veval %v0, %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x00 0x88
+
+# CHECK: veval %v0, %v0, %v0, %v0, 255
+0xe7 0x00 0x00 0xff 0x00 0x88
+
+# CHECK: veval %v0, %v0, %v0, %v15, 0
+0xe7 0x00 0x00 0x00 0xf0 0x88
+
+# CHECK: veval %v0, %v0, %v0, %v31, 0
+0xe7 0x00 0x00 0x00 0xf1 0x88
+
+# CHECK: veval %v0, %v0, %v15, %v0, 0
+0xe7 0x00 0xf0 0x00 0x00 0x88
+
+# CHECK: veval %v0, %v0, %v31, %v0, 0
+0xe7 0x00 0xf0 0x00 0x02 0x88
+
+# CHECK: veval %v0, %v15, %v0, %v0, 0
+0xe7 0x0f 0x00 0x00 0x00 0x88
+
+# CHECK: veval %v0, %v31, %v0, %v0, 0
+0xe7 0x0f 0x00 0x00 0x04 0x88
+
+# CHECK: veval %v15, %v0, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x00 0x88
+
+# CHECK: veval %v31, %v0, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x08 0x88
+
+# CHECK: veval %v18, %v3, %v20, %v5, 4
+0xe7 0x23 0x40 0x04 0x5a 0x88
+
+# CHECK: vecq %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xdb
+
+# CHECK: vecq %v0, %v15
+0xe7 0x0f 0x00 0x00 0x40 0xdb
+
+# CHECK: vecq %v0, %v31
+0xe7 0x0f 0x00 0x00 0x44 0xdb
+
+# CHECK: vecq %v15, %v0
+0xe7 0xf0 0x00 0x00 0x40 0xdb
+
+# CHECK: vecq %v31, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xdb
+
+# CHECK: vecq %v14, %v17
+0xe7 0xe1 0x00 0x00 0x44 0xdb
+
+# CHECK: veclq %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xd9
+
+# CHECK: veclq %v0, %v15
+0xe7 0x0f 0x00 0x00 0x40 0xd9
+
+# CHECK: veclq %v0, %v31
+0xe7 0x0f 0x00 0x00 0x44 0xd9
+
+# CHECK: veclq %v15, %v0
+0xe7 0xf0 0x00 0x00 0x40 0xd9
+
+# CHECK: veclq %v31, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xd9
+
+# CHECK: veclq %v14, %v17
+0xe7 0xe1 0x00 0x00 0x44 0xd9
+
+# CHECK: vgemb %v0, %v0
+0xe7 0x00 0x00 0x00 0x00 0x54
+
+# CHECK: vgem %v0, %v0, 15
+0xe7 0x00 0x00 0x00 0xf0 0x54
+
+# CHECK: vgemb %v0, %v15
+0xe7 0x0f 0x00 0x00 0x00 0x54
+
+# CHECK: vgemb %v0, %v31
+0xe7 0x0f 0x00 0x00 0x04 0x54
+
+# CHECK: vgemb %v15, %v0
+0xe7 0xf0 0x00 0x00 0x00 0x54
+
+# CHECK: vgemb %v31, %v0
+0xe7 0xf0 0x00 0x00 0x08 0x54
+
+# CHECK: vgem %v14, %v17, 11
+0xe7 0xe1 0x00 0x00 0xb4 0x54
+
+# CHECK: vgemb %v0, %v0
+0xe7 0x00 0x00 0x00 0x00 0x54
+
+# CHECK: vgemb %v0, %v15
+0xe7 0x0f 0x00 0x00 0x00 0x54
+
+# CHECK: vgemb %v0, %v31
+0xe7 0x0f 0x00 0x00 0x04 0x54
+
+# CHECK: vgemb %v15, %v0
+0xe7 0xf0 0x00 0x00 0x00 0x54
+
+# CHECK: vgemb %v31, %v0
+0xe7 0xf0 0x00 0x00 0x08 0x54
+
+# CHECK: vgemb %v14, %v17
+0xe7 0xe1 0x00 0x00 0x04 0x54
+
+# CHECK: vgemh %v0, %v0
+0xe7 0x00 0x00 0x00 0x10 0x54
+
+# CHECK: vgemh %v0, %v15
+0xe7 0x0f 0x00 0x00 0x10 0x54
+
+# CHECK: vgemh %v0, %v31
+0xe7 0x0f 0x00 0x00 0x14 0x54
+
+# CHECK: vgemh %v15, %v0
+0xe7 0xf0 0x00 0x00 0x10 0x54
+
+# CHECK: vgemh %v31, %v0
+0xe7 0xf0 0x00 0x00 0x18 0x54
+
+# CHECK: vgemh %v14, %v17
+0xe7 0xe1 0x00 0x00 0x14 0x54
+
+# CHECK: vgemf %v0, %v0
+0xe7 0x00 0x00 0x00 0x20 0x54
+
+# CHECK: vgemf %v0, %v15
+0xe7 0x0f 0x00 0x00 0x20 0x54
+
+# CHECK: vgemf %v0, %v31
+0xe7 0x0f 0x00 0x00 0x24 0x54
+
+# CHECK: vgemf %v15, %v0
+0xe7 0xf0 0x00 0x00 0x20 0x54
+
+# CHECK: vgemf %v31, %v0
+0xe7 0xf0 0x00 0x00 0x28 0x54
+
+# CHECK: vgemf %v14, %v17
+0xe7 0xe1 0x00 0x00 0x24 0x54
+
+# CHECK: vgemg %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0x54
+
+# CHECK: vgemg %v0, %v15
+0xe7 0x0f 0x00 0x00 0x30 0x54
+
+# CHECK: vgemg %v0, %v31
+0xe7 0x0f 0x00 0x00 0x34 0x54
+
+# CHECK: vgemg %v15, %v0
+0xe7 0xf0 0x00 0x00 0x30 0x54
+
+# CHECK: vgemg %v31, %v0
+0xe7 0xf0 0x00 0x00 0x38 0x54
+
+# CHECK: vgemg %v14, %v17
+0xe7 0xe1 0x00 0x00 0x34 0x54
+
+# CHECK: vgemq %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0x54
+
+# CHECK: vgemq %v0, %v15
+0xe7 0x0f 0x00 0x00 0x40 0x54
+
+# CHECK: vgemq %v0, %v31
+0xe7 0x0f 0x00 0x00 0x44 0x54
+
+# CHECK: vgemq %v15, %v0
+0xe7 0xf0 0x00 0x00 0x40 0x54
+
+# CHECK: vgemq %v31, %v0
+0xe7 0xf0 0x00 0x00 0x48 0x54
+
+# CHECK: vgemq %v14, %v17
+0xe7 0xe1 0x00 0x00 0x44 0x54
+
+# CHECK: vlcq %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xde
+
+# CHECK: vlcq %v0, %v15
+0xe7 0x0f 0x00 0x00 0x40 0xde
+
+# CHECK: vlcq %v0, %v31
+0xe7 0x0f 0x00 0x00 0x44 0xde
+
+# CHECK: vlcq %v15, %v0
+0xe7 0xf0 0x00 0x00 0x40 0xde
+
+# CHECK: vlcq %v31, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xde
+
+# CHECK: vlcq %v14, %v17
+0xe7 0xe1 0x00 0x00 0x44 0xde
+
+# CHECK: vlpq %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xdf
+
+# CHECK: vlpq %v0, %v15
+0xe7 0x0f 0x00 0x00 0x40 0xdf
+
+# CHECK: vlpq %v0, %v31
+0xe7 0x0f 0x00 0x00 0x44 0xdf
+
+# CHECK: vlpq %v15, %v0
+0xe7 0xf0 0x00 0x00 0x40 0xdf
+
+# CHECK: vlpq %v31, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xdf
+
+# CHECK: vlpq %v14, %v17
+0xe7 0xe1 0x00 0x00 0x44 0xdf
+
+# CHECK: vmalg %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0xaa
+
+# CHECK: vmalg %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0xaa
+
+# CHECK: vmalg %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0xaa
+
+# CHECK: vmalg %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0xaa
+
+# CHECK: vmalg %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0xaa
+
+# CHECK: vmalg %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0xaa
+
+# CHECK: vmalq %v0, %v0, %v0, %v0
+0xe7 0x00 0x04 0x00 0x00 0xaa
+
+# CHECK: vmalq %v0, %v0, %v0, %v31
+0xe7 0x00 0x04 0x00 0xf1 0xaa
+
+# CHECK: vmalq %v0, %v0, %v31, %v0
+0xe7 0x00 0xf4 0x00 0x02 0xaa
+
+# CHECK: vmalq %v0, %v31, %v0, %v0
+0xe7 0x0f 0x04 0x00 0x04 0xaa
+
+# CHECK: vmalq %v31, %v0, %v0, %v0
+0xe7 0xf0 0x04 0x00 0x08 0xaa
+
+# CHECK: vmalq %v13, %v17, %v21, %v25
+0xe7 0xd1 0x54 0x00 0x97 0xaa
+
+# CHECK: vmahg %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0xab
+
+# CHECK: vmahg %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0xab
+
+# CHECK: vmahg %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0xab
+
+# CHECK: vmahg %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0xab
+
+# CHECK: vmahg %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0xab
+
+# CHECK: vmahg %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0xab
+
+# CHECK: vmahq %v0, %v0, %v0, %v0
+0xe7 0x00 0x04 0x00 0x00 0xab
+
+# CHECK: vmahq %v0, %v0, %v0, %v31
+0xe7 0x00 0x04 0x00 0xf1 0xab
+
+# CHECK: vmahq %v0, %v0, %v31, %v0
+0xe7 0x00 0xf4 0x00 0x02 0xab
+
+# CHECK: vmahq %v0, %v31, %v0, %v0
+0xe7 0x0f 0x04 0x00 0x04 0xab
+
+# CHECK: vmahq %v31, %v0, %v0, %v0
+0xe7 0xf0 0x04 0x00 0x08 0xab
+
+# CHECK: vmahq %v13, %v17, %v21, %v25
+0xe7 0xd1 0x54 0x00 0x97 0xab
+
+# CHECK: vmalhg %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0xa9
+
+# CHECK: vmalhg %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0xa9
+
+# CHECK: vmalhg %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0xa9
+
+# CHECK: vmalhg %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0xa9
+
+# CHECK: vmalhg %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0xa9
+
+# CHECK: vmalhg %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0xa9
+
+# CHECK: vmalhq %v0, %v0, %v0, %v0
+0xe7 0x00 0x04 0x00 0x00 0xa9
+
+# CHECK: vmalhq %v0, %v0, %v0, %v31
+0xe7 0x00 0x04 0x00 0xf1 0xa9
+
+# CHECK: vmalhq %v0, %v0, %v31, %v0
+0xe7 0x00 0xf4 0x00 0x02 0xa9
+
+# CHECK: vmalhq %v0, %v31, %v0, %v0
+0xe7 0x0f 0x04 0x00 0x04 0xa9
+
+# CHECK: vmalhq %v31, %v0, %v0, %v0
+0xe7 0xf0 0x04 0x00 0x08 0xa9
+
+# CHECK: vmalhq %v13, %v17, %v21, %v25
+0xe7 0xd1 0x54 0x00 0x97 0xa9
+
+# CHECK: vmaeg %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0xae
+
+# CHECK: vmaeg %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0xae
+
+# CHECK: vmaeg %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0xae
+
+# CHECK: vmaeg %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0xae
+
+# CHECK: vmaeg %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0xae
+
+# CHECK: vmaeg %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0xae
+
+# CHECK: vmaleg %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0xac
+
+# CHECK: vmaleg %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0xac
+
+# CHECK: vmaleg %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0xac
+
+# CHECK: vmaleg %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0xac
+
+# CHECK: vmaleg %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0xac
+
+# CHECK: vmaleg %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0xac
+
+# CHECK: vmaog %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0xaf
+
+# CHECK: vmaog %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0xaf
+
+# CHECK: vmaog %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0xaf
+
+# CHECK: vmaog %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0xaf
+
+# CHECK: vmaog %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0xaf
+
+# CHECK: vmaog %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0xaf
+
+# CHECK: vmalog %v0, %v0, %v0, %v0
+0xe7 0x00 0x03 0x00 0x00 0xad
+
+# CHECK: vmalog %v0, %v0, %v0, %v31
+0xe7 0x00 0x03 0x00 0xf1 0xad
+
+# CHECK: vmalog %v0, %v0, %v31, %v0
+0xe7 0x00 0xf3 0x00 0x02 0xad
+
+# CHECK: vmalog %v0, %v31, %v0, %v0
+0xe7 0x0f 0x03 0x00 0x04 0xad
+
+# CHECK: vmalog %v31, %v0, %v0, %v0
+0xe7 0xf0 0x03 0x00 0x08 0xad
+
+# CHECK: vmalog %v13, %v17, %v21, %v25
+0xe7 0xd1 0x53 0x00 0x97 0xad
+
+# CHECK: vmlg %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xa2
+
+# CHECK: vmlg %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x32 0xa2
+
+# CHECK: vmlg %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x34 0xa2
+
+# CHECK: vmlg %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xa2
+
+# CHECK: vmlg %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x3a 0xa2
+
+# CHECK: vmlq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xa2
+
+# CHECK: vmlq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xa2
+
+# CHECK: vmlq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xa2
+
+# CHECK: vmlq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xa2
+
+# CHECK: vmlq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xa2
+
+# CHECK: vmhg %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xa3
+
+# CHECK: vmhg %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x32 0xa3
+
+# CHECK: vmhg %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x34 0xa3
+
+# CHECK: vmhg %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xa3
+
+# CHECK: vmhg %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x3a 0xa3
+
+# CHECK: vmhq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xa3
+
+# CHECK: vmhq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xa3
+
+# CHECK: vmhq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xa3
+
+# CHECK: vmhq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xa3
+
+# CHECK: vmhq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xa3
+
+# CHECK: vmlhg %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xa1
+
+# CHECK: vmlhg %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x32 0xa1
+
+# CHECK: vmlhg %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x34 0xa1
+
+# CHECK: vmlhg %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xa1
+
+# CHECK: vmlhg %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x3a 0xa1
+
+# CHECK: vmlhq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xa1
+
+# CHECK: vmlhq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xa1
+
+# CHECK: vmlhq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xa1
+
+# CHECK: vmlhq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xa1
+
+# CHECK: vmlhq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xa1
+
+# CHECK: vmeg %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xa6
+
+# CHECK: vmeg %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x32 0xa6
+
+# CHECK: vmeg %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x34 0xa6
+
+# CHECK: vmeg %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xa6
+
+# CHECK: vmeg %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x3a 0xa6
+
+# CHECK: vmleg %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xa4
+
+# CHECK: vmleg %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x32 0xa4
+
+# CHECK: vmleg %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x34 0xa4
+
+# CHECK: vmleg %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xa4
+
+# CHECK: vmleg %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x3a 0xa4
+
+# CHECK: vmog %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xa7
+
+# CHECK: vmog %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x32 0xa7
+
+# CHECK: vmog %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x34 0xa7
+
+# CHECK: vmog %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xa7
+
+# CHECK: vmog %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x3a 0xa7
+
+# CHECK: vmlog %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xa5
+
+# CHECK: vmlog %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x32 0xa5
+
+# CHECK: vmlog %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x34 0xa5
+
+# CHECK: vmlog %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xa5
+
+# CHECK: vmlog %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x3a 0xa5
+
+# CHECK: vmnq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xfe
+
+# CHECK: vmnq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xfe
+
+# CHECK: vmnq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xfe
+
+# CHECK: vmnq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xfe
+
+# CHECK: vmnq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xfe
+
+# CHECK: vmnlq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xfc
+
+# CHECK: vmnlq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xfc
+
+# CHECK: vmnlq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xfc
+
+# CHECK: vmnlq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xfc
+
+# CHECK: vmnlq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xfc
+
+# CHECK: vmxq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xff
+
+# CHECK: vmxq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xff
+
+# CHECK: vmxq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xff
+
+# CHECK: vmxq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xff
+
+# CHECK: vmxq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xff
+
+# CHECK: vmxlq %v0, %v0, %v0
+0xe7 0x00 0x00 0x00 0x40 0xfd
+
+# CHECK: vmxlq %v0, %v0, %v31
+0xe7 0x00 0xf0 0x00 0x42 0xfd
+
+# CHECK: vmxlq %v0, %v31, %v0
+0xe7 0x0f 0x00 0x00 0x44 0xfd
+
+# CHECK: vmxlq %v31, %v0, %v0
+0xe7 0xf0 0x00 0x00 0x48 0xfd
+
+# CHECK: vmxlq %v18, %v3, %v20
+0xe7 0x23 0x40 0x00 0x4a 0xfd
+
+# CHECK: vr %v0, %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x00 0xb3
+
+# CHECK: vr %v0, %v0, %v0, 0, 15
+0xe7 0x00 0x00 0x0f 0x00 0xb3
+
+# CHECK: vr %v0, %v0, %v0, 15, 0
+0xe7 0x00 0x00 0x00 0xf0 0xb3
+
+# CHECK: vr %v0, %v0, %v31, 0, 0
+0xe7 0x00 0xf0 0x00 0x02 0xb3
+
+# CHECK: vr %v0, %v31, %v0, 0, 0
+0xe7 0x0f 0x00 0x00 0x04 0xb3
+
+# CHECK: vr %v31, %v0, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x08 0xb3
+
+# CHECK: vr %v13, %v17, %v21, 8, 4
+0xe7 0xd1 0x50 0x04 0x86 0xb3
+
+# CHECK: vrf %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x20 0xb3
+
+# CHECK: vrf %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x20 0xb3
+
+# CHECK: vrf %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x22 0xb3
+
+# CHECK: vrf %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xb3
+
+# CHECK: vrf %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xb3
+
+# CHECK: vrf %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x26 0xb3
+
+# CHECK: vrg %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x30 0xb3
+
+# CHECK: vrg %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x30 0xb3
+
+# CHECK: vrg %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x32 0xb3
+
+# CHECK: vrg %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x34 0xb3
+
+# CHECK: vrg %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x38 0xb3
+
+# CHECK: vrg %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x36 0xb3
+
+# CHECK: vrq %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x40 0xb3
+
+# CHECK: vrq %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x40 0xb3
+
+# CHECK: vrq %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x42 0xb3
+
+# CHECK: vrq %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x44 0xb3
+
+# CHECK: vrq %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x48 0xb3
+
+# CHECK: vrq %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x46 0xb3
+
+# CHECK: vrl %v0, %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x00 0xb1
+
+# CHECK: vrl %v0, %v0, %v0, 0, 15
+0xe7 0x00 0x00 0x0f 0x00 0xb1
+
+# CHECK: vrl %v0, %v0, %v0, 15, 0
+0xe7 0x00 0x00 0x00 0xf0 0xb1
+
+# CHECK: vrl %v0, %v0, %v31, 0, 0
+0xe7 0x00 0xf0 0x00 0x02 0xb1
+
+# CHECK: vrl %v0, %v31, %v0, 0, 0
+0xe7 0x0f 0x00 0x00 0x04 0xb1
+
+# CHECK: vrl %v31, %v0, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x08 0xb1
+
+# CHECK: vrl %v13, %v17, %v21, 8, 4
+0xe7 0xd1 0x50 0x04 0x86 0xb1
+
+# CHECK: vrlf %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x20 0xb1
+
+# CHECK: vrlf %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x20 0xb1
+
+# CHECK: vrlf %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x22 0xb1
+
+# CHECK: vrlf %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xb1
+
+# CHECK: vrlf %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xb1
+
+# CHECK: vrlf %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x26 0xb1
+
+# CHECK: vrlg %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x30 0xb1
+
+# CHECK: vrlg %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x30 0xb1
+
+# CHECK: vrlg %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x32 0xb1
+
+# CHECK: vrlg %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x34 0xb1
+
+# CHECK: vrlg %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x38 0xb1
+
+# CHECK: vrlg %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x36 0xb1
+
+# CHECK: vrlq %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x40 0xb1
+
+# CHECK: vrlq %v0, %v0, %v0, 15
+0xe7 0x00 0x00 0x0f 0x40 0xb1
+
+# CHECK: vrlq %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x42 0xb1
+
+# CHECK: vrlq %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x44 0xb1
+
+# CHECK: vrlq %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x48 0xb1
+
+# CHECK: vrlq %v13, %v17, %v21, 4
+0xe7 0xd1 0x50 0x04 0x46 0xb1
+
+# CHECK: vtp %v0
+0xe6 0x00 0x00 0x00 0x00 0x5f
+
+# CHECK: vtp %v0, 65535
+0xe6 0x00 0x0f 0xff 0xf0 0x5f
+
+# CHECK: vtp %v15, 4660
+0xe6 0x0f 0x01 0x23 0x40 0x5f
+
+# CHECK: vtz %v0, %v0, 0
+0xe6 0x00 0x00 0x00 0x00 0x7f
+
+# CHECK: vtz %v0, %v0, 65535
+0xe6 0x00 0x0f 0xff 0xf0 0x7f
+
+# CHECK: vtz %v0, %v31, 0
+0xe6 0x00 0xf0 0x00 0x02 0x7f
+
+# CHECK: vtz %v31, %v0, 0
+0xe6 0x0f 0x00 0x00 0x04 0x7f
+
+# CHECK: vtz %v13, %v17, 4660
+0xe6 0x0d 0x11 0x23 0x42 0x7f
+
+# CHECK: vuphg %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xd7
+
+# CHECK: vuphg %v0, %v15
+0xe7 0x0f 0x00 0x00 0x30 0xd7
+
+# CHECK: vuphg %v0, %v31
+0xe7 0x0f 0x00 0x00 0x34 0xd7
+
+# CHECK: vuphg %v15, %v0
+0xe7 0xf0 0x00 0x00 0x30 0xd7
+
+# CHECK: vuphg %v31, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xd7
+
+# CHECK: vuphg %v14, %v17
+0xe7 0xe1 0x00 0x00 0x34 0xd7
+
+# CHECK: vuplg %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xd6
+
+# CHECK: vuplg %v0, %v15
+0xe7 0x0f 0x00 0x00 0x30 0xd6
+
+# CHECK: vuplg %v0, %v31
+0xe7 0x0f 0x00 0x00 0x34 0xd6
+
+# CHECK: vuplg %v15, %v0
+0xe7 0xf0 0x00 0x00 0x30 0xd6
+
+# CHECK: vuplg %v31, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xd6
+
+# CHECK: vuplg %v14, %v17
+0xe7 0xe1 0x00 0x00 0x34 0xd6
+
+# CHECK: vuplhg %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xd5
+
+# CHECK: vuplhg %v0, %v15
+0xe7 0x0f 0x00 0x00 0x30 0xd5
+
+# CHECK: vuplhg %v0, %v31
+0xe7 0x0f 0x00 0x00 0x34 0xd5
+
+# CHECK: vuplhg %v15, %v0
+0xe7 0xf0 0x00 0x00 0x30 0xd5
+
+# CHECK: vuplhg %v31, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xd5
+
+# CHECK: vuplhg %v14, %v17
+0xe7 0xe1 0x00 0x00 0x34 0xd5
+
+# CHECK: vupllg %v0, %v0
+0xe7 0x00 0x00 0x00 0x30 0xd4
+
+# CHECK: vupllg %v0, %v15
+0xe7 0x0f 0x00 0x00 0x30 0xd4
+
+# CHECK: vupllg %v0, %v31
+0xe7 0x0f 0x00 0x00 0x34 0xd4
+
+# CHECK: vupllg %v15, %v0
+0xe7 0xf0 0x00 0x00 0x30 0xd4
+
+# CHECK: vupllg %v31, %v0
+0xe7 0xf0 0x00 0x00 0x38 0xd4
+
+# CHECK: vupllg %v14, %v17
+0xe7 0xe1 0x00 0x00 0x34 0xd4
+
diff --git a/llvm/test/MC/SystemZ/insn-bad-arch15.s b/llvm/test/MC/SystemZ/insn-bad-arch15.s
new file mode 100644
index 00000000000000..915efbc9423066
--- /dev/null
+++ b/llvm/test/MC/SystemZ/insn-bad-arch15.s
@@ -0,0 +1,360 @@
+# For arch15 only.
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=arch15 < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid use of indexed addressing
+#CHECK: cal %r2, 160(%r1,%r15), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: cal %r2, -1(%r1), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: cal %r2, 4096(%r1), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: cal %r2, 0(%r1), -1(%r15)
+#CHECK: error: invalid operand
+#CHECK: cal %r2, 0(%r1), 4096(%r15)
+
+ cal %r2, 160(%r1,%r15), 160(%r15)
+ cal %r2, -1(%r1), 160(%r15)
+ cal %r2, 4096(%r1), 160(%r15)
+ cal %r2, 0(%r1), -1(%r15)
+ cal %r2, 0(%r1), 4096(%r15)
+
+#CHECK: error: invalid use of indexed addressing
+#CHECK: calg %r2, 160(%r1,%r15), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: calg %r2, -1(%r1), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: calg %r2, 4096(%r1), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: calg %r2, 0(%r1), -1(%r15)
+#CHECK: error: invalid operand
+#CHECK: calg %r2, 0(%r1), 4096(%r15)
+
+ calg %r2, 160(%r1,%r15), 160(%r15)
+ calg %r2, -1(%r1), 160(%r15)
+ calg %r2, 4096(%r1), 160(%r15)
+ calg %r2, 0(%r1), -1(%r15)
+ calg %r2, 0(%r1), 4096(%r15)
+
+#CHECK: error: invalid use of indexed addressing
+#CHECK: calgf %r2, 160(%r1,%r15), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: calgf %r2, -1(%r1), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: calgf %r2, 4096(%r1), 160(%r15)
+#CHECK: error: invalid operand
+#CHECK: calgf %r2, 0(%r1), -1(%r15)
+#CHECK: error: invalid operand
+#CHECK: calgf %r2, 0(%r1), 4096(%r15)
+
+ calgf %r2, 160(%r1,%r15), 160(%r15)
+ calgf %r2, -1(%r1), 160(%r15)
+ calgf %r2, 4096(%r1), 160(%r15)
+ calgf %r2, 0(%r1), -1(%r15)
+ calgf %r2, 0(%r1), 4096(%r15)
+
+#CHECK: error: invalid operand
+#CHECK: kimd %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: kimd %r0, %r0, 16
+
+ kimd %r0, %r0, -1
+ kimd %r0, %r0, 16
+
+#CHECK: error: invalid operand
+#CHECK: klmd %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: klmd %r0, %r0, 16
+
+ klmd %r0, %r0, -1
+ klmd %r0, %r0, 16
+
+#CHECK: error: invalid operand
+#CHECK: lxab %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lxab %r0, 524288
+
+ lxab %r0, -524289
+ lxab %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lxah %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lxah %r0, 524288
+
+ lxah %r0, -524289
+ lxah %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lxaf %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lxaf %r0, 524288
+
+ lxaf %r0, -524289
+ lxaf %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lxag %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lxag %r0, 524288
+
+ lxag %r0, -524289
+ lxag %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: lxaq %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: lxaq %r0, 524288
+
+ lxaq %r0, -524289
+ lxaq %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llxab %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llxab %r0, 524288
+
+ llxab %r0, -524289
+ llxab %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llxah %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llxah %r0, 524288
+
+ llxah %r0, -524289
+ llxah %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llxaf %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llxaf %r0, 524288
+
+ llxaf %r0, -524289
+ llxaf %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llxag %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llxag %r0, 524288
+
+ llxag %r0, -524289
+ llxag %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: llxaq %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: llxaq %r0, 524288
+
+ llxaq %r0, -524289
+ llxaq %r0, 524288
+
+#CHECK: error: invalid operand
+#CHECK: pfcr %r0, %r0, -524289
+#CHECK: error: invalid operand
+#CHECK: pfcr %r0, %r0, 524288
+#CHECK: error: invalid use of indexed addressing
+#CHECK: pfcr %r0, %r0, 0(%r1,%r2)
+
+ pfcr %r0, %r0, -524289
+ pfcr %r0, %r0, 524288
+ pfcr %r0, %r0, 0(%r1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vcvbq %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vcvbq %v0, %v0, 16
+
+ vcvbq %v0, %v0, -1
+ vcvbq %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vcvdq %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcvdq %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vcvdq %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vcvdq %v0, %v0, 256, 0
+
+ vcvdq %v0, %v0, 0, -1
+ vcvdq %v0, %v0, 0, 16
+ vcvdq %v0, %v0, -1, 0
+ vcvdq %v0, %v0, 256, 0
+
+#CHECK: error: invalid operand
+#CHECK: vd %v0, %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vd %v0, %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vd %v0, %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vd %v0, %v0, %v0, 16, 0
+
+ vd %v0, %v0, %v0, 0, -1
+ vd %v0, %v0, %v0, 0, 16
+ vd %v0, %v0, %v0, -1, 0
+ vd %v0, %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vdf %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vdf %v0, %v0, %v0, 16
+
+ vdf %v0, %v0, %v0, -1
+ vdf %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vdg %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vdg %v0, %v0, %v0, 16
+
+ vdg %v0, %v0, %v0, -1
+ vdg %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vdq %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vdq %v0, %v0, %v0, 16
+
+ vdq %v0, %v0, %v0, -1
+ vdq %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vdl %v0, %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vdl %v0, %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vdl %v0, %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vdl %v0, %v0, %v0, 16, 0
+
+ vdl %v0, %v0, %v0, 0, -1
+ vdl %v0, %v0, %v0, 0, 16
+ vdl %v0, %v0, %v0, -1, 0
+ vdl %v0, %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vdlf %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vdlf %v0, %v0, %v0, 16
+
+ vdlf %v0, %v0, %v0, -1
+ vdlf %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vdlg %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vdlg %v0, %v0, %v0, 16
+
+ vdlg %v0, %v0, %v0, -1
+ vdlg %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vdlq %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vdlq %v0, %v0, %v0, 16
+
+ vdlq %v0, %v0, %v0, -1
+ vdlq %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: veval %v0, %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: veval %v0, %v0, %v0, %v0, 256
+
+ veval %v0, %v0, %v0, %v0, -1
+ veval %v0, %v0, %v0, %v0, 256
+
+#CHECK: error: invalid operand
+#CHECK: vr %v0, %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vr %v0, %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vr %v0, %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vr %v0, %v0, %v0, 16, 0
+
+ vr %v0, %v0, %v0, 0, -1
+ vr %v0, %v0, %v0, 0, 16
+ vr %v0, %v0, %v0, -1, 0
+ vr %v0, %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vrf %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vrf %v0, %v0, %v0, 16
+
+ vrf %v0, %v0, %v0, -1
+ vrf %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vrg %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vrg %v0, %v0, %v0, 16
+
+ vrg %v0, %v0, %v0, -1
+ vrg %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vrq %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vrq %v0, %v0, %v0, 16
+
+ vrq %v0, %v0, %v0, -1
+ vrq %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vrl %v0, %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vrl %v0, %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vrl %v0, %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vrl %v0, %v0, %v0, 16, 0
+
+ vrl %v0, %v0, %v0, 0, -1
+ vrl %v0, %v0, %v0, 0, 16
+ vrl %v0, %v0, %v0, -1, 0
+ vrl %v0, %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vrlf %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vrlf %v0, %v0, %v0, 16
+
+ vrlf %v0, %v0, %v0, -1
+ vrlf %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vrlg %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vrlg %v0, %v0, %v0, 16
+
+ vrlg %v0, %v0, %v0, -1
+ vrlg %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vrlq %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vrlq %v0, %v0, %v0, 16
+
+ vrlq %v0, %v0, %v0, -1
+ vrlq %v0, %v0, %v0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vtp %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vtp %v0, 65536
+
+ vtp %v0, -1
+ vtp %v0, 65536
+
+#CHECK: error: invalid operand
+#CHECK: vtz %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vtz %v0, %v0, 65536
+
+ vtz %v0, %v0, -1
+ vtz %v0, %v0, 65536
+
diff --git a/llvm/test/MC/SystemZ/insn-bad-z16.s b/llvm/test/MC/SystemZ/insn-bad-z16.s
index db69c202622796..c3668c6d08a4d8 100644
--- a/llvm/test/MC/SystemZ/insn-bad-z16.s
+++ b/llvm/test/MC/SystemZ/insn-bad-z16.s
@@ -2,6 +2,51 @@
# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z16 < %s 2> %t
# RUN: FileCheck < %t %s
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: bdepg %r0, %r0, %r0
+
+ bdepg %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: bextg %r0, %r0, %r0
+
+ bextg %r0, %r0, %r0
+
+#CHECK: error: instruction requires: concurrent-functions
+#CHECK: cal %r0, 0, 0
+
+ cal %r0, 0, 0
+
+#CHECK: error: instruction requires: concurrent-functions
+#CHECK: calg %r0, 0, 0
+
+ calg %r0, 0, 0
+
+#CHECK: error: instruction requires: concurrent-functions
+#CHECK: calgf %r0, 0, 0
+
+ calgf %r0, 0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: clzg %r0, %r0
+
+ clzg %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: ctzg %r0, %r0
+
+ ctzg %r0, %r0
+
+#CHECK: error: instruction requires: message-security-assist-extension12
+#CHECK: kimd %r0, %r0, 0
+
+ kimd %r0, %r0, 0
+
+#CHECK: error: instruction requires: message-security-assist-extension12
+#CHECK: klmd %r0, %r0, 0
+
+ klmd %r0, %r0, 0
+
#CHECK: error: invalid operand
#CHECK: lbear -1
#CHECK: error: invalid operand
@@ -24,6 +69,61 @@
lpswey 524288
lpswey 0(%r1,%r2)
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: lxab %r0, 0
+
+ lxab %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: lxah %r0, 0
+
+ lxah %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: lxaf %r0, 0
+
+ lxaf %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: lxag %r0, 0
+
+ lxag %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: lxaq %r0, 0
+
+ lxaq %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: llxab %r0, 0
+
+ llxab %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: llxah %r0, 0
+
+ llxah %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: llxaf %r0, 0
+
+ llxaf %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: llxag %r0, 0
+
+ llxag %r0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-4
+#CHECK: llxaq %r0, 0
+
+ llxaq %r0, 0
+
+#CHECK: error: instruction requires: concurrent-functions
+#CHECK: pfcr %r0, %r0, 0
+
+ pfcr %r0, %r0, 0
+
#CHECK: error: invalid operand
#CHECK: qpaci -1
#CHECK: error: invalid operand
@@ -54,6 +154,60 @@
stbear 4096
stbear 0(%r1,%r2)
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vavgq %v0, %v0, %v0
+
+ vavgq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vavglq %v0, %v0, %v0
+
+ vavglq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vblend %v0, %v0, %v0, %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vblendb %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vblendh %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vblendf %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vblendg %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vblendq %v0, %v0, %v0, %v0
+
+ vblend %v0, %v0, %v0, %v0, 0
+ vblendb %v0, %v0, %v0, %v0
+ vblendh %v0, %v0, %v0, %v0
+ vblendf %v0, %v0, %v0, %v0
+ vblendg %v0, %v0, %v0, %v0
+ vblendq %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vceqq %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vceqqs %v0, %v0, %v0
+
+ vceqq %v0, %v0, %v0
+ vceqqs %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vchq %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vchqs %v0, %v0, %v0
+
+ vchq %v0, %v0, %v0
+ vchqs %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vchlq %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vchlqs %v0, %v0, %v0
+
+ vchlq %v0, %v0, %v0
+ vchlqs %v0, %v0, %v0
+
#CHECK: error: invalid operand
#CHECK: vcfn %v0, %v0, 0, -1
#CHECK: error: invalid operand
@@ -132,6 +286,16 @@
vclzdp %v0, %v0, -1
vclzdp %v0, %v0, 16
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vclzq %v0, %v0
+
+ vclzq %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vctzq %v0, %v0
+
+ vctzq %v0, %v0
+
#CHECK: error: invalid operand
#CHECK: vcsph %v0, %v0, %v0, -1
#CHECK: error: invalid operand
@@ -140,6 +304,191 @@
vcsph %v0, %v0, %v0, -1
vcsph %v0, %v0, %v0, 16
+#CHECK: error: instruction requires: vector-packed-decimal-enhancement-3
+#CHECK: vcvbq %v0, %v0, 0
+
+ vcvbq %v0, %v0, 0
+
+#CHECK: error: instruction requires: vector-packed-decimal-enhancement-3
+#CHECK: vcvdq %v0, %v0, 0, 0
+
+ vcvdq %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vd %v0, %v0, %v0, 0, 0
+#CHECK: vdf %v0, %v0, %v0, 0
+#CHECK: vdg %v0, %v0, %v0, 0
+#CHECK: vdq %v0, %v0, %v0, 0
+
+ vd %v0, %v0, %v0, 0, 0
+ vdf %v0, %v0, %v0, 0
+ vdg %v0, %v0, %v0, 0
+ vdq %v0, %v0, %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vdl %v0, %v0, %v0, 0, 0
+#CHECK: vdlf %v0, %v0, %v0, 0
+#CHECK: vdlg %v0, %v0, %v0, 0
+#CHECK: vdlq %v0, %v0, %v0, 0
+
+ vdl %v0, %v0, %v0, 0, 0
+ vdlf %v0, %v0, %v0, 0
+ vdlg %v0, %v0, %v0, 0
+ vdlq %v0, %v0, %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: veval %v0, %v0, %v0, %v0, 0
+
+ veval %v0, %v0, %v0, %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vecq %v0, %v0
+
+ vecq %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: veclq %v0, %v0
+
+ veclq %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vgem %v0, %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vgemb %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vgemh %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vgemf %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vgemg %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vgemq %v0, %v0
+
+ vgem %v0, %v0, 0
+ vgemb %v0, %v0
+ vgemh %v0, %v0
+ vgemf %v0, %v0
+ vgemg %v0, %v0
+ vgemq %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vlcq %v0, %v0
+
+ vlcq %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vlpq %v0, %v0
+
+ vlpq %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmalg %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmalq %v0, %v0, %v0, %v0
+
+ vmalg %v0, %v0, %v0, %v0
+ vmalq %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmahg %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmahq %v0, %v0, %v0, %v0
+
+ vmahg %v0, %v0, %v0, %v0
+ vmahq %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmalhg %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmalhq %v0, %v0, %v0, %v0
+
+ vmalhg %v0, %v0, %v0, %v0
+ vmalhq %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmaeg %v0, %v0, %v0, %v0
+
+ vmaeg %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmaleg %v0, %v0, %v0, %v0
+
+ vmaleg %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmaog %v0, %v0, %v0, %v0
+
+ vmaog %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmalog %v0, %v0, %v0, %v0
+
+ vmalog %v0, %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmlg %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmlq %v0, %v0, %v0
+
+ vmlg %v0, %v0, %v0
+ vmlq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmhg %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmhq %v0, %v0, %v0
+
+ vmhg %v0, %v0, %v0
+ vmhq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmlhg %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmlhq %v0, %v0, %v0
+
+ vmlhg %v0, %v0, %v0
+ vmlhq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmeg %v0, %v0, %v0
+
+ vmeg %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmleg %v0, %v0, %v0
+
+ vmleg %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmog %v0, %v0, %v0
+
+ vmog %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmlog %v0, %v0, %v0
+
+ vmlog %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmnq %v0, %v0, %v0
+
+ vmnq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmnlq %v0, %v0, %v0
+
+ vmnlq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmxq %v0, %v0, %v0
+
+ vmxq %v0, %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vmxlq %v0, %v0, %v0
+
+ vmxlq %v0, %v0, %v0
+
#CHECK: error: invalid operand
#CHECK: vpkzr %v0, %v0, %v0, 0, -1
#CHECK: error: invalid operand
@@ -154,6 +503,28 @@
vpkzr %v0, %v0, %v0, -1, 0
vpkzr %v0, %v0, %v0, 256, 0
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vr %v0, %v0, %v0, 0, 0
+#CHECK: vrf %v0, %v0, %v0, 0
+#CHECK: vrg %v0, %v0, %v0, 0
+#CHECK: vrq %v0, %v0, %v0, 0
+
+ vr %v0, %v0, %v0, 0, 0
+ vrf %v0, %v0, %v0, 0
+ vrg %v0, %v0, %v0, 0
+ vrq %v0, %v0, %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vrl %v0, %v0, %v0, 0, 0
+#CHECK: vrlf %v0, %v0, %v0, 0
+#CHECK: vrlg %v0, %v0, %v0, 0
+#CHECK: vrlq %v0, %v0, %v0, 0
+
+ vrl %v0, %v0, %v0, 0, 0
+ vrlf %v0, %v0, %v0, 0
+ vrlg %v0, %v0, %v0, 0
+ vrlq %v0, %v0, %v0, 0
+
#CHECK: error: invalid operand
#CHECK: vschp %v0, %v0, %v0, 0, -1
#CHECK: error: invalid operand
@@ -206,6 +577,36 @@
vsrpr %v0, %v0, %v0, -1, 0
vsrpr %v0, %v0, %v0, 256, 0
+#CHECK: error: instruction requires: vector-packed-decimal-enhancement-3
+#CHECK: vtp %v0, 0
+
+ vtp %v0, 0
+
+#CHECK: error: instruction requires: vector-packed-decimal-enhancement-3
+#CHECK: vtz %v0, %v0, 0
+
+ vtz %v0, %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vuphg %v0, %v0
+
+ vuphg %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vuplg %v0, %v0
+
+ vuplg %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vuplhg %v0, %v0
+
+ vuplhg %v0, %v0
+
+#CHECK: error: instruction requires: vector-enhancements-3
+#CHECK: vupllg %v0, %v0
+
+ vupllg %v0, %v0
+
#CHECK: error: invalid operand
#CHECK: vupkzh %v0, %v0, -1
#CHECK: error: invalid operand
diff --git a/llvm/test/MC/SystemZ/insn-good-arch15.s b/llvm/test/MC/SystemZ/insn-good-arch15.s
new file mode 100644
index 00000000000000..46ff13db0b549b
--- /dev/null
+++ b/llvm/test/MC/SystemZ/insn-good-arch15.s
@@ -0,0 +1,1348 @@
+# For arch15 and above.
+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=arch15 -show-encoding %s \
+# RUN: | FileCheck %s
+
+#CHECK: bdepg %r0, %r0, %r0 # encoding: [0xb9,0x6d,0x00,0x00]
+#CHECK: bdepg %r0, %r0, %r15 # encoding: [0xb9,0x6d,0xf0,0x00]
+#CHECK: bdepg %r0, %r15, %r0 # encoding: [0xb9,0x6d,0x00,0x0f]
+#CHECK: bdepg %r15, %r0, %r0 # encoding: [0xb9,0x6d,0x00,0xf0]
+#CHECK: bdepg %r7, %r8, %r9 # encoding: [0xb9,0x6d,0x90,0x78]
+
+ bdepg %r0,%r0,%r0
+ bdepg %r0,%r0,%r15
+ bdepg %r0,%r15,%r0
+ bdepg %r15,%r0,%r0
+ bdepg %r7,%r8,%r9
+
+#CHECK: bextg %r0, %r0, %r0 # encoding: [0xb9,0x6c,0x00,0x00]
+#CHECK: bextg %r0, %r0, %r15 # encoding: [0xb9,0x6c,0xf0,0x00]
+#CHECK: bextg %r0, %r15, %r0 # encoding: [0xb9,0x6c,0x00,0x0f]
+#CHECK: bextg %r15, %r0, %r0 # encoding: [0xb9,0x6c,0x00,0xf0]
+#CHECK: bextg %r7, %r8, %r9 # encoding: [0xb9,0x6c,0x90,0x78]
+
+ bextg %r0,%r0,%r0
+ bextg %r0,%r0,%r15
+ bextg %r0,%r15,%r0
+ bextg %r15,%r0,%r0
+ bextg %r7,%r8,%r9
+
+#CHECK: cal %r0, 0, 0 # encoding: [0xc8,0x06,0x00,0x00,0x00,0x00]
+#CHECK: cal %r2, 0(%r1), 0(%r15) # encoding: [0xc8,0x26,0x10,0x00,0xf0,0x00]
+#CHECK: cal %r2, 1(%r1), 0(%r15) # encoding: [0xc8,0x26,0x10,0x01,0xf0,0x00]
+#CHECK: cal %r2, 4095(%r1), 0(%r15) # encoding: [0xc8,0x26,0x1f,0xff,0xf0,0x00]
+#CHECK: cal %r2, 0(%r1), 1(%r15) # encoding: [0xc8,0x26,0x10,0x00,0xf0,0x01]
+#CHECK: cal %r2, 0(%r1), 4095(%r15) # encoding: [0xc8,0x26,0x10,0x00,0xff,0xff]
+
+ cal %r0, 0, 0
+ cal %r2, 0(%r1), 0(%r15)
+ cal %r2, 1(%r1), 0(%r15)
+ cal %r2, 4095(%r1), 0(%r15)
+ cal %r2, 0(%r1), 1(%r15)
+ cal %r2, 0(%r1), 4095(%r15)
+
+#CHECK: calg %r0, 0, 0 # encoding: [0xc8,0x07,0x00,0x00,0x00,0x00]
+#CHECK: calg %r2, 0(%r1), 0(%r15) # encoding: [0xc8,0x27,0x10,0x00,0xf0,0x00]
+#CHECK: calg %r2, 1(%r1), 0(%r15) # encoding: [0xc8,0x27,0x10,0x01,0xf0,0x00]
+#CHECK: calg %r2, 4095(%r1), 0(%r15) # encoding: [0xc8,0x27,0x1f,0xff,0xf0,0x00]
+#CHECK: calg %r2, 0(%r1), 1(%r15) # encoding: [0xc8,0x27,0x10,0x00,0xf0,0x01]
+#CHECK: calg %r2, 0(%r1), 4095(%r15) # encoding: [0xc8,0x27,0x10,0x00,0xff,0xff]
+
+ calg %r0, 0, 0
+ calg %r2, 0(%r1), 0(%r15)
+ calg %r2, 1(%r1), 0(%r15)
+ calg %r2, 4095(%r1), 0(%r15)
+ calg %r2, 0(%r1), 1(%r15)
+ calg %r2, 0(%r1), 4095(%r15)
+
+#CHECK: calgf %r0, 0, 0 # encoding: [0xc8,0x0f,0x00,0x00,0x00,0x00]
+#CHECK: calgf %r2, 0(%r1), 0(%r15) # encoding: [0xc8,0x2f,0x10,0x00,0xf0,0x00]
+#CHECK: calgf %r2, 1(%r1), 0(%r15) # encoding: [0xc8,0x2f,0x10,0x01,0xf0,0x00]
+#CHECK: calgf %r2, 4095(%r1), 0(%r15) # encoding: [0xc8,0x2f,0x1f,0xff,0xf0,0x00]
+#CHECK: calgf %r2, 0(%r1), 1(%r15) # encoding: [0xc8,0x2f,0x10,0x00,0xf0,0x01]
+#CHECK: calgf %r2, 0(%r1), 4095(%r15) # encoding: [0xc8,0x2f,0x10,0x00,0xff,0xff]
+
+ calgf %r0, 0, 0
+ calgf %r2, 0(%r1), 0(%r15)
+ calgf %r2, 1(%r1), 0(%r15)
+ calgf %r2, 4095(%r1), 0(%r15)
+ calgf %r2, 0(%r1), 1(%r15)
+ calgf %r2, 0(%r1), 4095(%r15)
+
+#CHECK: clzg %r0, %r15 # encoding: [0xb9,0x68,0x00,0x0f]
+#CHECK: clzg %r7, %r8 # encoding: [0xb9,0x68,0x00,0x78]
+#CHECK: clzg %r15, %r0 # encoding: [0xb9,0x68,0x00,0xf0]
+
+ clzg %r0, %r15
+ clzg %r7, %r8
+ clzg %r15, %r0
+
+#CHECK: ctzg %r0, %r15 # encoding: [0xb9,0x69,0x00,0x0f]
+#CHECK: ctzg %r7, %r8 # encoding: [0xb9,0x69,0x00,0x78]
+#CHECK: ctzg %r15, %r0 # encoding: [0xb9,0x69,0x00,0xf0]
+
+ ctzg %r0, %r15
+ ctzg %r7, %r8
+ ctzg %r15, %r0
+
+#CHECK: kimd %r0, %r0, 0 # encoding: [0xb9,0x3e,0x00,0x00]
+#CHECK: kimd %r0, %r0, 15 # encoding: [0xb9,0x3e,0xf0,0x00]
+#CHECK: kimd %r0, %r14, 0 # encoding: [0xb9,0x3e,0x00,0x0e]
+#CHECK: kimd %r15, %r0, 0 # encoding: [0xb9,0x3e,0x00,0xf0]
+#CHECK: kimd %r7, %r10, 13 # encoding: [0xb9,0x3e,0xd0,0x7a]
+
+ kimd %r0, %r0, 0
+ kimd %r0, %r0, 15
+ kimd %r0, %r14, 0
+ kimd %r15, %r0, 0
+ kimd %r7, %r10, 13
+
+#CHECK: klmd %r0, %r0, 0 # encoding: [0xb9,0x3f,0x00,0x00]
+#CHECK: klmd %r0, %r0, 15 # encoding: [0xb9,0x3f,0xf0,0x00]
+#CHECK: klmd %r0, %r14, 0 # encoding: [0xb9,0x3f,0x00,0x0e]
+#CHECK: klmd %r15, %r0, 0 # encoding: [0xb9,0x3f,0x00,0xf0]
+#CHECK: klmd %r7, %r10, 13 # encoding: [0xb9,0x3f,0xd0,0x7a]
+
+ klmd %r0, %r0, 0
+ klmd %r0, %r0, 15
+ klmd %r0, %r14, 0
+ klmd %r15, %r0, 0
+ klmd %r7, %r10, 13
+
+#CHECK: lxab %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x60]
+#CHECK: lxab %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x60]
+#CHECK: lxab %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x60]
+#CHECK: lxab %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x60]
+#CHECK: lxab %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x60]
+#CHECK: lxab %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x60]
+#CHECK: lxab %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x60]
+#CHECK: lxab %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x60]
+#CHECK: lxab %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x60]
+#CHECK: lxab %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x60]
+
+ lxab %r0, -524288
+ lxab %r0, -1
+ lxab %r0, 0
+ lxab %r0, 1
+ lxab %r0, 524287
+ lxab %r0, 0(%r1)
+ lxab %r0, 0(%r15)
+ lxab %r0, 524287(%r1,%r15)
+ lxab %r0, 524287(%r15,%r1)
+ lxab %r15, 0
+
+#CHECK: lxah %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x62]
+#CHECK: lxah %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x62]
+#CHECK: lxah %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x62]
+#CHECK: lxah %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x62]
+#CHECK: lxah %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x62]
+#CHECK: lxah %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x62]
+#CHECK: lxah %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x62]
+#CHECK: lxah %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x62]
+#CHECK: lxah %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x62]
+#CHECK: lxah %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x62]
+
+ lxah %r0, -524288
+ lxah %r0, -1
+ lxah %r0, 0
+ lxah %r0, 1
+ lxah %r0, 524287
+ lxah %r0, 0(%r1)
+ lxah %r0, 0(%r15)
+ lxah %r0, 524287(%r1,%r15)
+ lxah %r0, 524287(%r15,%r1)
+ lxah %r15, 0
+
+#CHECK: lxaf %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x64]
+#CHECK: lxaf %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x64]
+#CHECK: lxaf %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x64]
+#CHECK: lxaf %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x64]
+#CHECK: lxaf %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x64]
+#CHECK: lxaf %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x64]
+#CHECK: lxaf %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x64]
+#CHECK: lxaf %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x64]
+#CHECK: lxaf %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x64]
+#CHECK: lxaf %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x64]
+
+ lxaf %r0, -524288
+ lxaf %r0, -1
+ lxaf %r0, 0
+ lxaf %r0, 1
+ lxaf %r0, 524287
+ lxaf %r0, 0(%r1)
+ lxaf %r0, 0(%r15)
+ lxaf %r0, 524287(%r1,%r15)
+ lxaf %r0, 524287(%r15,%r1)
+ lxaf %r15, 0
+
+#CHECK: lxag %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x66]
+#CHECK: lxag %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x66]
+#CHECK: lxag %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x66]
+#CHECK: lxag %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x66]
+#CHECK: lxag %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x66]
+#CHECK: lxag %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x66]
+#CHECK: lxag %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x66]
+#CHECK: lxag %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x66]
+#CHECK: lxag %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x66]
+#CHECK: lxag %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x66]
+
+ lxag %r0, -524288
+ lxag %r0, -1
+ lxag %r0, 0
+ lxag %r0, 1
+ lxag %r0, 524287
+ lxag %r0, 0(%r1)
+ lxag %r0, 0(%r15)
+ lxag %r0, 524287(%r1,%r15)
+ lxag %r0, 524287(%r15,%r1)
+ lxag %r15, 0
+
+#CHECK: lxaq %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x68]
+#CHECK: lxaq %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x68]
+#CHECK: lxaq %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x68]
+#CHECK: lxaq %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x68]
+#CHECK: lxaq %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x68]
+#CHECK: lxaq %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x68]
+#CHECK: lxaq %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x68]
+#CHECK: lxaq %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x68]
+#CHECK: lxaq %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x68]
+#CHECK: lxaq %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x68]
+
+ lxaq %r0, -524288
+ lxaq %r0, -1
+ lxaq %r0, 0
+ lxaq %r0, 1
+ lxaq %r0, 524287
+ lxaq %r0, 0(%r1)
+ lxaq %r0, 0(%r15)
+ lxaq %r0, 524287(%r1,%r15)
+ lxaq %r0, 524287(%r15,%r1)
+ lxaq %r15, 0
+
+#CHECK: llxab %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x61]
+#CHECK: llxab %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x61]
+#CHECK: llxab %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x61]
+#CHECK: llxab %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x61]
+#CHECK: llxab %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x61]
+#CHECK: llxab %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x61]
+#CHECK: llxab %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x61]
+#CHECK: llxab %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x61]
+#CHECK: llxab %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x61]
+#CHECK: llxab %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x61]
+
+ llxab %r0, -524288
+ llxab %r0, -1
+ llxab %r0, 0
+ llxab %r0, 1
+ llxab %r0, 524287
+ llxab %r0, 0(%r1)
+ llxab %r0, 0(%r15)
+ llxab %r0, 524287(%r1,%r15)
+ llxab %r0, 524287(%r15,%r1)
+ llxab %r15, 0
+
+#CHECK: llxah %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x63]
+#CHECK: llxah %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x63]
+#CHECK: llxah %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x63]
+#CHECK: llxah %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x63]
+#CHECK: llxah %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x63]
+#CHECK: llxah %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x63]
+#CHECK: llxah %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x63]
+#CHECK: llxah %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x63]
+#CHECK: llxah %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x63]
+#CHECK: llxah %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x63]
+
+ llxah %r0, -524288
+ llxah %r0, -1
+ llxah %r0, 0
+ llxah %r0, 1
+ llxah %r0, 524287
+ llxah %r0, 0(%r1)
+ llxah %r0, 0(%r15)
+ llxah %r0, 524287(%r1,%r15)
+ llxah %r0, 524287(%r15,%r1)
+ llxah %r15, 0
+
+#CHECK: llxaf %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x65]
+#CHECK: llxaf %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x65]
+#CHECK: llxaf %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x65]
+#CHECK: llxaf %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x65]
+#CHECK: llxaf %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x65]
+#CHECK: llxaf %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x65]
+#CHECK: llxaf %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x65]
+#CHECK: llxaf %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x65]
+#CHECK: llxaf %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x65]
+#CHECK: llxaf %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x65]
+
+ llxaf %r0, -524288
+ llxaf %r0, -1
+ llxaf %r0, 0
+ llxaf %r0, 1
+ llxaf %r0, 524287
+ llxaf %r0, 0(%r1)
+ llxaf %r0, 0(%r15)
+ llxaf %r0, 524287(%r1,%r15)
+ llxaf %r0, 524287(%r15,%r1)
+ llxaf %r15, 0
+
+#CHECK: llxag %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x67]
+#CHECK: llxag %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x67]
+#CHECK: llxag %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x67]
+#CHECK: llxag %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x67]
+#CHECK: llxag %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x67]
+#CHECK: llxag %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x67]
+#CHECK: llxag %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x67]
+#CHECK: llxag %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x67]
+#CHECK: llxag %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x67]
+#CHECK: llxag %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x67]
+
+ llxag %r0, -524288
+ llxag %r0, -1
+ llxag %r0, 0
+ llxag %r0, 1
+ llxag %r0, 524287
+ llxag %r0, 0(%r1)
+ llxag %r0, 0(%r15)
+ llxag %r0, 524287(%r1,%r15)
+ llxag %r0, 524287(%r15,%r1)
+ llxag %r15, 0
+
+#CHECK: llxaq %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x69]
+#CHECK: llxaq %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x69]
+#CHECK: llxaq %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x69]
+#CHECK: llxaq %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x69]
+#CHECK: llxaq %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x69]
+#CHECK: llxaq %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x69]
+#CHECK: llxaq %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x69]
+#CHECK: llxaq %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x69]
+#CHECK: llxaq %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x69]
+#CHECK: llxaq %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x69]
+
+ llxaq %r0, -524288
+ llxaq %r0, -1
+ llxaq %r0, 0
+ llxaq %r0, 1
+ llxaq %r0, 524287
+ llxaq %r0, 0(%r1)
+ llxaq %r0, 0(%r15)
+ llxaq %r0, 524287(%r1,%r15)
+ llxaq %r0, 524287(%r15,%r1)
+ llxaq %r15, 0
+
+#CHECK: pfcr %r0, %r0, 0 # encoding: [0xeb,0x00,0x00,0x00,0x00,0x16]
+#CHECK: pfcr %r15, %r1, 0 # encoding: [0xeb,0xf1,0x00,0x00,0x00,0x16]
+#CHECK: pfcr %r1, %r15, 0 # encoding: [0xeb,0x1f,0x00,0x00,0x00,0x16]
+#CHECK: pfcr %r15, %r15, 0 # encoding: [0xeb,0xff,0x00,0x00,0x00,0x16]
+#CHECK: pfcr %r0, %r0, -524288 # encoding: [0xeb,0x00,0x00,0x00,0x80,0x16]
+#CHECK: pfcr %r0, %r0, -1 # encoding: [0xeb,0x00,0x0f,0xff,0xff,0x16]
+#CHECK: pfcr %r0, %r0, 1 # encoding: [0xeb,0x00,0x00,0x01,0x00,0x16]
+#CHECK: pfcr %r0, %r0, 524287 # encoding: [0xeb,0x00,0x0f,0xff,0x7f,0x16]
+#CHECK: pfcr %r0, %r0, 0(%r1) # encoding: [0xeb,0x00,0x10,0x00,0x00,0x16]
+#CHECK: pfcr %r0, %r0, 0(%r15) # encoding: [0xeb,0x00,0xf0,0x00,0x00,0x16]
+#CHECK: pfcr %r0, %r0, 524287(%r1) # encoding: [0xeb,0x00,0x1f,0xff,0x7f,0x16]
+#CHECK: pfcr %r0, %r0, 524287(%r15) # encoding: [0xeb,0x00,0xff,0xff,0x7f,0x16]
+
+	pfcr %r0, %r0, 0
+ pfcr %r15, %r1, 0
+ pfcr %r1, %r15, 0
+ pfcr %r15, %r15, 0
+ pfcr %r0, %r0, -524288
+ pfcr %r0, %r0, -1
+ pfcr %r0, %r0, 1
+ pfcr %r0, %r0, 524287
+ pfcr %r0, %r0, 0(%r1)
+ pfcr %r0, %r0, 0(%r15)
+ pfcr %r0, %r0, 524287(%r1)
+ pfcr %r0, %r0, 524287(%r15)
+
+#CHECK: vavgq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf2]
+#CHECK: vavgq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf2]
+#CHECK: vavgq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf2]
+#CHECK: vavgq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf2]
+#CHECK: vavgq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf2]
+
+ vavgq %v0, %v0, %v0
+ vavgq %v0, %v0, %v31
+ vavgq %v0, %v31, %v0
+ vavgq %v31, %v0, %v0
+ vavgq %v18, %v3, %v20
+
+#CHECK: vavglq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf0]
+#CHECK: vavglq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf0]
+#CHECK: vavglq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf0]
+#CHECK: vavglq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf0]
+#CHECK: vavglq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf0]
+
+ vavglq %v0, %v0, %v0
+ vavglq %v0, %v0, %v31
+ vavglq %v0, %v31, %v0
+ vavglq %v31, %v0, %v0
+ vavglq %v18, %v3, %v20
+
+#CHECK: vblend %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x89]
+#CHECK: vblend %v0, %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x0f,0x00,0x00,0x89]
+#CHECK: vblend %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x89]
+#CHECK: vblend %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x89]
+#CHECK: vblend %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x89]
+#CHECK: vblend %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x89]
+#CHECK: vblend %v13, %v17, %v21, %v25, 11 # encoding: [0xe7,0xd1,0x5b,0x00,0x97,0x89]
+
+ vblend %v0, %v0, %v0, %v0, 0
+ vblend %v0, %v0, %v0, %v0, 15
+ vblend %v0, %v0, %v0, %v31, 0
+ vblend %v0, %v0, %v31, %v0, 0
+ vblend %v0, %v31, %v0, %v0, 0
+ vblend %v31, %v0, %v0, %v0, 0
+ vblend %v13, %v17, %v21, %v25, 11
+
+#CHECK: vblendb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x89]
+#CHECK: vblendb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x89]
+#CHECK: vblendb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x89]
+#CHECK: vblendb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x89]
+#CHECK: vblendb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x89]
+#CHECK: vblendb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0x89]
+
+ vblendb %v0, %v0, %v0, %v0
+ vblendb %v0, %v0, %v0, %v31
+ vblendb %v0, %v0, %v31, %v0
+ vblendb %v0, %v31, %v0, %v0
+ vblendb %v31, %v0, %v0, %v0
+ vblendb %v13, %v17, %v21, %v25
+
+#CHECK: vblendh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x89]
+#CHECK: vblendh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0x89]
+#CHECK: vblendh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0x89]
+#CHECK: vblendh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0x89]
+#CHECK: vblendh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0x89]
+#CHECK: vblendh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0x89]
+
+ vblendh %v0, %v0, %v0, %v0
+ vblendh %v0, %v0, %v0, %v31
+ vblendh %v0, %v0, %v31, %v0
+ vblendh %v0, %v31, %v0, %v0
+ vblendh %v31, %v0, %v0, %v0
+ vblendh %v13, %v17, %v21, %v25
+
+#CHECK: vblendf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x89]
+#CHECK: vblendf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x89]
+#CHECK: vblendf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x89]
+#CHECK: vblendf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x89]
+#CHECK: vblendf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x89]
+#CHECK: vblendf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0x89]
+
+ vblendf %v0, %v0, %v0, %v0
+ vblendf %v0, %v0, %v0, %v31
+ vblendf %v0, %v0, %v31, %v0
+ vblendf %v0, %v31, %v0, %v0
+ vblendf %v31, %v0, %v0, %v0
+ vblendf %v13, %v17, %v21, %v25
+
+#CHECK: vblendg %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0x89]
+#CHECK: vblendg %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0x89]
+#CHECK: vblendg %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0x89]
+#CHECK: vblendg %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0x89]
+#CHECK: vblendg %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0x89]
+#CHECK: vblendg %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0x89]
+
+ vblendg %v0, %v0, %v0, %v0
+ vblendg %v0, %v0, %v0, %v31
+ vblendg %v0, %v0, %v31, %v0
+ vblendg %v0, %v31, %v0, %v0
+ vblendg %v31, %v0, %v0, %v0
+ vblendg %v13, %v17, %v21, %v25
+
+#CHECK: vblendq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0x89]
+#CHECK: vblendq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0x89]
+#CHECK: vblendq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0x89]
+#CHECK: vblendq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0x89]
+#CHECK: vblendq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0x89]
+#CHECK: vblendq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0x89]
+
+ vblendq %v0, %v0, %v0, %v0
+ vblendq %v0, %v0, %v0, %v31
+ vblendq %v0, %v0, %v31, %v0
+ vblendq %v0, %v31, %v0, %v0
+ vblendq %v31, %v0, %v0, %v0
+ vblendq %v13, %v17, %v21, %v25
+
+#CHECK: vceqq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf8]
+#CHECK: vceqq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf8]
+#CHECK: vceqq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf8]
+#CHECK: vceqq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf8]
+#CHECK: vceqq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf8]
+#CHECK: vceqqs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x44,0xf8]
+
+ vceqq %v0, %v0, %v0
+ vceqq %v0, %v0, %v31
+ vceqq %v0, %v31, %v0
+ vceqq %v31, %v0, %v0
+ vceqq %v18, %v3, %v20
+ vceqqs %v5, %v22, %v7
+
+#CHECK: vchq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xfb]
+#CHECK: vchq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xfb]
+#CHECK: vchq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xfb]
+#CHECK: vchq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xfb]
+#CHECK: vchq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xfb]
+#CHECK: vchqs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x44,0xfb]
+
+ vchq %v0, %v0, %v0
+ vchq %v0, %v0, %v31
+ vchq %v0, %v31, %v0
+ vchq %v31, %v0, %v0
+ vchq %v18, %v3, %v20
+ vchqs %v5, %v22, %v7
+
+#CHECK: vchlq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf9]
+#CHECK: vchlq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf9]
+#CHECK: vchlq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf9]
+#CHECK: vchlq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf9]
+#CHECK: vchlq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf9]
+#CHECK: vchlqs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x44,0xf9]
+
+ vchlq %v0, %v0, %v0
+ vchlq %v0, %v0, %v31
+ vchlq %v0, %v31, %v0
+ vchlq %v31, %v0, %v0
+ vchlq %v18, %v3, %v20
+ vchlqs %v5, %v22, %v7
+
+#CHECK: vclzq %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0x53]
+#CHECK: vclzq %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0x53]
+#CHECK: vclzq %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0x53]
+#CHECK: vclzq %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0x53]
+#CHECK: vclzq %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0x53]
+#CHECK: vclzq %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0x53]
+
+ vclzq %v0, %v0
+ vclzq %v0, %v15
+ vclzq %v0, %v31
+ vclzq %v15, %v0
+ vclzq %v31, %v0
+ vclzq %v14, %v17
+
+#CHECK: vctzq %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0x52]
+#CHECK: vctzq %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0x52]
+#CHECK: vctzq %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0x52]
+#CHECK: vctzq %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0x52]
+#CHECK: vctzq %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0x52]
+#CHECK: vctzq %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0x52]
+
+ vctzq %v0, %v0
+ vctzq %v0, %v15
+ vctzq %v0, %v31
+ vctzq %v15, %v0
+ vctzq %v31, %v0
+ vctzq %v14, %v17
+
+#CHECK: vcvbq %v0, %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x4e]
+#CHECK: vcvbq %v0, %v0, 15 # encoding: [0xe6,0x00,0x00,0xf0,0x00,0x4e]
+#CHECK: vcvbq %v31, %v0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x4e]
+#CHECK: vcvbq %v0, %v15, 0 # encoding: [0xe6,0x0f,0x00,0x00,0x00,0x4e]
+#CHECK: vcvbq %v0, %v31, 0 # encoding: [0xe6,0x0f,0x00,0x00,0x04,0x4e]
+#CHECK: vcvbq %v3, %v18, 4 # encoding: [0xe6,0x32,0x00,0x40,0x04,0x4e]
+
+ vcvbq %v0, %v0, 0
+ vcvbq %v0, %v0, 15
+ vcvbq %v31, %v0, 0
+ vcvbq %v0, %v15, 0
+ vcvbq %v0, %v31, 0
+ vcvbq %v3, %v18, 4
+
+#CHECK: vcvdq %v0, %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x4a]
+#CHECK: vcvdq %v0, %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0xf0,0x00,0x4a]
+#CHECK: vcvdq %v0, %v0, 255, 0 # encoding: [0xe6,0x00,0x00,0x0f,0xf0,0x4a]
+#CHECK: vcvdq %v0, %v31, 0, 0 # encoding: [0xe6,0x0f,0x00,0x00,0x04,0x4a]
+#CHECK: vcvdq %v15, %v0, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x4a]
+#CHECK: vcvdq %v31, %v0, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x4a]
+#CHECK: vcvdq %v18, %v9, 52, 11 # encoding: [0xe6,0x29,0x00,0xb3,0x48,0x4a]
+
+ vcvdq %v0, %v0, 0, 0
+ vcvdq %v0, %v0, 0, 15
+ vcvdq %v0, %v0, 255, 0
+ vcvdq %v0, %v31, 0, 0
+ vcvdq %v15, %v0, 0, 0
+ vcvdq %v31, %v0, 0, 0
+ vcvdq %v18, %v9, 0x34, 11
+
+#CHECK: vd %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xb2]
+#CHECK: vd %v0, %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0xb2]
+#CHECK: vd %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xb2]
+#CHECK: vd %v0, %v0, %v31, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xb2]
+#CHECK: vd %v0, %v31, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xb2]
+#CHECK: vd %v31, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xb2]
+#CHECK: vd %v13, %v17, %v21, 8, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x86,0xb2]
+
+ vd %v0, %v0, %v0, 0, 0
+ vd %v0, %v0, %v0, 0, 15
+ vd %v0, %v0, %v0, 15, 0
+ vd %v0, %v0, %v31, 0, 0
+ vd %v0, %v31, %v0, 0, 0
+ vd %v31, %v0, %v0, 0, 0
+ vd %v13, %v17, %v21, 8, 4
+
+#CHECK: vdf %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xb2]
+#CHECK: vdf %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x20,0xb2]
+#CHECK: vdf %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xb2]
+#CHECK: vdf %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xb2]
+#CHECK: vdf %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xb2]
+#CHECK: vdf %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x26,0xb2]
+
+ vdf %v0, %v0, %v0, 0
+ vdf %v0, %v0, %v0, 15
+ vdf %v0, %v0, %v31, 0
+ vdf %v0, %v31, %v0, 0
+ vdf %v31, %v0, %v0, 0
+ vdf %v13, %v17, %v21, 4
+
+#CHECK: vdg %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xb2]
+#CHECK: vdg %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x30,0xb2]
+#CHECK: vdg %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xb2]
+#CHECK: vdg %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xb2]
+#CHECK: vdg %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xb2]
+#CHECK: vdg %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x36,0xb2]
+
+ vdg %v0, %v0, %v0, 0
+ vdg %v0, %v0, %v0, 15
+ vdg %v0, %v0, %v31, 0
+ vdg %v0, %v31, %v0, 0
+ vdg %v31, %v0, %v0, 0
+ vdg %v13, %v17, %v21, 4
+
+#CHECK: vdq %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xb2]
+#CHECK: vdq %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x40,0xb2]
+#CHECK: vdq %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xb2]
+#CHECK: vdq %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xb2]
+#CHECK: vdq %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xb2]
+#CHECK: vdq %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x46,0xb2]
+
+ vdq %v0, %v0, %v0, 0
+ vdq %v0, %v0, %v0, 15
+ vdq %v0, %v0, %v31, 0
+ vdq %v0, %v31, %v0, 0
+ vdq %v31, %v0, %v0, 0
+ vdq %v13, %v17, %v21, 4
+
+#CHECK: vdl %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xb0]
+#CHECK: vdl %v0, %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0xb0]
+#CHECK: vdl %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xb0]
+#CHECK: vdl %v0, %v0, %v31, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xb0]
+#CHECK: vdl %v0, %v31, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xb0]
+#CHECK: vdl %v31, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xb0]
+#CHECK: vdl %v13, %v17, %v21, 8, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x86,0xb0]
+
+ vdl %v0, %v0, %v0, 0, 0
+ vdl %v0, %v0, %v0, 0, 15
+ vdl %v0, %v0, %v0, 15, 0
+ vdl %v0, %v0, %v31, 0, 0
+ vdl %v0, %v31, %v0, 0, 0
+ vdl %v31, %v0, %v0, 0, 0
+ vdl %v13, %v17, %v21, 8, 4
+
+#CHECK: vdlf %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xb0]
+#CHECK: vdlf %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x20,0xb0]
+#CHECK: vdlf %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xb0]
+#CHECK: vdlf %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xb0]
+#CHECK: vdlf %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xb0]
+#CHECK: vdlf %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x26,0xb0]
+
+ vdlf %v0, %v0, %v0, 0
+ vdlf %v0, %v0, %v0, 15
+ vdlf %v0, %v0, %v31, 0
+ vdlf %v0, %v31, %v0, 0
+ vdlf %v31, %v0, %v0, 0
+ vdlf %v13, %v17, %v21, 4
+
+#CHECK: vdlg %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xb0]
+#CHECK: vdlg %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x30,0xb0]
+#CHECK: vdlg %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xb0]
+#CHECK: vdlg %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xb0]
+#CHECK: vdlg %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xb0]
+#CHECK: vdlg %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x36,0xb0]
+
+ vdlg %v0, %v0, %v0, 0
+ vdlg %v0, %v0, %v0, 15
+ vdlg %v0, %v0, %v31, 0
+ vdlg %v0, %v31, %v0, 0
+ vdlg %v31, %v0, %v0, 0
+ vdlg %v13, %v17, %v21, 4
+
+#CHECK: vdlq %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xb0]
+#CHECK: vdlq %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x40,0xb0]
+#CHECK: vdlq %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xb0]
+#CHECK: vdlq %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xb0]
+#CHECK: vdlq %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xb0]
+#CHECK: vdlq %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x46,0xb0]
+
+ vdlq %v0, %v0, %v0, 0
+ vdlq %v0, %v0, %v0, 15
+ vdlq %v0, %v0, %v31, 0
+ vdlq %v0, %v31, %v0, 0
+ vdlq %v31, %v0, %v0, 0
+ vdlq %v13, %v17, %v21, 4
+
+#CHECK: veval %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x88]
+#CHECK: veval %v0, %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x88]
+#CHECK: veval %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x88]
+#CHECK: veval %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x88]
+#CHECK: veval %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x88]
+#CHECK: veval %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x88]
+#CHECK: veval %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x88]
+#CHECK: veval %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x88]
+#CHECK: veval %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x88]
+#CHECK: veval %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x88]
+#CHECK: veval %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x04,0x5a,0x88]
+
+ veval %v0, %v0, %v0, %v0, 0
+ veval %v0, %v0, %v0, %v0, 255
+ veval %v0, %v0, %v0, %v15, 0
+ veval %v0, %v0, %v0, %v31, 0
+ veval %v0, %v0, %v15, %v0, 0
+ veval %v0, %v0, %v31, %v0, 0
+ veval %v0, %v15, %v0, %v0, 0
+ veval %v0, %v31, %v0, %v0, 0
+ veval %v15, %v0, %v0, %v0, 0
+ veval %v31, %v0, %v0, %v0, 0
+ veval %v18, %v3, %v20, %v5, 4
+
+#CHECK: vecq %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xdb]
+#CHECK: vecq %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0xdb]
+#CHECK: vecq %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xdb]
+#CHECK: vecq %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0xdb]
+#CHECK: vecq %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xdb]
+#CHECK: vecq %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0xdb]
+
+ vecq %v0, %v0
+ vecq %v0, %v15
+ vecq %v0, %v31
+ vecq %v15, %v0
+ vecq %v31, %v0
+ vecq %v14, %v17
+
+#CHECK: veclq %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xd9]
+#CHECK: veclq %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0xd9]
+#CHECK: veclq %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xd9]
+#CHECK: veclq %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0xd9]
+#CHECK: veclq %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xd9]
+#CHECK: veclq %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0xd9]
+
+ veclq %v0, %v0
+ veclq %v0, %v15
+ veclq %v0, %v31
+ veclq %v15, %v0
+ veclq %v31, %v0
+ veclq %v14, %v17
+
+#CHECK: vgem %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x54]
+#CHECK: vgem %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x54]
+#CHECK: vgem %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x54]
+#CHECK: vgem %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x54]
+#CHECK: vgem %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x54]
+#CHECK: vgem %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x54]
+#CHECK: vgem %v14, %v17, 11 # encoding: [0xe7,0xe1,0x00,0x00,0xb4,0x54]
+
+ vgem %v0, %v0, 0
+ vgem %v0, %v0, 15
+ vgem %v0, %v15, 0
+ vgem %v0, %v31, 0
+ vgem %v15, %v0, 0
+ vgem %v31, %v0, 0
+ vgem %v14, %v17, 11
+
+#CHECK: vgemb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x54]
+#CHECK: vgemb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x54]
+#CHECK: vgemb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x54]
+#CHECK: vgemb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x54]
+#CHECK: vgemb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x54]
+#CHECK: vgemb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x54]
+
+ vgemb %v0, %v0
+ vgemb %v0, %v15
+ vgemb %v0, %v31
+ vgemb %v15, %v0
+ vgemb %v31, %v0
+ vgemb %v14, %v17
+
+#CHECK: vgemh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x54]
+#CHECK: vgemh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x54]
+#CHECK: vgemh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x54]
+#CHECK: vgemh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x54]
+#CHECK: vgemh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x54]
+#CHECK: vgemh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x54]
+
+ vgemh %v0, %v0
+ vgemh %v0, %v15
+ vgemh %v0, %v31
+ vgemh %v15, %v0
+ vgemh %v31, %v0
+ vgemh %v14, %v17
+
+#CHECK: vgemf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x54]
+#CHECK: vgemf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x54]
+#CHECK: vgemf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x54]
+#CHECK: vgemf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x54]
+#CHECK: vgemf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x54]
+#CHECK: vgemf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x54]
+
+ vgemf %v0, %v0
+ vgemf %v0, %v15
+ vgemf %v0, %v31
+ vgemf %v15, %v0
+ vgemf %v31, %v0
+ vgemf %v14, %v17
+
+#CHECK: vgemg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x54]
+#CHECK: vgemg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x54]
+#CHECK: vgemg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x54]
+#CHECK: vgemg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x54]
+#CHECK: vgemg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x54]
+#CHECK: vgemg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0x54]
+
+ vgemg %v0, %v0
+ vgemg %v0, %v15
+ vgemg %v0, %v31
+ vgemg %v15, %v0
+ vgemg %v31, %v0
+ vgemg %v14, %v17
+
+#CHECK: vgemq %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0x54]
+#CHECK: vgemq %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0x54]
+#CHECK: vgemq %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0x54]
+#CHECK: vgemq %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0x54]
+#CHECK: vgemq %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0x54]
+#CHECK: vgemq %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0x54]
+
+ vgemq %v0, %v0
+ vgemq %v0, %v15
+ vgemq %v0, %v31
+ vgemq %v15, %v0
+ vgemq %v31, %v0
+ vgemq %v14, %v17
+
+#CHECK: vlcq %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xde]
+#CHECK: vlcq %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0xde]
+#CHECK: vlcq %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xde]
+#CHECK: vlcq %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0xde]
+#CHECK: vlcq %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xde]
+#CHECK: vlcq %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0xde]
+
+ vlcq %v0, %v0
+ vlcq %v0, %v15
+ vlcq %v0, %v31
+ vlcq %v15, %v0
+ vlcq %v31, %v0
+ vlcq %v14, %v17
+
+#CHECK: vlpq %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xdf]
+#CHECK: vlpq %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x40,0xdf]
+#CHECK: vlpq %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xdf]
+#CHECK: vlpq %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x40,0xdf]
+#CHECK: vlpq %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xdf]
+#CHECK: vlpq %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x44,0xdf]
+
+ vlpq %v0, %v0
+ vlpq %v0, %v15
+ vlpq %v0, %v31
+ vlpq %v15, %v0
+ vlpq %v31, %v0
+ vlpq %v14, %v17
+
+#CHECK: vmalg %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xaa]
+#CHECK: vmalg %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xaa]
+#CHECK: vmalg %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xaa]
+#CHECK: vmalg %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xaa]
+#CHECK: vmalg %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xaa]
+#CHECK: vmalg %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xaa]
+
+ vmalg %v0, %v0, %v0, %v0
+ vmalg %v0, %v0, %v0, %v31
+ vmalg %v0, %v0, %v31, %v0
+ vmalg %v0, %v31, %v0, %v0
+ vmalg %v31, %v0, %v0, %v0
+ vmalg %v13, %v17, %v21, %v25
+
+#CHECK: vmalq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xaa]
+#CHECK: vmalq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xaa]
+#CHECK: vmalq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xaa]
+#CHECK: vmalq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xaa]
+#CHECK: vmalq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xaa]
+#CHECK: vmalq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xaa]
+
+ vmalq %v0, %v0, %v0, %v0
+ vmalq %v0, %v0, %v0, %v31
+ vmalq %v0, %v0, %v31, %v0
+ vmalq %v0, %v31, %v0, %v0
+ vmalq %v31, %v0, %v0, %v0
+ vmalq %v13, %v17, %v21, %v25
+
+#CHECK: vmahg %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xab]
+#CHECK: vmahg %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xab]
+#CHECK: vmahg %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xab]
+#CHECK: vmahg %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xab]
+#CHECK: vmahg %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xab]
+#CHECK: vmahg %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xab]
+
+ vmahg %v0, %v0, %v0, %v0
+ vmahg %v0, %v0, %v0, %v31
+ vmahg %v0, %v0, %v31, %v0
+ vmahg %v0, %v31, %v0, %v0
+ vmahg %v31, %v0, %v0, %v0
+ vmahg %v13, %v17, %v21, %v25
+
+#CHECK: vmahq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xab]
+#CHECK: vmahq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xab]
+#CHECK: vmahq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xab]
+#CHECK: vmahq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xab]
+#CHECK: vmahq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xab]
+#CHECK: vmahq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xab]
+
+ vmahq %v0, %v0, %v0, %v0
+ vmahq %v0, %v0, %v0, %v31
+ vmahq %v0, %v0, %v31, %v0
+ vmahq %v0, %v31, %v0, %v0
+ vmahq %v31, %v0, %v0, %v0
+ vmahq %v13, %v17, %v21, %v25
+
+#CHECK: vmalhg %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xa9]
+#CHECK: vmalhg %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xa9]
+#CHECK: vmalhg %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xa9]
+#CHECK: vmalhg %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xa9]
+#CHECK: vmalhg %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xa9]
+#CHECK: vmalhg %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xa9]
+
+ vmalhg %v0, %v0, %v0, %v0
+ vmalhg %v0, %v0, %v0, %v31
+ vmalhg %v0, %v0, %v31, %v0
+ vmalhg %v0, %v31, %v0, %v0
+ vmalhg %v31, %v0, %v0, %v0
+ vmalhg %v13, %v17, %v21, %v25
+
+#CHECK: vmalhq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xa9]
+#CHECK: vmalhq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xa9]
+#CHECK: vmalhq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xa9]
+#CHECK: vmalhq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xa9]
+#CHECK: vmalhq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xa9]
+#CHECK: vmalhq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xa9]
+
+ vmalhq %v0, %v0, %v0, %v0
+ vmalhq %v0, %v0, %v0, %v31
+ vmalhq %v0, %v0, %v31, %v0
+ vmalhq %v0, %v31, %v0, %v0
+ vmalhq %v31, %v0, %v0, %v0
+ vmalhq %v13, %v17, %v21, %v25
+
+#CHECK: vmaeg %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xae]
+#CHECK: vmaeg %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xae]
+#CHECK: vmaeg %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xae]
+#CHECK: vmaeg %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xae]
+#CHECK: vmaeg %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xae]
+#CHECK: vmaeg %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xae]
+
+ vmaeg %v0, %v0, %v0, %v0
+ vmaeg %v0, %v0, %v0, %v31
+ vmaeg %v0, %v0, %v31, %v0
+ vmaeg %v0, %v31, %v0, %v0
+ vmaeg %v31, %v0, %v0, %v0
+ vmaeg %v13, %v17, %v21, %v25
+
+#CHECK: vmaleg %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xac]
+#CHECK: vmaleg %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xac]
+#CHECK: vmaleg %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xac]
+#CHECK: vmaleg %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xac]
+#CHECK: vmaleg %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xac]
+#CHECK: vmaleg %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xac]
+
+ vmaleg %v0, %v0, %v0, %v0
+ vmaleg %v0, %v0, %v0, %v31
+ vmaleg %v0, %v0, %v31, %v0
+ vmaleg %v0, %v31, %v0, %v0
+ vmaleg %v31, %v0, %v0, %v0
+ vmaleg %v13, %v17, %v21, %v25
+
+#CHECK: vmaog %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xaf]
+#CHECK: vmaog %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xaf]
+#CHECK: vmaog %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xaf]
+#CHECK: vmaog %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xaf]
+#CHECK: vmaog %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xaf]
+#CHECK: vmaog %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xaf]
+
+ vmaog %v0, %v0, %v0, %v0
+ vmaog %v0, %v0, %v0, %v31
+ vmaog %v0, %v0, %v31, %v0
+ vmaog %v0, %v31, %v0, %v0
+ vmaog %v31, %v0, %v0, %v0
+ vmaog %v13, %v17, %v21, %v25
+
+#CHECK: vmalog %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xad]
+#CHECK: vmalog %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xad]
+#CHECK: vmalog %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xad]
+#CHECK: vmalog %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xad]
+#CHECK: vmalog %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xad]
+#CHECK: vmalog %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xad]
+
+ vmalog %v0, %v0, %v0, %v0
+ vmalog %v0, %v0, %v0, %v31
+ vmalog %v0, %v0, %v31, %v0
+ vmalog %v0, %v31, %v0, %v0
+ vmalog %v31, %v0, %v0, %v0
+ vmalog %v13, %v17, %v21, %v25
+
+#CHECK: vmlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xa2]
+#CHECK: vmlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xa2]
+#CHECK: vmlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xa2]
+#CHECK: vmlg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xa2]
+#CHECK: vmlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xa2]
+
+ vmlg %v0, %v0, %v0
+ vmlg %v0, %v0, %v31
+ vmlg %v0, %v31, %v0
+ vmlg %v31, %v0, %v0
+ vmlg %v18, %v3, %v20
+
+#CHECK: vmlq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xa2]
+#CHECK: vmlq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xa2]
+#CHECK: vmlq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xa2]
+#CHECK: vmlq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xa2]
+#CHECK: vmlq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xa2]
+
+ vmlq %v0, %v0, %v0
+ vmlq %v0, %v0, %v31
+ vmlq %v0, %v31, %v0
+ vmlq %v31, %v0, %v0
+ vmlq %v18, %v3, %v20
+
+#CHECK: vmhg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xa3]
+#CHECK: vmhg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xa3]
+#CHECK: vmhg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xa3]
+#CHECK: vmhg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xa3]
+#CHECK: vmhg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xa3]
+
+ vmhg %v0, %v0, %v0
+ vmhg %v0, %v0, %v31
+ vmhg %v0, %v31, %v0
+ vmhg %v31, %v0, %v0
+ vmhg %v18, %v3, %v20
+
+#CHECK: vmhq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xa3]
+#CHECK: vmhq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xa3]
+#CHECK: vmhq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xa3]
+#CHECK: vmhq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xa3]
+#CHECK: vmhq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xa3]
+
+ vmhq %v0, %v0, %v0
+ vmhq %v0, %v0, %v31
+ vmhq %v0, %v31, %v0
+ vmhq %v31, %v0, %v0
+ vmhq %v18, %v3, %v20
+
+#CHECK: vmlhg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xa1]
+#CHECK: vmlhg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xa1]
+#CHECK: vmlhg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xa1]
+#CHECK: vmlhg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xa1]
+#CHECK: vmlhg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xa1]
+
+ vmlhg %v0, %v0, %v0
+ vmlhg %v0, %v0, %v31
+ vmlhg %v0, %v31, %v0
+ vmlhg %v31, %v0, %v0
+ vmlhg %v18, %v3, %v20
+
+#CHECK: vmlhq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xa1]
+#CHECK: vmlhq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xa1]
+#CHECK: vmlhq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xa1]
+#CHECK: vmlhq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xa1]
+#CHECK: vmlhq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xa1]
+
+ vmlhq %v0, %v0, %v0
+ vmlhq %v0, %v0, %v31
+ vmlhq %v0, %v31, %v0
+ vmlhq %v31, %v0, %v0
+ vmlhq %v18, %v3, %v20
+
+#CHECK: vmeg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xa6]
+#CHECK: vmeg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xa6]
+#CHECK: vmeg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xa6]
+#CHECK: vmeg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xa6]
+#CHECK: vmeg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xa6]
+
+ vmeg %v0, %v0, %v0
+ vmeg %v0, %v0, %v31
+ vmeg %v0, %v31, %v0
+ vmeg %v31, %v0, %v0
+ vmeg %v18, %v3, %v20
+
+#CHECK: vmleg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xa4]
+#CHECK: vmleg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xa4]
+#CHECK: vmleg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xa4]
+#CHECK: vmleg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xa4]
+#CHECK: vmleg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xa4]
+
+ vmleg %v0, %v0, %v0
+ vmleg %v0, %v0, %v31
+ vmleg %v0, %v31, %v0
+ vmleg %v31, %v0, %v0
+ vmleg %v18, %v3, %v20
+
+#CHECK: vmog %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xa7]
+#CHECK: vmog %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xa7]
+#CHECK: vmog %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xa7]
+#CHECK: vmog %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xa7]
+#CHECK: vmog %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xa7]
+
+ vmog %v0, %v0, %v0
+ vmog %v0, %v0, %v31
+ vmog %v0, %v31, %v0
+ vmog %v31, %v0, %v0
+ vmog %v18, %v3, %v20
+
+#CHECK: vmlog %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xa5]
+#CHECK: vmlog %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xa5]
+#CHECK: vmlog %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xa5]
+#CHECK: vmlog %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xa5]
+#CHECK: vmlog %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xa5]
+
+ vmlog %v0, %v0, %v0
+ vmlog %v0, %v0, %v31
+ vmlog %v0, %v31, %v0
+ vmlog %v31, %v0, %v0
+ vmlog %v18, %v3, %v20
+
+#CHECK: vmnq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xfe]
+#CHECK: vmnq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xfe]
+#CHECK: vmnq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xfe]
+#CHECK: vmnq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xfe]
+#CHECK: vmnq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xfe]
+
+ vmnq %v0, %v0, %v0
+ vmnq %v0, %v0, %v31
+ vmnq %v0, %v31, %v0
+ vmnq %v31, %v0, %v0
+ vmnq %v18, %v3, %v20
+
+#CHECK: vmnlq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xfc]
+#CHECK: vmnlq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xfc]
+#CHECK: vmnlq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xfc]
+#CHECK: vmnlq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xfc]
+#CHECK: vmnlq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xfc]
+
+ vmnlq %v0, %v0, %v0
+ vmnlq %v0, %v0, %v31
+ vmnlq %v0, %v31, %v0
+ vmnlq %v31, %v0, %v0
+ vmnlq %v18, %v3, %v20
+
+#CHECK: vmxq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xff]
+#CHECK: vmxq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xff]
+#CHECK: vmxq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xff]
+#CHECK: vmxq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xff]
+#CHECK: vmxq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xff]
+
+ vmxq %v0, %v0, %v0
+ vmxq %v0, %v0, %v31
+ vmxq %v0, %v31, %v0
+ vmxq %v31, %v0, %v0
+ vmxq %v18, %v3, %v20
+
+#CHECK: vmxlq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xfd]
+#CHECK: vmxlq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xfd]
+#CHECK: vmxlq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xfd]
+#CHECK: vmxlq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xfd]
+#CHECK: vmxlq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xfd]
+
+ vmxlq %v0, %v0, %v0
+ vmxlq %v0, %v0, %v31
+ vmxlq %v0, %v31, %v0
+ vmxlq %v31, %v0, %v0
+ vmxlq %v18, %v3, %v20
+
+#CHECK: vr %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xb3]
+#CHECK: vr %v0, %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0xb3]
+#CHECK: vr %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xb3]
+#CHECK: vr %v0, %v0, %v31, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xb3]
+#CHECK: vr %v0, %v31, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xb3]
+#CHECK: vr %v31, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xb3]
+#CHECK: vr %v13, %v17, %v21, 8, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x86,0xb3]
+
+ vr %v0, %v0, %v0, 0, 0
+ vr %v0, %v0, %v0, 0, 15
+ vr %v0, %v0, %v0, 15, 0
+ vr %v0, %v0, %v31, 0, 0
+ vr %v0, %v31, %v0, 0, 0
+ vr %v31, %v0, %v0, 0, 0
+ vr %v13, %v17, %v21, 8, 4
+
+#CHECK: vrf %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xb3]
+#CHECK: vrf %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x20,0xb3]
+#CHECK: vrf %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xb3]
+#CHECK: vrf %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xb3]
+#CHECK: vrf %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xb3]
+#CHECK: vrf %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x26,0xb3]
+
+ vrf %v0, %v0, %v0, 0
+ vrf %v0, %v0, %v0, 15
+ vrf %v0, %v0, %v31, 0
+ vrf %v0, %v31, %v0, 0
+ vrf %v31, %v0, %v0, 0
+ vrf %v13, %v17, %v21, 4
+
+#CHECK: vrg %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xb3]
+#CHECK: vrg %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x30,0xb3]
+#CHECK: vrg %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xb3]
+#CHECK: vrg %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xb3]
+#CHECK: vrg %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xb3]
+#CHECK: vrg %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x36,0xb3]
+
+ vrg %v0, %v0, %v0, 0
+ vrg %v0, %v0, %v0, 15
+ vrg %v0, %v0, %v31, 0
+ vrg %v0, %v31, %v0, 0
+ vrg %v31, %v0, %v0, 0
+ vrg %v13, %v17, %v21, 4
+
+#CHECK: vrq %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xb3]
+#CHECK: vrq %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x40,0xb3]
+#CHECK: vrq %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xb3]
+#CHECK: vrq %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xb3]
+#CHECK: vrq %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xb3]
+#CHECK: vrq %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x46,0xb3]
+
+ vrq %v0, %v0, %v0, 0
+ vrq %v0, %v0, %v0, 15
+ vrq %v0, %v0, %v31, 0
+ vrq %v0, %v31, %v0, 0
+ vrq %v31, %v0, %v0, 0
+ vrq %v13, %v17, %v21, 4
+
+#CHECK: vrl %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xb1]
+#CHECK: vrl %v0, %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x00,0xb1]
+#CHECK: vrl %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xb1]
+#CHECK: vrl %v0, %v0, %v31, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xb1]
+#CHECK: vrl %v0, %v31, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xb1]
+#CHECK: vrl %v31, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xb1]
+#CHECK: vrl %v13, %v17, %v21, 8, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x86,0xb1]
+
+ vrl %v0, %v0, %v0, 0, 0
+ vrl %v0, %v0, %v0, 0, 15
+ vrl %v0, %v0, %v0, 15, 0
+ vrl %v0, %v0, %v31, 0, 0
+ vrl %v0, %v31, %v0, 0, 0
+ vrl %v31, %v0, %v0, 0, 0
+ vrl %v13, %v17, %v21, 8, 4
+
+#CHECK: vrlf %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xb1]
+#CHECK: vrlf %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x20,0xb1]
+#CHECK: vrlf %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xb1]
+#CHECK: vrlf %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xb1]
+#CHECK: vrlf %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xb1]
+#CHECK: vrlf %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x26,0xb1]
+
+ vrlf %v0, %v0, %v0, 0
+ vrlf %v0, %v0, %v0, 15
+ vrlf %v0, %v0, %v31, 0
+ vrlf %v0, %v31, %v0, 0
+ vrlf %v31, %v0, %v0, 0
+ vrlf %v13, %v17, %v21, 4
+
+#CHECK: vrlg %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xb1]
+#CHECK: vrlg %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x30,0xb1]
+#CHECK: vrlg %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xb1]
+#CHECK: vrlg %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xb1]
+#CHECK: vrlg %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xb1]
+#CHECK: vrlg %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x36,0xb1]
+
+ vrlg %v0, %v0, %v0, 0
+ vrlg %v0, %v0, %v0, 15
+ vrlg %v0, %v0, %v31, 0
+ vrlg %v0, %v31, %v0, 0
+ vrlg %v31, %v0, %v0, 0
+ vrlg %v13, %v17, %v21, 4
+
+#CHECK: vrlq %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xb1]
+#CHECK: vrlq %v0, %v0, %v0, 15 # encoding: [0xe7,0x00,0x00,0x0f,0x40,0xb1]
+#CHECK: vrlq %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xb1]
+#CHECK: vrlq %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xb1]
+#CHECK: vrlq %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xb1]
+#CHECK: vrlq %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x04,0x46,0xb1]
+
+ vrlq %v0, %v0, %v0, 0
+ vrlq %v0, %v0, %v0, 15
+ vrlq %v0, %v0, %v31, 0
+ vrlq %v0, %v31, %v0, 0
+ vrlq %v31, %v0, %v0, 0
+ vrlq %v13, %v17, %v21, 4
+
+#CHECK: vtp %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x5f]
+#CHECK: vtp %v0, 65535 # encoding: [0xe6,0x00,0x0f,0xff,0xf0,0x5f]
+#CHECK: vtp %v15, 4660 # encoding: [0xe6,0x0f,0x01,0x23,0x40,0x5f]
+
+ vtp %v0, 0
+ vtp %v0, 65535
+ vtp %v15, 4660
+
+#CHECK: vtz %v0, %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x7f]
+#CHECK: vtz %v0, %v0, 65535 # encoding: [0xe6,0x00,0x0f,0xff,0xf0,0x7f]
+#CHECK: vtz %v0, %v31, 0 # encoding: [0xe6,0x00,0xf0,0x00,0x02,0x7f]
+#CHECK: vtz %v31, %v0, 0 # encoding: [0xe6,0x0f,0x00,0x00,0x04,0x7f]
+#CHECK: vtz %v13, %v17, 4660 # encoding: [0xe6,0x0d,0x11,0x23,0x42,0x7f]
+
+ vtz %v0, %v0, 0
+ vtz %v0, %v0, 65535
+ vtz %v0, %v31, 0
+ vtz %v31, %v0, 0
+ vtz %v13, %v17, 4660
+
+#CHECK: vuphg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xd7]
+#CHECK: vuphg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xd7]
+#CHECK: vuphg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xd7]
+#CHECK: vuphg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xd7]
+#CHECK: vuphg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xd7]
+#CHECK: vuphg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xd7]
+
+ vuphg %v0, %v0
+ vuphg %v0, %v15
+ vuphg %v0, %v31
+ vuphg %v15, %v0
+ vuphg %v31, %v0
+ vuphg %v14, %v17
+
+#CHECK: vuplg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xd6]
+#CHECK: vuplg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xd6]
+#CHECK: vuplg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xd6]
+#CHECK: vuplg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xd6]
+#CHECK: vuplg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xd6]
+#CHECK: vuplg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xd6]
+
+ vuplg %v0, %v0
+ vuplg %v0, %v15
+ vuplg %v0, %v31
+ vuplg %v15, %v0
+ vuplg %v31, %v0
+ vuplg %v14, %v17
+
+#CHECK: vuplhg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xd5]
+#CHECK: vuplhg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xd5]
+#CHECK: vuplhg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xd5]
+#CHECK: vuplhg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xd5]
+#CHECK: vuplhg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xd5]
+#CHECK: vuplhg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xd5]
+
+ vuplhg %v0, %v0
+ vuplhg %v0, %v15
+ vuplhg %v0, %v31
+ vuplhg %v15, %v0
+ vuplhg %v31, %v0
+ vuplhg %v14, %v17
+
+#CHECK: vupllg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xd4]
+#CHECK: vupllg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xd4]
+#CHECK: vupllg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xd4]
+#CHECK: vupllg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xd4]
+#CHECK: vupllg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xd4]
+#CHECK: vupllg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xd4]
+
+ vupllg %v0, %v0
+ vupllg %v0, %v15
+ vupllg %v0, %v31
+ vupllg %v15, %v0
+ vupllg %v31, %v0
+ vupllg %v14, %v17
+
diff --git a/llvm/unittests/TargetParser/Host.cpp b/llvm/unittests/TargetParser/Host.cpp
index a478c2e6d92d3e..c5b96e1df904e6 100644
--- a/llvm/unittests/TargetParser/Host.cpp
+++ b/llvm/unittests/TargetParser/Host.cpp
@@ -334,10 +334,14 @@ CPU revision : 0
TEST(getLinuxHostCPUName, s390x) {
SmallVector<std::string> ModelIDs(
- {"3931", "8561", "3906", "2964", "2827", "2817", "2097", "2064"});
+ {"9175", "3931", "8561", "3906", "2964", "2827", "2817", "2097", "2064"});
SmallVector<std::string> VectorSupport({"", "vx"});
SmallVector<StringRef> ExpectedCPUs;
+ // Model Id: 9175
+ ExpectedCPUs.push_back("zEC12");
+ ExpectedCPUs.push_back("arch15");
+
// Model Id: 3931
ExpectedCPUs.push_back("zEC12");
ExpectedCPUs.push_back("z16");