[llvm] r365932 - [SystemZ] Add support for new cpu architecture - arch13
Ulrich Weigand via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 12 11:13:16 PDT 2019
Author: uweigand
Date: Fri Jul 12 11:13:16 2019
New Revision: 365932
URL: http://llvm.org/viewvc/llvm-project?rev=365932&view=rev
Log:
[SystemZ] Add support for new cpu architecture - arch13
This patch series adds support for the next-generation arch13
CPU architecture to the SystemZ backend.
This includes:
- Basic support for the new processor and its features.
- Assembler/disassembler support for new instructions.
- CodeGen for new instructions, including new LLVM intrinsics.
- Scheduler description for the new processor.
- Detection of arch13 as host processor.
Note: No currently available Z system supports the arch13
architecture. Once new systems become available, the
official system name will be added as a supported -march name.
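For illustration, a minimal IR sketch (hypothetical, not taken from the
patch) of code that benefits; assuming it is compiled with
llc -mtriple=s390x-linux-gnu -mcpu=arch13, the 64-bit ctpop below is
expected to select the new doubleword POPCNT form directly:

  declare i64 @llvm.ctpop.i64(i64)

  define i64 @popcount(i64 %a) {
    ; assumed to lower to a single POPCNT with the doubleword M3 field
    %pop = call i64 @llvm.ctpop.i64(i64 %a)
    ret i64 %pop
  }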
Added:
llvm/trunk/lib/Target/SystemZ/SystemZScheduleArch13.td
llvm/trunk/test/Analysis/CostModel/SystemZ/logic-miscext3.ll
llvm/trunk/test/CodeGen/SystemZ/cond-move-06.ll
llvm/trunk/test/CodeGen/SystemZ/cond-move-07.ll
llvm/trunk/test/CodeGen/SystemZ/cond-move-08.mir
llvm/trunk/test/CodeGen/SystemZ/ctpop-02.ll
llvm/trunk/test/CodeGen/SystemZ/not-01.ll
llvm/trunk/test/CodeGen/SystemZ/vec-bswap-01.ll
llvm/trunk/test/CodeGen/SystemZ/vec-bswap-02.ll
llvm/trunk/test/CodeGen/SystemZ/vec-bswap-03.ll
llvm/trunk/test/CodeGen/SystemZ/vec-bswap-04.ll
llvm/trunk/test/CodeGen/SystemZ/vec-bswap-05.ll
llvm/trunk/test/CodeGen/SystemZ/vec-bswap-06.ll
llvm/trunk/test/CodeGen/SystemZ/vec-bswap-07.ll
llvm/trunk/test/CodeGen/SystemZ/vec-conv-03.ll
llvm/trunk/test/CodeGen/SystemZ/vec-eswap-01.ll
llvm/trunk/test/CodeGen/SystemZ/vec-eswap-02.ll
llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics-03.ll
llvm/trunk/test/MC/Disassembler/SystemZ/insns-arch13.txt
llvm/trunk/test/MC/SystemZ/insn-bad-arch13.s
llvm/trunk/test/MC/SystemZ/insn-good-arch13.s
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td
llvm/trunk/lib/Support/Host.cpp
llvm/trunk/lib/Target/SystemZ/SystemZFeatures.td
llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td
llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp
llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h
llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td
llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td
llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td
llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp
llvm/trunk/lib/Target/SystemZ/SystemZSchedule.td
llvm/trunk/lib/Target/SystemZ/SystemZShortenInst.cpp
llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp
llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h
llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
llvm/trunk/test/Analysis/CostModel/SystemZ/fp-cast.ll
llvm/trunk/test/Analysis/CostModel/SystemZ/intrinsics.ll
llvm/trunk/test/CodeGen/SystemZ/cond-move-01.ll
llvm/trunk/test/CodeGen/SystemZ/cond-move-02.ll
llvm/trunk/test/CodeGen/SystemZ/cond-move-03.ll
llvm/trunk/test/MC/SystemZ/insn-bad-z14.s
llvm/trunk/test/Verifier/SystemZ/intrinsic-immarg.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsSystemZ.td Fri Jul 12 11:13:16 2019
@@ -48,6 +48,9 @@ class SystemZTernaryConv<string name, LL
: GCCBuiltin<"__builtin_s390_" ## name>,
Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;
+class SystemZTernaryConvCC<LLVMType result, LLVMType arg>
+ : Intrinsic<[result, llvm_i32_ty], [arg, arg, result], [IntrNoMem]>;
+
class SystemZTernary<string name, LLVMType type>
: SystemZTernaryConv<name, type, type>;
@@ -415,6 +418,24 @@ let TargetPrefix = "s390" in {
def int_s390_vstrl : GCCBuiltin<"__builtin_s390_vstrl">,
Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
[IntrArgMemOnly, IntrWriteMem]>;
+
+ // Instructions from the Vector Enhancements Facility 2
+ def int_s390_vsld : GCCBuiltin<"__builtin_s390_vsld">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+ def int_s390_vsrd : GCCBuiltin<"__builtin_s390_vsrd">,
+ Intrinsic<[llvm_v16i8_ty],
+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
+ def int_s390_vstrsb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
+ def int_s390_vstrsh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
+ def int_s390_vstrsf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
+ def int_s390_vstrszb : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v16i8_ty>;
+ def int_s390_vstrszh : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
+ def int_s390_vstrszf : SystemZTernaryConvCC<llvm_v16i8_ty, llvm_v4i32_ty>;
}
//===----------------------------------------------------------------------===//
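For reference, a sketch of how the new intrinsics surface at the IR
level (signatures follow the definitions above; the names assume the
usual s390 mangling, and the function below is hypothetical):

  declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
  declare { <16 x i8>, i32 } @llvm.s390.vstrsb(<16 x i8>, <16 x i8>, <16 x i8>)

  define <16 x i8> @shift_left_double(<16 x i8> %a, <16 x i8> %b) {
    ; shift the concatenation %a||%b left by 3 bits, keeping the high
    ; 128 bits; the shift amount is an ImmArg and must be a constant
    %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 3)
    ret <16 x i8> %res
  }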
Modified: llvm/trunk/lib/Support/Host.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/Host.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Support/Host.cpp (original)
+++ llvm/trunk/lib/Support/Host.cpp Fri Jul 12 11:13:16 2019
@@ -315,6 +315,8 @@ StringRef sys::detail::getHostCPUNameFor
Pos += sizeof("machine = ") - 1;
unsigned int Id;
if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
+ if (Id >= 8561 && HaveVectorSupport)
+ return "arch13";
if (Id >= 3906 && HaveVectorSupport)
return "z14";
if (Id >= 2964 && HaveVectorSupport)
Modified: llvm/trunk/lib/Target/SystemZ/SystemZFeatures.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZFeatures.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZFeatures.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZFeatures.td Fri Jul 12 11:13:16 2019
@@ -241,6 +241,51 @@ def Arch12NewFeatures : SystemZFeatureLi
//===----------------------------------------------------------------------===//
//
+// New features added in the Thirteenth Edition of the z/Architecture
+//
+//===----------------------------------------------------------------------===//
+
+def FeatureMiscellaneousExtensions3 : SystemZFeature<
+ "miscellaneous-extensions-3", "MiscellaneousExtensions3",
+ "Assume that the miscellaneous-extensions facility 3 is installed"
+>;
+
+def FeatureMessageSecurityAssist9 : SystemZFeature<
+ "message-security-assist-extension9", "MessageSecurityAssist9",
+ "Assume that the message-security-assist extension facility 9 is installed"
+>;
+
+def FeatureVectorEnhancements2 : SystemZFeature<
+ "vector-enhancements-2", "VectorEnhancements2",
+ "Assume that the vector enhancements facility 2 is installed"
+>;
+
+def FeatureVectorPackedDecimalEnhancement : SystemZFeature<
+ "vector-packed-decimal-enhancement", "VectorPackedDecimalEnhancement",
+ "Assume that the vector packed decimal enhancement facility is installed"
+>;
+
+def FeatureEnhancedSort : SystemZFeature<
+ "enhanced-sort", "EnhancedSort",
+ "Assume that the enhanced-sort facility is installed"
+>;
+
+def FeatureDeflateConversion : SystemZFeature<
+ "deflate-conversion", "DeflateConversion",
+ "Assume that the deflate-conversion facility is installed"
+>;
+
+def Arch13NewFeatures : SystemZFeatureList<[
+ FeatureMiscellaneousExtensions3,
+ FeatureMessageSecurityAssist9,
+ FeatureVectorEnhancements2,
+ FeatureVectorPackedDecimalEnhancement,
+ FeatureEnhancedSort,
+ FeatureDeflateConversion
+]>;
+
+//===----------------------------------------------------------------------===//
+//
// Cumulative supported and unsupported feature sets
//
//===----------------------------------------------------------------------===//
@@ -255,9 +300,13 @@ def Arch11SupportedFeatures
: SystemZFeatureAdd<Arch10SupportedFeatures.List, Arch11NewFeatures.List>;
def Arch12SupportedFeatures
: SystemZFeatureAdd<Arch11SupportedFeatures.List, Arch12NewFeatures.List>;
+def Arch13SupportedFeatures
+ : SystemZFeatureAdd<Arch12SupportedFeatures.List, Arch13NewFeatures.List>;
-def Arch12UnsupportedFeatures
+def Arch13UnsupportedFeatures
: SystemZFeatureList<[]>;
+def Arch12UnsupportedFeatures
+ : SystemZFeatureAdd<Arch13UnsupportedFeatures.List, Arch13NewFeatures.List>;
def Arch11UnsupportedFeatures
: SystemZFeatureAdd<Arch12UnsupportedFeatures.List, Arch12NewFeatures.List>;
def Arch10UnsupportedFeatures
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp Fri Jul 12 11:13:16 2019
@@ -1480,6 +1480,23 @@ void SystemZDAGToDAGISel::Select(SDNode
Node->getOperand(0).getOpcode() != ISD::Constant)
if (auto *Op1 = dyn_cast<ConstantSDNode>(Node->getOperand(1))) {
uint64_t Val = Op1->getZExtValue();
+ // Don't split the operation if we can match one of the combined
+ // logical operations provided by miscellaneous-extensions-3.
+ if (Subtarget->hasMiscellaneousExtensions3()) {
+ unsigned ChildOpcode = Node->getOperand(0).getOpcode();
+ // Check whether this expression matches NAND/NOR/NXOR.
+ if (Val == (uint64_t)-1 && Opcode == ISD::XOR)
+ if (ChildOpcode == ISD::AND || ChildOpcode == ISD::OR ||
+ ChildOpcode == ISD::XOR)
+ break;
+ // Check whether this expression matches OR-with-complement.
+ if (Opcode == ISD::OR && ChildOpcode == ISD::XOR) {
+ auto Op0 = Node->getOperand(0);
+ if (auto *Op0Op1 = dyn_cast<ConstantSDNode>(Op0->getOperand(1)))
+ if (Op0Op1->getZExtValue() == (uint64_t)-1)
+ break;
+ }
+ }
if (!SystemZ::isImmLF(Val) && !SystemZ::isImmHF(Val)) {
splitLargeImmediate(Opcode, Node, Node->getOperand(0),
Val - uint32_t(Val), uint32_t(Val));
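To make the new check concrete, a hedged IR sketch of the NAND shape it
preserves (hypothetical function; on arch13 this is assumed to select a
single NNGRK instead of being split into two immediate operations):

  define i64 @nand(i64 %a, i64 %b) {
    %and = and i64 %a, %b
    %not = xor i64 %and, -1   ; Val == -1 with ChildOpcode == ISD::AND
    ret i64 %not
  }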
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Fri Jul 12 11:13:16 2019
@@ -252,6 +252,12 @@ SystemZTargetLowering::SystemZTargetLowe
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Promote);
setOperationAction(ISD::CTLZ, MVT::i64, Legal);
+ // On arch13 we have native support for a 64-bit CTPOP.
+ if (Subtarget.hasMiscellaneousExtensions3()) {
+ setOperationAction(ISD::CTPOP, MVT::i32, Promote);
+ setOperationAction(ISD::CTPOP, MVT::i64, Legal);
+ }
+
// Give LowerOperation the chance to replace 64-bit ORs with subregs.
setOperationAction(ISD::OR, MVT::i64, Custom);
@@ -377,6 +383,17 @@ SystemZTargetLowering::SystemZTargetLowe
setOperationAction(ISD::UINT_TO_FP, MVT::v2f64, Legal);
}
+ if (Subtarget.hasVectorEnhancements2()) {
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_SINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal);
+ setOperationAction(ISD::FP_TO_UINT, MVT::v4f32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::SINT_TO_FP, MVT::v4f32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Legal);
+ setOperationAction(ISD::UINT_TO_FP, MVT::v4f32, Legal);
+ }
+
// Handle floating-point types.
for (unsigned I = MVT::FIRST_FP_VALUETYPE;
I <= MVT::LAST_FP_VALUETYPE;
@@ -576,6 +593,7 @@ SystemZTargetLowering::SystemZTargetLowe
setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
setTargetDAGCombine(ISD::LOAD);
setTargetDAGCombine(ISD::STORE);
+ setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
@@ -1809,6 +1827,20 @@ static bool isIntrinsicWithCC(SDValue Op
CCValid = SystemZ::CCMASK_ANY;
return true;
+ case Intrinsic::s390_vstrsb:
+ case Intrinsic::s390_vstrsh:
+ case Intrinsic::s390_vstrsf:
+ Opcode = SystemZISD::VSTRS_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
+ case Intrinsic::s390_vstrszb:
+ case Intrinsic::s390_vstrszh:
+ case Intrinsic::s390_vstrszf:
+ Opcode = SystemZISD::VSTRSZ_CC;
+ CCValid = SystemZ::CCMASK_ANY;
+ return true;
+
case Intrinsic::s390_vfcedbs:
case Intrinsic::s390_vfcesbs:
Opcode = SystemZISD::VFCMPES;
@@ -4506,9 +4538,18 @@ static SDValue tryBuildVectorShuffle(Sel
return GS.getNode(DAG, SDLoc(BVN));
}
+bool SystemZTargetLowering::isVectorElementLoad(SDValue Op) const {
+ if (Op.getOpcode() == ISD::LOAD && cast<LoadSDNode>(Op)->isUnindexed())
+ return true;
+ if (Subtarget.hasVectorEnhancements2() && Op.getOpcode() == SystemZISD::LRV)
+ return true;
+ return false;
+}
+
// Combine GPR scalar values Elems into a vector of type VT.
-static SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
- SmallVectorImpl<SDValue> &Elems) {
+SDValue
+SystemZTargetLowering::buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SmallVectorImpl<SDValue> &Elems) const {
// See whether there is a single replicated value.
SDValue Single;
unsigned int NumElements = Elems.size();
@@ -4537,13 +4578,13 @@ static SDValue buildVector(SelectionDAG
// we would need 2 instructions to replicate it: VLVGP followed by VREPx.
// This is only a win if the single defined element is used more than once.
// In other cases we're better off using a single VLVGx.
- if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
+ if (Single.getNode() && (Count > 1 || isVectorElementLoad(Single)))
return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
// If all elements are loads, use VLREP/VLEs (below).
bool AllLoads = true;
for (auto Elem : Elems)
- if (Elem.getOpcode() != ISD::LOAD || cast<LoadSDNode>(Elem)->isIndexed()) {
+ if (!isVectorElementLoad(Elem)) {
AllLoads = false;
break;
}
@@ -4615,8 +4656,7 @@ static SDValue buildVector(SelectionDAG
std::map<const SDNode*, unsigned> UseCounts;
SDNode *LoadMaxUses = nullptr;
for (unsigned I = 0; I < NumElements; ++I)
- if (Elems[I].getOpcode() == ISD::LOAD &&
- cast<LoadSDNode>(Elems[I])->isUnindexed()) {
+ if (isVectorElementLoad(Elems[I])) {
SDNode *Ld = Elems[I].getNode();
UseCounts[Ld]++;
if (LoadMaxUses == nullptr || UseCounts[LoadMaxUses] < UseCounts[Ld])
@@ -5152,6 +5192,8 @@ const char *SystemZTargetLowering::getTa
OPCODE(VISTR_CC);
OPCODE(VSTRC_CC);
OPCODE(VSTRCZ_CC);
+ OPCODE(VSTRS_CC);
+ OPCODE(VSTRSZ_CC);
OPCODE(TDC);
OPCODE(ATOMIC_SWAPW);
OPCODE(ATOMIC_LOADW_ADD);
@@ -5171,6 +5213,8 @@ const char *SystemZTargetLowering::getTa
OPCODE(ATOMIC_CMP_SWAP_128);
OPCODE(LRV);
OPCODE(STRV);
+ OPCODE(VLER);
+ OPCODE(VSTER);
OPCODE(PREFETCH);
}
return nullptr;
@@ -5484,6 +5528,31 @@ SDValue SystemZTargetLowering::combineLO
return SDValue(N, 0);
}
+bool SystemZTargetLowering::canLoadStoreByteSwapped(EVT VT) const {
+ if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64)
+ return true;
+ if (Subtarget.hasVectorEnhancements2())
+ if (VT == MVT::v8i16 || VT == MVT::v4i32 || VT == MVT::v2i64)
+ return true;
+ return false;
+}
+
+static bool isVectorElementSwap(ArrayRef<int> M, EVT VT) {
+ if (!VT.isVector() || !VT.isSimple() ||
+ VT.getSizeInBits() != 128 ||
+ VT.getScalarSizeInBits() % 8 != 0)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ for (unsigned i = 0; i < NumElts; ++i) {
+ if (M[i] < 0) continue; // ignore UNDEF indices
+ if ((unsigned) M[i] != NumElts - 1 - i)
+ return false;
+ }
+
+ return true;
+}
+
SDValue SystemZTargetLowering::combineSTORE(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5505,13 +5574,11 @@ SDValue SystemZTargetLowering::combineST
SN->getMemOperand());
}
}
- // Combine STORE (BSWAP) into STRVH/STRV/STRVG
+ // Combine STORE (BSWAP) into STRVH/STRV/STRVG/VSTBR
if (!SN->isTruncatingStore() &&
Op1.getOpcode() == ISD::BSWAP &&
Op1.getNode()->hasOneUse() &&
- (Op1.getValueType() == MVT::i16 ||
- Op1.getValueType() == MVT::i32 ||
- Op1.getValueType() == MVT::i64)) {
+ canLoadStoreByteSwapped(Op1.getValueType())) {
SDValue BSwapOp = Op1.getOperand(0);
@@ -5526,15 +5593,97 @@ SDValue SystemZTargetLowering::combineST
DAG.getMemIntrinsicNode(SystemZISD::STRV, SDLoc(N), DAG.getVTList(MVT::Other),
Ops, MemVT, SN->getMemOperand());
}
+ // Combine STORE (element-swap) into VSTER
+ if (!SN->isTruncatingStore() &&
+ Op1.getOpcode() == ISD::VECTOR_SHUFFLE &&
+ Op1.getNode()->hasOneUse() &&
+ Subtarget.hasVectorEnhancements2()) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op1.getNode());
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+ if (isVectorElementSwap(ShuffleMask, Op1.getValueType())) {
+ SDValue Ops[] = {
+ N->getOperand(0), Op1.getOperand(0), N->getOperand(2)
+ };
+
+ return DAG.getMemIntrinsicNode(SystemZISD::VSTER, SDLoc(N),
+ DAG.getVTList(MVT::Other),
+ Ops, MemVT, SN->getMemOperand());
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SystemZTargetLowering::combineVECTOR_SHUFFLE(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
+ // Combine element-swap (LOAD) into VLER
+ if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
+ N->getOperand(0).hasOneUse() &&
+ Subtarget.hasVectorEnhancements2()) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> ShuffleMask = SVN->getMask();
+ if (isVectorElementSwap(ShuffleMask, N->getValueType(0))) {
+ SDValue Load = N->getOperand(0);
+ LoadSDNode *LD = cast<LoadSDNode>(Load);
+
+ // Create the element-swapping load.
+ SDValue Ops[] = {
+ LD->getChain(), // Chain
+ LD->getBasePtr() // Ptr
+ };
+ SDValue ESLoad =
+ DAG.getMemIntrinsicNode(SystemZISD::VLER, SDLoc(N),
+ DAG.getVTList(LD->getValueType(0), MVT::Other),
+ Ops, LD->getMemoryVT(), LD->getMemOperand());
+
+ // First, combine the VECTOR_SHUFFLE away. This makes the value produced
+ // by the load dead.
+ DCI.CombineTo(N, ESLoad);
+
+ // Next, combine the load away; we give it a bogus result value but a real
+ // chain result. The result value is dead because the shuffle is dead.
+ DCI.CombineTo(Load.getNode(), ESLoad, ESLoad.getValue(1));
+
+ // Return N so it doesn't get rechecked!
+ return SDValue(N, 0);
+ }
+ }
+
return SDValue();
}
SDValue SystemZTargetLowering::combineEXTRACT_VECTOR_ELT(
SDNode *N, DAGCombinerInfo &DCI) const {
+ SelectionDAG &DAG = DCI.DAG;
if (!Subtarget.hasVector())
return SDValue();
+ // Look through bitcasts that retain the number of vector elements.
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getValueType().isVector() &&
+ Op.getOperand(0).getValueType().isVector() &&
+ Op.getValueType().getVectorNumElements() ==
+ Op.getOperand(0).getValueType().getVectorNumElements())
+ Op = Op.getOperand(0);
+
+ // Pull BSWAP out of a vector extraction.
+ if (Op.getOpcode() == ISD::BSWAP && Op.hasOneUse()) {
+ EVT VecVT = Op.getValueType();
+ EVT EltVT = VecVT.getVectorElementType();
+ Op = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), EltVT,
+ Op.getOperand(0), N->getOperand(1));
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Op);
+ if (EltVT != N->getValueType(0)) {
+ DCI.AddToWorklist(Op.getNode());
+ Op = DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op);
+ }
+ return Op;
+ }
+
// Try to simplify a vector extraction.
if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
SDValue Op0 = N->getOperand(0);
@@ -5660,11 +5809,10 @@ SDValue SystemZTargetLowering::combineFP
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- // Combine BSWAP (LOAD) into LRVH/LRV/LRVG
+ // Combine BSWAP (LOAD) into LRVH/LRV/LRVG/VLBR
if (ISD::isNON_EXTLoad(N->getOperand(0).getNode()) &&
N->getOperand(0).hasOneUse() &&
- (N->getValueType(0) == MVT::i16 || N->getValueType(0) == MVT::i32 ||
- N->getValueType(0) == MVT::i64)) {
+ canLoadStoreByteSwapped(N->getValueType(0))) {
SDValue Load = N->getOperand(0);
LoadSDNode *LD = cast<LoadSDNode>(Load);
@@ -5697,6 +5845,74 @@ SDValue SystemZTargetLowering::combineBS
// Return N so it doesn't get rechecked!
return SDValue(N, 0);
}
+
+ // Look through bitcasts that retain the number of vector elements.
+ SDValue Op = N->getOperand(0);
+ if (Op.getOpcode() == ISD::BITCAST &&
+ Op.getValueType().isVector() &&
+ Op.getOperand(0).getValueType().isVector() &&
+ Op.getValueType().getVectorNumElements() ==
+ Op.getOperand(0).getValueType().getVectorNumElements())
+ Op = Op.getOperand(0);
+
+ // Push BSWAP into a vector insertion if at least one side then simplifies.
+ if (Op.getOpcode() == ISD::INSERT_VECTOR_ELT && Op.hasOneUse()) {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Elt = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(Vec) ||
+ Vec.getOpcode() == ISD::BSWAP || Vec.isUndef() ||
+ DAG.isConstantIntBuildVectorOrConstantInt(Elt) ||
+ Elt.getOpcode() == ISD::BSWAP || Elt.isUndef() ||
+ (canLoadStoreByteSwapped(N->getValueType(0)) &&
+ ISD::isNON_EXTLoad(Elt.getNode()) && Elt.hasOneUse())) {
+ EVT VecVT = N->getValueType(0);
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ if (VecVT != Vec.getValueType()) {
+ Vec = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Vec);
+ DCI.AddToWorklist(Vec.getNode());
+ }
+ if (EltVT != Elt.getValueType()) {
+ Elt = DAG.getNode(ISD::BITCAST, SDLoc(N), EltVT, Elt);
+ DCI.AddToWorklist(Elt.getNode());
+ }
+ Vec = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Vec);
+ DCI.AddToWorklist(Vec.getNode());
+ Elt = DAG.getNode(ISD::BSWAP, SDLoc(N), EltVT, Elt);
+ DCI.AddToWorklist(Elt.getNode());
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VecVT,
+ Vec, Elt, Idx);
+ }
+ }
+
+ // Push BSWAP into a vector shuffle if at least one side then simplifies.
+ ShuffleVectorSDNode *SV = dyn_cast<ShuffleVectorSDNode>(Op);
+ if (SV && Op.hasOneUse()) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
+ if (DAG.isConstantIntBuildVectorOrConstantInt(Op0) ||
+ Op0.getOpcode() == ISD::BSWAP || Op0.isUndef() ||
+ DAG.isConstantIntBuildVectorOrConstantInt(Op1) ||
+ Op1.getOpcode() == ISD::BSWAP || Op1.isUndef()) {
+ EVT VecVT = N->getValueType(0);
+ if (VecVT != Op0.getValueType()) {
+ Op0 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ }
+ if (VecVT != Op1.getValueType()) {
+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), VecVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ }
+ Op0 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op0);
+ DCI.AddToWorklist(Op0.getNode());
+ Op1 = DAG.getNode(ISD::BSWAP, SDLoc(N), VecVT, Op1);
+ DCI.AddToWorklist(Op1.getNode());
+ return DAG.getVectorShuffle(VecVT, SDLoc(N), Op0, Op1, SV->getMask());
+ }
+ }
+
return SDValue();
}
@@ -5919,6 +6135,7 @@ SDValue SystemZTargetLowering::PerformDA
case SystemZISD::MERGE_LOW: return combineMERGE(N, DCI);
case ISD::LOAD: return combineLOAD(N, DCI);
case ISD::STORE: return combineSTORE(N, DCI);
+ case ISD::VECTOR_SHUFFLE: return combineVECTOR_SHUFFLE(N, DCI);
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
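A sketch of the element-swap pattern that combineVECTOR_SHUFFLE now
recognizes (hypothetical function; the reversed load below is assumed
to select VLERG on arch13):

  define <2 x i64> @load_reversed(<2 x i64>* %ptr) {
    %val = load <2 x i64>, <2 x i64>* %ptr
    ; mask <1, 0> reverses the elements, as isVectorElementSwap checks
    %rev = shufflevector <2 x i64> %val, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
    ret <2 x i64> %rev
  }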
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Fri Jul 12 11:13:16 2019
@@ -281,6 +281,8 @@ enum NodeType : unsigned {
VISTR_CC,
VSTRC_CC,
VSTRCZ_CC,
+ VSTRS_CC,
+ VSTRSZ_CC,
// Test Data Class.
//
@@ -340,6 +342,9 @@ enum NodeType : unsigned {
// Byte swapping load/store. Same operands as regular load/store.
LRV, STRV,
+ // Element swapping load/store. Same operands as regular load/store.
+ VLER, VSTER,
+
// Prefetch from the second operand using the 4-bit control code in
// the first operand. The code is 1 for a load prefetch and 2 for
// a store prefetch.
@@ -571,6 +576,9 @@ private:
SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ bool isVectorElementLoad(SDValue Op) const;
+ SDValue buildVector(SelectionDAG &DAG, const SDLoc &DL, EVT VT,
+ SmallVectorImpl<SDValue> &Elems) const;
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
@@ -590,8 +598,10 @@ private:
SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+ bool canLoadStoreByteSwapped(EVT VT) const;
SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineVECTOR_SHUFFLE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
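The FP_TO_SINT/SINT_TO_FP legality changes in SystemZISelLowering.cpp
above make single-precision vector conversions direct; a hedged sketch
(hypothetical function, assumed to select VCFEB on arch13):

  define <4 x i32> @f2i(<4 x float> %val) {
    %res = fptosi <4 x float> %val to <4 x i32>
    ret <4 x i32> %res
  }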
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrFormats.td Fri Jul 12 11:13:16 2019
@@ -1414,13 +1414,15 @@ class InstVRRi<bits<16> op, dag outs, da
bits<4> R1;
bits<5> V2;
bits<4> M3;
+ bits<4> M4;
let Inst{47-40} = op{15-8};
let Inst{39-36} = R1;
let Inst{35-32} = V2{3-0};
let Inst{31-24} = 0;
let Inst{23-20} = M3;
- let Inst{19-12} = 0;
+ let Inst{19-16} = M4;
+ let Inst{15-12} = 0;
let Inst{11} = 0;
let Inst{10} = V2{4};
let Inst{9-8} = 0;
@@ -2489,12 +2491,18 @@ class StoreVRX<string mnemonic, bits<16>
TypedReg tr, bits<5> bytes, bits<4> type = 0>
: InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
mnemonic#"\t$V1, $XBD2",
- [(set (tr.vt tr.op:$V1), (operator bdxaddr12only:$XBD2))]> {
+ [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2)]> {
let M3 = type;
let mayStore = 1;
let AccessBytes = bytes;
}
+class StoreVRXGeneric<string mnemonic, bits<16> opcode>
+ : InstVRX<opcode, (outs), (ins VR128:$V1, bdxaddr12only:$XBD2, imm32zx4:$M3),
+ mnemonic#"\t$V1, $XBD2, $M3", []> {
+ let mayStore = 1;
+}
+
multiclass StoreVRXAlign<string mnemonic, bits<16> opcode> {
let mayStore = 1, AccessBytes = 16 in {
def Align : InstVRX<opcode, (outs),
@@ -3151,6 +3159,11 @@ class BinaryRRFb<string mnemonic, bits<1
let M4 = 0;
}
+class BinaryRRFc<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2>
+ : InstRRFc<opcode, (outs cls1:$R1), (ins cls2:$R2, imm32zx4:$M3),
+ mnemonic#"\t$R1, $R2, $M3", []>;
+
class BinaryMemRRFc<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2, Immediate imm>
: InstRRFc<opcode, (outs cls2:$R2, cls1:$R1), (ins cls1:$R1src, imm:$M3),
@@ -3218,6 +3231,41 @@ multiclass CondBinaryRRFPair<string mnem
def Asm : AsmCondBinaryRRF<mnemonic, opcode, cls1, cls2>;
}
+class CondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2, RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1),
+ (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
+ mnemonic#"$M4\t$R1, $R2, $R3",
+ [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
+ cond4:$valid, cond4:$M4))]> {
+ let CCMaskLast = 1;
+}
+
+// Like CondBinaryRRFa, but used for the raw assembly form. The condition-code
+// mask is the third operand rather than being part of the mnemonic.
+class AsmCondBinaryRRFa<string mnemonic, bits<16> opcode, RegisterOperand cls1,
+ RegisterOperand cls2, RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2, imm32zx4:$M4),
+ mnemonic#"\t$R1, $R2, $R3, $M4", []>;
+
+// Like CondBinaryRRFa, but with a fixed CC mask.
+class FixedCondBinaryRRFa<CondVariant V, string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1), (ins cls3:$R3, cls2:$R2),
+ mnemonic#V.suffix#"\t$R1, $R2, $R3", []> {
+ let isAsmParserOnly = V.alternate;
+ let M4 = V.ccmask;
+}
+
+multiclass CondBinaryRRFaPair<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3> {
+ let isCodeGenOnly = 1 in
+ def "" : CondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+ def Asm : AsmCondBinaryRRFa<mnemonic, opcode, cls1, cls2, cls3>;
+}
+
class BinaryRI<string mnemonic, bits<12> opcode, SDPatternOperator operator,
RegisterOperand cls, Immediate imm>
: InstRIa<opcode, (outs cls:$R1), (ins cls:$R1src, imm:$I2),
@@ -3612,7 +3660,9 @@ class BinaryVRRf<string mnemonic, bits<1
class BinaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
: InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2, imm32zx4:$M3),
- mnemonic#"\t$R1, $V2, $M3", []>;
+ mnemonic#"\t$R1, $V2, $M3", []> {
+ let M4 = 0;
+}
class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> type>
@@ -3990,6 +4040,17 @@ class SideEffectTernaryRRFa<string mnemo
let M4 = 0;
}
+class SideEffectTernaryMemMemRRFa<string mnemonic, bits<16> opcode,
+ RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : InstRRFa<opcode, (outs cls1:$R1, cls2:$R2),
+ (ins cls1:$R1src, cls2:$R2src, cls3:$R3),
+ mnemonic#"\t$R1, $R2, $R3", []> {
+ let Constraints = "$R1 = $R1src, $R2 = $R2src";
+ let DisableEncoding = "$R1src, $R2src";
+ let M4 = 0;
+}
+
class SideEffectTernaryRRFb<string mnemonic, bits<16> opcode,
RegisterOperand cls1, RegisterOperand cls2,
RegisterOperand cls3>
@@ -4278,7 +4339,7 @@ class TernaryVRRcFloatGeneric<string mne
mnemonic#"\t$V1, $V2, $V3, $M4, $M5, $M6", []>;
class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
- TypedReg tr1, TypedReg tr2, bits<4> type = 0>
+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m6 = 0>
: InstVRRd<opcode, (outs tr1.op:$V1),
(ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
mnemonic#"\t$V1, $V2, $V3, $V4",
@@ -4286,7 +4347,7 @@ class TernaryVRRd<string mnemonic, bits<
(tr2.vt tr2.op:$V3),
(tr1.vt tr1.op:$V4)))]> {
let M5 = type;
- let M6 = 0;
+ let M6 = m6;
}
class TernaryVRRdGeneric<string mnemonic, bits<16> opcode>
@@ -4296,6 +4357,34 @@ class TernaryVRRdGeneric<string mnemonic
let M6 = 0;
}
+// Ternary operation where the assembler mnemonic has an extra operand to
+// optionally allow specifying arbitrary M6 values.
+multiclass TernaryExtraVRRd<string mnemonic, bits<16> opcode,
+ SDPatternOperator operator,
+ TypedReg tr1, TypedReg tr2, bits<4> type> {
+ let M5 = type, Defs = [CC] in
+ def "" : InstVRRd<opcode, (outs tr1.op:$V1),
+ (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M6", []>;
+ def : Pat<(operator (tr2.vt tr2.op:$V2), (tr2.vt tr2.op:$V3),
+ (tr1.vt tr1.op:$V4)),
+ (!cast<Instruction>(NAME) tr2.op:$V2, tr2.op:$V3, tr1.op:$V4, 0)>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
+ (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
+ tr2.op:$V3, tr1.op:$V4, 0)>;
+}
+
+multiclass TernaryExtraVRRdGeneric<string mnemonic, bits<16> opcode> {
+ let Defs = [CC] in
+ def "" : InstVRRd<opcode, (outs VR128:$V1),
+ (ins VR128:$V2, VR128:$V3, VR128:$V4,
+ imm32zx4:$M5, imm32zx4:$M6),
+ mnemonic#"\t$V1, $V2, $V3, $V4, $M5, $M6", []>;
+ def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4, $M5",
+ (!cast<Instruction>(NAME) VR128:$V1, VR128:$V2, VR128:$V3,
+ VR128:$V4, imm32zx4:$M5, 0)>;
+}
+
class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
: InstVRRe<opcode, (outs tr1.op:$V1),
@@ -4326,6 +4415,11 @@ class TernaryVRSb<string mnemonic, bits<
let M4 = type;
}
+class TernaryVRRi<string mnemonic, bits<16> opcode, RegisterOperand cls>
+ : InstVRRi<opcode, (outs cls:$R1), (ins VR128:$V2,
+ imm32zx4:$M3, imm32zx4:$M4),
+ mnemonic#"\t$R1, $V2, $M3, $M4", []>;
+
class TernaryVRSbGeneric<string mnemonic, bits<16> opcode>
: InstVRSb<opcode, (outs VR128:$V1),
(ins VR128:$V1src, GR64:$R3, shift12only:$BD2, imm32zx4:$M4),
@@ -4705,6 +4799,17 @@ class CondBinaryRRFPseudo<RegisterOperan
let CCMaskLast = 1;
}
+// Like CondBinaryRRFa, but expanded after RA depending on the choice of
+// register.
+class CondBinaryRRFaPseudo<RegisterOperand cls1, RegisterOperand cls2,
+ RegisterOperand cls3>
+ : Pseudo<(outs cls1:$R1),
+ (ins cls3:$R3, cls2:$R2, cond4:$valid, cond4:$M4),
+ [(set cls1:$R1, (z_select_ccmask cls2:$R2, cls3:$R3,
+ cond4:$valid, cond4:$M4))]> {
+ let CCMaskLast = 1;
+}
+
// Like CondBinaryRIE, but expanded after RA depending on the choice of
// register.
class CondBinaryRIEPseudo<RegisterOperand cls, Immediate imm>
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.cpp Fri Jul 12 11:13:16 2019
@@ -223,6 +223,65 @@ void SystemZInstrInfo::expandLOCRPseudo(
// correctly. This change is deferred to the SystemZExpandPseudo pass.
}
+// MI is a select pseudo instruction. Replace it with LowOpcode if the
+// sources and destination are all low GR32s, and with HighOpcode if they
+// are all high GR32s. Otherwise, use the two-operand MixedOpcode.
+void SystemZInstrInfo::expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode,
+ unsigned MixedOpcode) const {
+ unsigned DestReg = MI.getOperand(0).getReg();
+ unsigned Src1Reg = MI.getOperand(1).getReg();
+ unsigned Src2Reg = MI.getOperand(2).getReg();
+ bool DestIsHigh = isHighReg(DestReg);
+ bool Src1IsHigh = isHighReg(Src1Reg);
+ bool Src2IsHigh = isHighReg(Src2Reg);
+
+ // If sources and destination aren't all high or all low, we may be able to
+ // simplify the operation by moving one of the sources to the destination
+ // first. But only if this doesn't clobber the other source.
+ if (DestReg != Src1Reg && DestReg != Src2Reg) {
+ if (DestIsHigh != Src1IsHigh) {
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src1Reg,
+ SystemZ::LR, 32, MI.getOperand(1).isKill(),
+ MI.getOperand(1).isUndef());
+ MI.getOperand(1).setReg(DestReg);
+ Src1Reg = DestReg;
+ Src1IsHigh = DestIsHigh;
+ } else if (DestIsHigh != Src2IsHigh) {
+ emitGRX32Move(*MI.getParent(), MI, MI.getDebugLoc(), DestReg, Src2Reg,
+ SystemZ::LR, 32, MI.getOperand(2).isKill(),
+ MI.getOperand(2).isUndef());
+ MI.getOperand(2).setReg(DestReg);
+ Src2Reg = DestReg;
+ Src2IsHigh = DestIsHigh;
+ }
+ }
+
+ // If the destination (now) matches one source, prefer this to be first.
+ if (DestReg != Src1Reg && DestReg == Src2Reg) {
+ commuteInstruction(MI, false, 1, 2);
+ std::swap(Src1Reg, Src2Reg);
+ std::swap(Src1IsHigh, Src2IsHigh);
+ }
+
+ if (!DestIsHigh && !Src1IsHigh && !Src2IsHigh)
+ MI.setDesc(get(LowOpcode));
+ else if (DestIsHigh && Src1IsHigh && Src2IsHigh)
+ MI.setDesc(get(HighOpcode));
+ else {
+ // Given the simplification above, we must already have a two-operand case.
+ assert (DestReg == Src1Reg);
+ MI.setDesc(get(MixedOpcode));
+ MI.tieOperands(0, 1);
+ LOCRMuxJumps++;
+ }
+
+ // If we were unable to implement the pseudo with a single instruction, we
+ // need to convert it back into a branch sequence. This cannot be done here
+ // since the caller of expandPostRAPseudo does not handle changes to the CFG
+ // correctly. This change is deferred to the SystemZExpandPseudo pass.
+}
+
// MI is an RR-style pseudo instruction that zero-extends the low Size bits
// of one GRX32 into another. Replace it with LowOpcode if both operands
// are low registers, otherwise use RISB[LH]G.
@@ -312,6 +371,10 @@ MachineInstr *SystemZInstrInfo::commuteI
};
switch (MI.getOpcode()) {
+ case SystemZ::SELRMux:
+ case SystemZ::SELFHR:
+ case SystemZ::SELR:
+ case SystemZ::SELGR:
case SystemZ::LOCRMux:
case SystemZ::LOCFHR:
case SystemZ::LOCR:
@@ -606,7 +669,9 @@ void SystemZInstrInfo::insertSelect(Mach
unsigned Opc;
if (SystemZ::GRX32BitRegClass.hasSubClassEq(RC)) {
- if (STI.hasLoadStoreOnCond2())
+ if (STI.hasMiscellaneousExtensions3())
+ Opc = SystemZ::SELRMux;
+ else if (STI.hasLoadStoreOnCond2())
Opc = SystemZ::LOCRMux;
else {
Opc = SystemZ::LOCR;
@@ -618,9 +683,12 @@ void SystemZInstrInfo::insertSelect(Mach
TrueReg = TReg;
FalseReg = FReg;
}
- } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC))
- Opc = SystemZ::LOCGR;
- else
+ } else if (SystemZ::GR64BitRegClass.hasSubClassEq(RC)) {
+ if (STI.hasMiscellaneousExtensions3())
+ Opc = SystemZ::SELGR;
+ else
+ Opc = SystemZ::LOCGR;
+ } else
llvm_unreachable("Invalid register class");
BuildMI(MBB, I, DL, get(Opc), DstReg)
@@ -643,7 +711,11 @@ bool SystemZInstrInfo::FoldImmediate(Mac
unsigned NewUseOpc;
unsigned UseIdx;
int CommuteIdx = -1;
+ bool TieOps = false;
switch (UseOpc) {
+ case SystemZ::SELRMux:
+ TieOps = true;
+ /* fall through */
case SystemZ::LOCRMux:
if (!STI.hasLoadStoreOnCond2())
return false;
@@ -655,6 +727,9 @@ bool SystemZInstrInfo::FoldImmediate(Mac
else
return false;
break;
+ case SystemZ::SELGR:
+ TieOps = true;
+ /* fall through */
case SystemZ::LOCGR:
if (!STI.hasLoadStoreOnCond2())
return false;
@@ -676,6 +751,8 @@ bool SystemZInstrInfo::FoldImmediate(Mac
bool DeleteDef = MRI->hasOneNonDBGUse(Reg);
UseMI.setDesc(get(NewUseOpc));
+ if (TieOps)
+ UseMI.tieOperands(0, 1);
UseMI.getOperand(UseIdx).ChangeToImmediate(ImmVal);
if (DeleteDef)
DefMI.eraseFromParent();
@@ -1285,6 +1362,11 @@ bool SystemZInstrInfo::expandPostRAPseud
expandLOCRPseudo(MI, SystemZ::LOCR, SystemZ::LOCFHR);
return true;
+ case SystemZ::SELRMux:
+ expandSELRPseudo(MI, SystemZ::SELR, SystemZ::SELFHR,
+ SystemZ::LOCRMux);
+ return true;
+
case SystemZ::STCMux:
expandRXYPseudo(MI, SystemZ::STC, SystemZ::STCH);
return true;
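For context, the select shape that now prefers the new three-operand
instructions (hypothetical function; with -mcpu=arch13 this is assumed
to use SELRMux, falling back to a LOCRMux sequence only when the
register banks are mixed, as handled above):

  define i32 @sel(i32 %limit, i32 %a, i32 %b) {
    %cond = icmp ult i32 %limit, 42
    %res = select i1 %cond, i32 %a, i32 %b
    ret i32 %res
  }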
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.h Fri Jul 12 11:13:16 2019
@@ -162,6 +162,8 @@ class SystemZInstrInfo : public SystemZG
unsigned HighOpcode) const;
void expandLOCRPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned HighOpcode) const;
+ void expandSELRPseudo(MachineInstr &MI, unsigned LowOpcode,
+ unsigned HighOpcode, unsigned MixedOpcode) const;
void expandZExtPseudo(MachineInstr &MI, unsigned LowOpcode,
unsigned Size) const;
void expandLoadStackGuard(MachineInstr *MI) const;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrInfo.td Fri Jul 12 11:13:16 2019
@@ -474,6 +474,11 @@ let mayLoad = 1, mayStore = 1, Defs = [C
def MVCLU : SideEffectTernaryMemMemRSY<"mvclu", 0xEB8E, GR128, GR128>;
}
+// Move right.
+let Predicates = [FeatureMiscellaneousExtensions3],
+ mayLoad = 1, mayStore = 1, Uses = [R0L] in
+ def MVCRL : SideEffectBinarySSE<"mvcrl", 0xE50A>;
+
// String moves.
let mayLoad = 1, mayStore = 1, Defs = [CC] in
defm MVST : StringRRE<"mvst", 0xB255, z_stpcpy>;
@@ -482,6 +487,29 @@ let mayLoad = 1, mayStore = 1, Defs = [C
// Conditional move instructions
//===----------------------------------------------------------------------===//
+let Predicates = [FeatureMiscellaneousExtensions3], Uses = [CC] in {
+ // Select.
+ let isCommutable = 1 in {
+ // Expands to SELR or SELFHR or a branch-and-move sequence,
+ // depending on the choice of registers.
+ def SELRMux : CondBinaryRRFaPseudo<GRX32, GRX32, GRX32>;
+ defm SELFHR : CondBinaryRRFaPair<"selfhr", 0xB9C0, GRH32, GRH32, GRH32>;
+ defm SELR : CondBinaryRRFaPair<"selr", 0xB9F0, GR32, GR32, GR32>;
+ defm SELGR : CondBinaryRRFaPair<"selgr", 0xB9E3, GR64, GR64, GR64>;
+ }
+
+ // Define AsmParser extended mnemonics for each general condition-code mask.
+ foreach V = [ "E", "NE", "H", "NH", "L", "NL", "HE", "NHE", "LE", "NLE",
+ "Z", "NZ", "P", "NP", "M", "NM", "LH", "NLH", "O", "NO" ] in {
+ def SELRAsm#V : FixedCondBinaryRRFa<CV<V>, "selr", 0xB9F0,
+ GR32, GR32, GR32>;
+ def SELFHRAsm#V : FixedCondBinaryRRFa<CV<V>, "selfhr", 0xB9C0,
+ GRH32, GRH32, GRH32>;
+ def SELGRAsm#V : FixedCondBinaryRRFa<CV<V>, "selgr", 0xB9E3,
+ GR64, GR64, GR64>;
+ }
+}
+
let Predicates = [FeatureLoadStoreOnCond2], Uses = [CC] in {
// Load immediate on condition. Matched via DAG pattern and created
// by the PeepholeOptimizer via FoldImmediate.
@@ -1244,6 +1272,43 @@ defm : RMWIByte<xor, bdaddr12pair, XI>;
defm : RMWIByte<xor, bdaddr20pair, XIY>;
//===----------------------------------------------------------------------===//
+// Combined logical operations
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureMiscellaneousExtensions3],
+ Defs = [CC] in {
+ // AND with complement.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NCRK : BinaryRRFa<"ncrk", 0xB9F5, andc, GR32, GR32, GR32>;
+ def NCGRK : BinaryRRFa<"ncgrk", 0xB9E5, andc, GR64, GR64, GR64>;
+ }
+
+ // OR with complement.
+ let CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def OCRK : BinaryRRFa<"ocrk", 0xB975, orc, GR32, GR32, GR32>;
+ def OCGRK : BinaryRRFa<"ocgrk", 0xB965, orc, GR64, GR64, GR64>;
+ }
+
+ // NAND.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NNRK : BinaryRRFa<"nnrk", 0xB974, nand, GR32, GR32, GR32>;
+ def NNGRK : BinaryRRFa<"nngrk", 0xB964, nand, GR64, GR64, GR64>;
+ }
+
+ // NOR.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NORK : BinaryRRFa<"nork", 0xB976, nor, GR32, GR32, GR32>;
+ def NOGRK : BinaryRRFa<"nogrk", 0xB966, nor, GR64, GR64, GR64>;
+ }
+
+ // NXOR.
+ let isCommutable = 1, CCValues = 0xC, CompareZeroCCMask = 0x8 in {
+ def NXRK : BinaryRRFa<"nxrk", 0xB977, nxor, GR32, GR32, GR32>;
+ def NXGRK : BinaryRRFa<"nxgrk", 0xB967, nxor, GR64, GR64, GR64>;
+ }
+}
+
+//===----------------------------------------------------------------------===//
// Multiplication
//===----------------------------------------------------------------------===//
@@ -1837,6 +1902,9 @@ let mayLoad = 1, mayStore = 1, Uses = [R
let Predicates = [FeatureMessageSecurityAssist8] in
def KMA : SideEffectTernaryMemMemMemRRFb<"kma", 0xB929,
GR128, GR128, GR128>;
+
+ let Predicates = [FeatureMessageSecurityAssist9] in
+ def KDSA : SideEffectBinaryMemRRE<"kdsa", 0xB93A, GR64, GR128>;
}
//===----------------------------------------------------------------------===//
@@ -2017,7 +2085,12 @@ let Defs = [CC] in
def : Pat<(ctlz GR64:$src),
(EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
-// Population count. Counts bits set per byte.
+// Population count. Counts bits set per byte or doubleword.
+let Predicates = [FeatureMiscellaneousExtensions3] in {
+ let Defs = [CC] in
+ def POPCNTOpt : BinaryRRFc<"popcnt", 0xB9E1, GR64, GR64>;
+ def : Pat<(ctpop GR64:$src), (POPCNTOpt GR64:$src, 8)>;
+}
let Predicates = [FeaturePopulationCount], Defs = [CC] in
def POPCNT : UnaryRRE<"popcnt", 0xB9E1, z_popcnt, GR64, GR64>;
@@ -2048,6 +2121,17 @@ let mayLoad = 1, Defs = [CC] in
let mayLoad = 1, mayStore = 1, Defs = [CC, R1D], Uses = [R0L, R1D] in
def CMPSC : SideEffectBinaryMemMemRRE<"cmpsc", 0xB263, GR128, GR128>;
+// Sort lists.
+let Predicates = [FeatureEnhancedSort],
+ mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
+ def SORTL : SideEffectBinaryMemMemRRE<"sortl", 0xB938, GR128, GR128>;
+
+// Deflate conversion call.
+let Predicates = [FeatureDeflateConversion],
+ mayLoad = 1, mayStore = 1, Defs = [CC], Uses = [R0L, R1D] in
+ def DFLTCC : SideEffectTernaryMemMemRRFa<"dfltcc", 0xB939,
+ GR128, GR128, GR64>;
+
// Execute.
let hasSideEffects = 1 in {
def EX : SideEffectBinaryRX<"ex", 0x44, GR64>;
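A sketch of the or-with-complement shape covered by the new OCGRK
pattern (hypothetical function; mirrors the orc PatFrag added to
SystemZOperators.td below):

  define i64 @or_complement(i64 %a, i64 %b) {
    %not = xor i64 %b, -1       ; (not %b)
    %res = or i64 %a, %not      ; matches the orc fragment
    ret i64 %res
  }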
Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td Fri Jul 12 11:13:16 2019
@@ -249,6 +249,81 @@ let Predicates = [FeatureVectorPackedDec
}
//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+let Predicates = [FeatureVectorEnhancements2] in {
+ // Load byte-reversed elements.
+ def VLBR : UnaryVRXGeneric<"vlbr", 0xE606>;
+ def VLBRH : UnaryVRX<"vlbrh", 0xE606, z_loadbswap, v128h, 16, 1>;
+ def VLBRF : UnaryVRX<"vlbrf", 0xE606, z_loadbswap, v128f, 16, 2>;
+ def VLBRG : UnaryVRX<"vlbrg", 0xE606, z_loadbswap, v128g, 16, 3>;
+ def VLBRQ : UnaryVRX<"vlbrq", 0xE606, null_frag, v128q, 16, 4>;
+
+ // Load elements reversed.
+ def VLER : UnaryVRXGeneric<"vler", 0xE607>;
+ def VLERH : UnaryVRX<"vlerh", 0xE607, z_loadeswap, v128h, 16, 1>;
+ def VLERF : UnaryVRX<"vlerf", 0xE607, z_loadeswap, v128f, 16, 2>;
+ def VLERG : UnaryVRX<"vlerg", 0xE607, z_loadeswap, v128g, 16, 3>;
+ def : Pat<(v4f32 (z_loadeswap bdxaddr12only:$addr)),
+ (VLERF bdxaddr12only:$addr)>;
+ def : Pat<(v2f64 (z_loadeswap bdxaddr12only:$addr)),
+ (VLERG bdxaddr12only:$addr)>;
+ def : Pat<(v16i8 (z_loadeswap bdxaddr12only:$addr)),
+ (VLBRQ bdxaddr12only:$addr)>;
+
+ // Load byte-reversed element.
+ def VLEBRH : TernaryVRX<"vlebrh", 0xE601, z_vlebri16, v128h, v128h, 2, imm32zx3>;
+ def VLEBRF : TernaryVRX<"vlebrf", 0xE603, z_vlebri32, v128f, v128f, 4, imm32zx2>;
+ def VLEBRG : TernaryVRX<"vlebrg", 0xE602, z_vlebri64, v128g, v128g, 8, imm32zx1>;
+
+ // Load byte-reversed element and zero.
+ def VLLEBRZ : UnaryVRXGeneric<"vllebrz", 0xE604>;
+ def VLLEBRZH : UnaryVRX<"vllebrzh", 0xE604, z_vllebrzi16, v128h, 2, 1>;
+ def VLLEBRZF : UnaryVRX<"vllebrzf", 0xE604, z_vllebrzi32, v128f, 4, 2>;
+ def VLLEBRZG : UnaryVRX<"vllebrzg", 0xE604, z_vllebrzi64, v128g, 8, 3>;
+ def VLLEBRZE : UnaryVRX<"vllebrze", 0xE604, z_vllebrzli32, v128f, 4, 6>;
+ def : InstAlias<"lerv\t$V1, $XBD2",
+ (VLLEBRZE VR128:$V1, bdxaddr12only:$XBD2), 0>;
+ def : InstAlias<"ldrv\t$V1, $XBD2",
+ (VLLEBRZG VR128:$V1, bdxaddr12only:$XBD2), 0>;
+
+ // Load byte-reversed element and replicate.
+ def VLBRREP : UnaryVRXGeneric<"vlbrrep", 0xE605>;
+ def VLBRREPH : UnaryVRX<"vlbrreph", 0xE605, z_replicate_loadbswapi16, v128h, 2, 1>;
+ def VLBRREPF : UnaryVRX<"vlbrrepf", 0xE605, z_replicate_loadbswapi32, v128f, 4, 2>;
+ def VLBRREPG : UnaryVRX<"vlbrrepg", 0xE605, z_replicate_loadbswapi64, v128g, 8, 3>;
+
+ // Store byte-reversed elements.
+ def VSTBR : StoreVRXGeneric<"vstbr", 0xE60E>;
+ def VSTBRH : StoreVRX<"vstbrh", 0xE60E, z_storebswap, v128h, 16, 1>;
+ def VSTBRF : StoreVRX<"vstbrf", 0xE60E, z_storebswap, v128f, 16, 2>;
+ def VSTBRG : StoreVRX<"vstbrg", 0xE60E, z_storebswap, v128g, 16, 3>;
+ def VSTBRQ : StoreVRX<"vstbrq", 0xE60E, null_frag, v128q, 16, 4>;
+
+ // Store elements reversed.
+ def VSTER : StoreVRXGeneric<"vster", 0xE60F>;
+ def VSTERH : StoreVRX<"vsterh", 0xE60F, z_storeeswap, v128h, 16, 1>;
+ def VSTERF : StoreVRX<"vsterf", 0xE60F, z_storeeswap, v128f, 16, 2>;
+ def VSTERG : StoreVRX<"vsterg", 0xE60F, z_storeeswap, v128g, 16, 3>;
+ def : Pat<(z_storeeswap (v4f32 VR128:$val), bdxaddr12only:$addr),
+ (VSTERF VR128:$val, bdxaddr12only:$addr)>;
+ def : Pat<(z_storeeswap (v2f64 VR128:$val), bdxaddr12only:$addr),
+ (VSTERG VR128:$val, bdxaddr12only:$addr)>;
+ def : Pat<(z_storeeswap (v16i8 VR128:$val), bdxaddr12only:$addr),
+ (VSTBRQ VR128:$val, bdxaddr12only:$addr)>;
+
+ // Store byte-reversed element.
+ def VSTEBRH : StoreBinaryVRX<"vstebrh", 0xE609, z_vstebri16, v128h, 2, imm32zx3>;
+ def VSTEBRF : StoreBinaryVRX<"vstebrf", 0xE60B, z_vstebri32, v128f, 4, imm32zx2>;
+ def VSTEBRG : StoreBinaryVRX<"vstebrg", 0xE60A, z_vstebri64, v128g, 8, imm32zx1>;
+ def : InstAlias<"sterv\t$V1, $XBD2",
+ (VSTEBRF VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
+ def : InstAlias<"stdrv\t$V1, $XBD2",
+ (VSTEBRG VR128:$V1, bdxaddr12only:$XBD2, 0), 0>;
+}
+
+//===----------------------------------------------------------------------===//
// Selects and permutes
//===----------------------------------------------------------------------===//
@@ -706,6 +781,10 @@ let Predicates = [FeatureVector] in {
def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
(VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
+ // Shift left double by bit.
+ let Predicates = [FeatureVectorEnhancements2] in
+ def VSLD : TernaryVRId<"vsld", 0xE786, int_s390_vsld, v128b, v128b, 0>;
+
// Shift right arithmetic.
def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
@@ -718,6 +797,10 @@ let Predicates = [FeatureVector] in {
// Shift right logical by byte.
def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
+ // Shift right double by bit.
+ let Predicates = [FeatureVectorEnhancements2] in
+ def VSRD : TernaryVRId<"vsrd", 0xE787, int_s390_vsrd, v128b, v128b, 0>;
+
// Subtract.
def VS : BinaryVRRcGeneric<"vs", 0xE7F7>;
def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
@@ -945,23 +1028,41 @@ let Predicates = [FeatureVector] in {
}
}
- // Convert from fixed 64-bit.
+ // Convert from fixed.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDG : TernaryVRRaFloatGeneric<"vcdg", 0xE7C3>;
def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
}
def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCFPS : TernaryVRRaFloatGeneric<"vcfps", 0xE7C3>;
+ def VCEFB : TernaryVRRa<"vcefb", 0xE7C3, null_frag, v128sb, v128g, 2, 0>;
+ def WCEFB : TernaryVRRa<"wcefb", 0xE7C3, null_frag, v32sb, v32f, 2, 8>;
+ }
+ def : FPConversion<VCEFB, sint_to_fp, v128sb, v128f, 0, 0>;
+ }
- // Convert from logical 64-bit.
+ // Convert from logical.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCDLG : TernaryVRRaFloatGeneric<"vcdlg", 0xE7C1>;
def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
}
def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCFPL : TernaryVRRaFloatGeneric<"vcfpl", 0xE7C1>;
+ def VCELFB : TernaryVRRa<"vcelfb", 0xE7C1, null_frag, v128sb, v128g, 2, 0>;
+ def WCELFB : TernaryVRRa<"wcelfb", 0xE7C1, null_frag, v32sb, v32f, 2, 8>;
+ }
+ def : FPConversion<VCELFB, uint_to_fp, v128sb, v128f, 0, 0>;
+ }
- // Convert to fixed 64-bit.
+ // Convert to fixed.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCGD : TernaryVRRaFloatGeneric<"vcgd", 0xE7C2>;
def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
@@ -969,8 +1070,18 @@ let Predicates = [FeatureVector] in {
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCSFP : TernaryVRRaFloatGeneric<"vcsfp", 0xE7C2>;
+ def VCFEB : TernaryVRRa<"vcfeb", 0xE7C2, null_frag, v128sb, v128g, 2, 0>;
+ def WCFEB : TernaryVRRa<"wcfeb", 0xE7C2, null_frag, v32sb, v32f, 2, 8>;
+ }
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCFEB, fp_to_sint, v128f, v128sb, 0, 5>;
+ }
- // Convert to logical 64-bit.
+ // Convert to logical.
let Uses = [FPC], mayRaiseFPException = 1 in {
def VCLGD : TernaryVRRaFloatGeneric<"vclgd", 0xE7C0>;
def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
@@ -978,6 +1089,16 @@ let Predicates = [FeatureVector] in {
}
// Rounding mode should agree with SystemZInstrFP.td.
def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
+ let Predicates = [FeatureVectorEnhancements2] in {
+ let Uses = [FPC], mayRaiseFPException = 1 in {
+ let isAsmParserOnly = 1 in
+ def VCLFP : TernaryVRRaFloatGeneric<"vclfp", 0xE7C0>;
+ def VCLFEB : TernaryVRRa<"vclfeb", 0xE7C0, null_frag, v128sb, v128g, 2, 0>;
+ def WCLFEB : TernaryVRRa<"wclfeb", 0xE7C0, null_frag, v32sb, v32f, 2, 8>;
+ }
+ // Rounding mode should agree with SystemZInstrFP.td.
+ def : FPConversion<VCLFEB, fp_to_uint, v128f, v128sb, 0, 5>;
+ }
// Divide.
let Uses = [FPC], mayRaiseFPException = 1 in {
@@ -1568,6 +1689,24 @@ let Predicates = [FeatureVector] in {
z_vstrcz_cc, v128f, v128f, 2, 2>;
}
+let Predicates = [FeatureVectorEnhancements2] in {
+ defm VSTRS : TernaryExtraVRRdGeneric<"vstrs", 0xE78B>;
+ defm VSTRSB : TernaryExtraVRRd<"vstrsb", 0xE78B,
+ z_vstrs_cc, v128b, v128b, 0>;
+ defm VSTRSH : TernaryExtraVRRd<"vstrsh", 0xE78B,
+ z_vstrs_cc, v128b, v128h, 1>;
+ defm VSTRSF : TernaryExtraVRRd<"vstrsf", 0xE78B,
+ z_vstrs_cc, v128b, v128f, 2>;
+ let Defs = [CC] in {
+ def VSTRSZB : TernaryVRRd<"vstrszb", 0xE78B,
+ z_vstrsz_cc, v128b, v128b, 0, 2>;
+ def VSTRSZH : TernaryVRRd<"vstrszh", 0xE78B,
+ z_vstrsz_cc, v128b, v128h, 1, 2>;
+ def VSTRSZF : TernaryVRRd<"vstrszf", 0xE78B,
+ z_vstrsz_cc, v128b, v128f, 2, 2>;
+ }
+}
+
//===----------------------------------------------------------------------===//
// Packed-decimal instructions
//===----------------------------------------------------------------------===//
@@ -1579,6 +1718,10 @@ let Predicates = [FeatureVectorPackedDec
def VUPKZ : StoreLengthVSI<"vupkz", 0xE63C, null_frag, 0>;
let Defs = [CC] in {
+ let Predicates = [FeatureVectorPackedDecimalEnhancement] in {
+ def VCVBOpt : TernaryVRRi<"vcvb", 0xE650, GR32>;
+ def VCVBGOpt : TernaryVRRi<"vcvbg", 0xE652, GR64>;
+ }
def VCVB : BinaryVRRi<"vcvb", 0xE650, GR32>;
def VCVBG : BinaryVRRi<"vcvbg", 0xE652, GR64>;
def VCVD : TernaryVRIi<"vcvd", 0xE658, GR32>;
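A sketch of a byte-swapped vector store that the new VSTBR family
covers (hypothetical function; the v4i32 case is assumed to select
VSTBRF via the z_storebswap pattern above):

  declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

  define void @store_bswap(<4 x i32> %val, <4 x i32>* %ptr) {
    %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
    store <4 x i32> %swap, <4 x i32>* %ptr
    ret void
  }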
Modified: llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZOperators.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZOperators.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZOperators.td Fri Jul 12 11:13:16 2019
@@ -191,6 +191,12 @@ def SDT_ZVecTernary : SDTypeProf
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>]>;
+def SDT_ZVecTernaryConvCC : SDTypeProfile<2, 3,
+ [SDTCisVec<0>,
+ SDTCisVT<1, i32>,
+ SDTCisVec<2>,
+ SDTCisSameAs<2, 3>,
+ SDTCisSameAs<0, 4>]>;
def SDT_ZVecTernaryInt : SDTypeProfile<1, 3,
[SDTCisVec<0>,
SDTCisSameAs<0, 1>,
@@ -278,6 +284,10 @@ def z_loadbswap : SDNode<"SystemZ
[SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def z_storebswap : SDNode<"SystemZISD::STRV", SDTStore,
[SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
+def z_loadeswap : SDNode<"SystemZISD::VLER", SDTLoad,
+ [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
+def z_storeeswap : SDNode<"SystemZISD::VSTER", SDTStore,
+ [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def z_tdc : SDNode<"SystemZISD::TDC", SDT_ZTest>;
@@ -337,6 +347,10 @@ def z_vstrc_cc : SDNode<"System
SDT_ZVecQuaternaryIntCC>;
def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC",
SDT_ZVecQuaternaryIntCC>;
+def z_vstrs_cc : SDNode<"SystemZISD::VSTRS_CC",
+ SDT_ZVecTernaryConvCC>;
+def z_vstrsz_cc : SDNode<"SystemZISD::VSTRSZ_CC",
+ SDT_ZVecTernaryConvCC>;
def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvIntCC>;
class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
@@ -661,6 +675,18 @@ def z_usub : PatFrags<(ops node:$src1, n
[(z_usubo node:$src1, node:$src2),
(sub node:$src1, node:$src2)]>;
+// Combined logical operations.
+def andc : PatFrag<(ops node:$src1, node:$src2),
+ (and node:$src1, (not node:$src2))>;
+def orc : PatFrag<(ops node:$src1, node:$src2),
+ (or node:$src1, (not node:$src2))>;
+def nand : PatFrag<(ops node:$src1, node:$src2),
+ (not (and node:$src1, node:$src2))>;
+def nor : PatFrag<(ops node:$src1, node:$src2),
+ (not (or node:$src1, node:$src2))>;
+def nxor : PatFrag<(ops node:$src1, node:$src2),
+ (not (xor node:$src1, node:$src2))>;
+
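+// As a sketch, the nand fragment above is intended to match IR of this
+// shape (function name invented for illustration):
+//
+//   define i64 @sketch_nand(i64 %a, i64 %b) {
+//     %and = and i64 %a, %b
+//     %not = xor i64 %and, -1
+//     ret i64 %not
+//   }
+//
+// which arch13 can select as a single NNGRK; andc, orc, nor and nxor
+// map to NCGRK, OCGRK, NOGRK and NXGRK in the same way (see the added
+// not-01.ll test and the logic-miscext3.ll cost-model test).
+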
// Fused multiply-subtract, using the natural operand order.
def any_fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
(any_fma node:$src1, node:$src2, (fneg node:$src3))>;
@@ -722,6 +748,10 @@ def z_replicate_loadi32 : z_replicate_lo
def z_replicate_loadi64 : z_replicate_load<i64, load>;
def z_replicate_loadf32 : z_replicate_load<f32, load>;
def z_replicate_loadf64 : z_replicate_load<f64, load>;
+// Byte-swapped replicated vector element loads.
+def z_replicate_loadbswapi16 : z_replicate_load<i32, z_loadbswap16>;
+def z_replicate_loadbswapi32 : z_replicate_load<i32, z_loadbswap32>;
+def z_replicate_loadbswapi64 : z_replicate_load<i64, z_loadbswap64>;
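+// Sketch of the IR shape these replicate fragments target (the added
+// vec-bswap tests carry the real coverage; names below are invented):
+//
+//   declare i32 @llvm.bswap.i32(i32)
+//
+//   define <4 x i32> @sketch_replicate(i32 *%ptr) {
+//     %val = load i32, i32 *%ptr
+//     %swap = call i32 @llvm.bswap.i32(i32 %val)
+//     %ins = insertelement <4 x i32> undef, i32 %swap, i32 0
+//     %rep = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
+//     ret <4 x i32> %rep
+//   }
+//
+// so that a load + bswap + splat should become a single byte-reversed
+// replicating vector load rather than a scalar LRV followed by VREP.
+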
// Load a scalar and insert it into a single element of a vector.
class z_vle<ValueType scalartype, SDPatternOperator load>
@@ -734,6 +764,10 @@ def z_vlei32 : z_vle<i32, load>;
def z_vlei64 : z_vle<i64, load>;
def z_vlef32 : z_vle<f32, load>;
def z_vlef64 : z_vle<f64, load>;
+// Byte-swapped vector element loads.
+def z_vlebri16 : z_vle<i32, z_loadbswap16>;
+def z_vlebri32 : z_vle<i32, z_loadbswap32>;
+def z_vlebri64 : z_vle<i64, z_loadbswap64>;
// Load a scalar and insert it into the low element of the high i64 of a
// zeroed vector.
@@ -778,6 +812,18 @@ def z_vllezlf32 : PatFrag<(ops node:$add
(v2i64
(bitconvert (v4f32 immAllZerosV))))>;
+// Byte-swapped variants.
+def z_vllebrzi16 : z_vllez<i32, z_loadbswap16, 3>;
+def z_vllebrzi32 : z_vllez<i32, z_loadbswap32, 1>;
+def z_vllebrzli32 : z_vllez<i32, z_loadbswap32, 0>;
+def z_vllebrzi64 : PatFrags<(ops node:$addr),
+ [(z_vector_insert immAllZerosV,
+ (i64 (z_loadbswap64 node:$addr)),
+ (i32 0)),
+ (z_join_dwords (i64 (z_loadbswap64 node:$addr)),
+ (i64 0))]>;
+
// Store one element of a vector.
class z_vste<ValueType scalartype, SDPatternOperator store>
: PatFrag<(ops node:$vec, node:$addr, node:$index),
@@ -789,6 +835,10 @@ def z_vstei32 : z_vste<i32, store>;
def z_vstei64 : z_vste<i64, store>;
def z_vstef32 : z_vste<f32, store>;
def z_vstef64 : z_vste<f64, store>;
+// Byte-swapped vector element stores.
+def z_vstebri16 : z_vste<i32, z_storebswap16>;
+def z_vstebri32 : z_vste<i32, z_storebswap32>;
+def z_vstebri64 : z_vste<i64, z_storebswap64>;
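+// Sketch of the store-side counterpart (again with invented names):
+//
+//   declare i32 @llvm.bswap.i32(i32)
+//
+//   define void @sketch_store(<4 x i32> %vec, i32 *%ptr) {
+//     %elt = extractelement <4 x i32> %vec, i32 2
+//     %swap = call i32 @llvm.bswap.i32(i32 %elt)
+//     store i32 %swap, i32 *%ptr
+//     ret void
+//   }
+//
+// which should now fold into one byte-reversed element store instead of
+// going through a GPR and STRV.
+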
// Arithmetic negation on vectors.
def z_vneg : PatFrag<(ops node:$x), (sub immAllZerosV, node:$x)>;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZProcessors.td Fri Jul 12 11:13:16 2019
@@ -35,3 +35,5 @@ def : ProcessorModel<"z13", Z13Model, Ar
def : ProcessorModel<"arch12", Z14Model, Arch12SupportedFeatures.List>;
def : ProcessorModel<"z14", Z14Model, Arch12SupportedFeatures.List>;
+def : ProcessorModel<"arch13", Arch13Model, Arch13SupportedFeatures.List>;
+
Modified: llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZRegisterInfo.cpp Fri Jul 12 11:13:16 2019
@@ -96,17 +96,21 @@ SystemZRegisterInfo::getRegAllocationHin
if (!DoneRegs.insert(Reg).second)
continue;
- for (auto &Use : MRI->use_instructions(Reg)) {
+ for (auto &Use : MRI->reg_instructions(Reg)) {
// For LOCRMux, see if the other operand is already a high or low
- // register, and in that case give the correpsonding hints for
+ // register, and in that case give the corresponding hints for
// VirtReg. LOCR instructions need both operands in either high or
- // low parts.
- if (Use.getOpcode() == SystemZ::LOCRMux) {
+ // low parts. Same handling for SELRMux.
+ if (Use.getOpcode() == SystemZ::LOCRMux ||
+ Use.getOpcode() == SystemZ::SELRMux) {
MachineOperand &TrueMO = Use.getOperand(1);
MachineOperand &FalseMO = Use.getOperand(2);
const TargetRegisterClass *RC =
TRI->getCommonSubClass(getRC32(FalseMO, VRM, MRI),
getRC32(TrueMO, VRM, MRI));
+ if (Use.getOpcode() == SystemZ::SELRMux)
+ RC = TRI->getCommonSubClass(RC,
+ getRC32(Use.getOperand(0), VRM, MRI));
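+        // (Note: unlike LOCRMux, SELRMux has a destination operand that
+        // is not tied to a source, so the common class is additionally
+        // narrowed by the destination operand here.)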
if (RC && RC != &SystemZ::GRX32BitRegClass) {
addHints(Order, Hints, RC, MRI);
// Return true to make these hints the only regs available to
Modified: llvm/trunk/lib/Target/SystemZ/SystemZSchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZSchedule.td?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZSchedule.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZSchedule.td Fri Jul 12 11:13:16 2019
@@ -59,6 +59,7 @@ def VBU : SchedWrite; // Virtual branchi
def MCD : SchedWrite; // Millicode
+include "SystemZScheduleArch13.td"
include "SystemZScheduleZ14.td"
include "SystemZScheduleZ13.td"
include "SystemZScheduleZEC12.td"
Added: llvm/trunk/lib/Target/SystemZ/SystemZScheduleArch13.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZScheduleArch13.td?rev=365932&view=auto
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZScheduleArch13.td (added)
+++ llvm/trunk/lib/Target/SystemZ/SystemZScheduleArch13.td Fri Jul 12 11:13:16 2019
@@ -0,0 +1,1695 @@
+//-- SystemZScheduleArch13.td - SystemZ Scheduling Definitions ----*- tblgen -*-=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for Arch13 to support instruction
+// scheduling and other instruction cost heuristics.
+//
+// Pseudos expanded right after isel do not need to be modelled here.
+//
+//===----------------------------------------------------------------------===//
+
+def Arch13Model : SchedMachineModel {
+
+ let UnsupportedFeatures = Arch13UnsupportedFeatures.List;
+
+ let IssueWidth = 6; // Number of instructions decoded per cycle.
+ let MicroOpBufferSize = 60; // Issue queues
+ let LoadLatency = 1; // Optimistic load latency.
+
+ let PostRAScheduler = 1;
+
+ // Extra cycles for a mispredicted branch.
+ let MispredictPenalty = 20;
+}
+
+let SchedModel = Arch13Model in {
+// These definitions need the SchedModel value. They could be put in a
+// subtarget common include file, but it seems the include system in TableGen
+// currently (2016) rejects multiple includes of the same file.
+
+// Decoder grouping rules
+let NumMicroOps = 1 in {
+ def : WriteRes<NormalGr, []>;
+ def : WriteRes<BeginGroup, []> { let BeginGroup = 1; }
+ def : WriteRes<EndGroup, []> { let EndGroup = 1; }
+}
+def : WriteRes<Cracked, []> {
+ let NumMicroOps = 2;
+ let BeginGroup = 1;
+}
+def : WriteRes<GroupAlone, []> {
+ let NumMicroOps = 3;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone2, []> {
+ let NumMicroOps = 6;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+def : WriteRes<GroupAlone3, []> {
+ let NumMicroOps = 9;
+ let BeginGroup = 1;
+ let EndGroup = 1;
+}
+
+// Incoming latency is removed from a register operand that is used together
+// with a memory operand by the instruction.
+def : ReadAdvance<RegReadAdv, 4>;
+
+// LoadLatency (above) is not used for instructions in this file. This is
+// instead the role of LSULatency, which is the latency value added to the
+// result of loads and instructions with folded memory operands.
+def : WriteRes<LSULatency, []> { let Latency = 4; let NumMicroOps = 0; }
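+
+// For example, an instruction tagged WLat2LSU should resolve to a
+// latency of 2 + 4 = 6 cycles here, assuming the WLatXLSU classes in
+// SystemZSchedule.td simply append LSULatency to the base WLatX value.
+// Likewise, the RegReadAdv value above means a register operand of a
+// load-and-operate instruction is read 4 cycles late, hiding up to 4
+// cycles of the producer's latency.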
+
+let NumMicroOps = 0 in {
+ foreach L = 1-30 in
+ def : WriteRes<!cast<SchedWrite>("WLat"#L), []> { let Latency = L; }
+}
+
+// Execution units.
+def Arch13_FXaUnit : ProcResource<2>;
+def Arch13_FXbUnit : ProcResource<2>;
+def Arch13_LSUnit : ProcResource<2>;
+def Arch13_VecUnit : ProcResource<2>;
+def Arch13_VecFPdUnit : ProcResource<2> { let BufferSize = 1; /* blocking */ }
+def Arch13_VBUnit : ProcResource<2>;
+def Arch13_MCD : ProcResource<1>;
+
+// Subtarget specific definitions of scheduling resources.
+let NumMicroOps = 0 in {
+ def : WriteRes<FXa, [Arch13_FXaUnit]>;
+ def : WriteRes<FXb, [Arch13_FXbUnit]>;
+ def : WriteRes<LSU, [Arch13_LSUnit]>;
+ def : WriteRes<VecBF, [Arch13_VecUnit]>;
+ def : WriteRes<VecDF, [Arch13_VecUnit]>;
+ def : WriteRes<VecDFX, [Arch13_VecUnit]>;
+ def : WriteRes<VecMul, [Arch13_VecUnit]>;
+ def : WriteRes<VecStr, [Arch13_VecUnit]>;
+ def : WriteRes<VecXsPm, [Arch13_VecUnit]>;
+ foreach Num = 2-5 in { let ResourceCycles = [Num] in {
+ def : WriteRes<!cast<SchedWrite>("FXa"#Num), [Arch13_FXaUnit]>;
+ def : WriteRes<!cast<SchedWrite>("FXb"#Num), [Arch13_FXbUnit]>;
+ def : WriteRes<!cast<SchedWrite>("LSU"#Num), [Arch13_LSUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecBF"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDF"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecDFX"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecMul"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecStr"#Num), [Arch13_VecUnit]>;
+ def : WriteRes<!cast<SchedWrite>("VecXsPm"#Num), [Arch13_VecUnit]>;
+ }}
+
+ def : WriteRes<VecFPd, [Arch13_VecFPdUnit]> { let ResourceCycles = [30]; }
+
+ def : WriteRes<VBU, [Arch13_VBUnit]>; // Virtual Branching Unit
+}
+
+def : WriteRes<MCD, [Arch13_MCD]> { let NumMicroOps = 3;
+ let BeginGroup = 1;
+ let EndGroup = 1; }
+
+// -------------------------- INSTRUCTIONS ---------------------------------- //
+
+// InstRW constructs are used here to preserve the readability of the
+// InstrInfo files.
+
+// For each instruction, as matched by a regexp, provide a list of
+// resources that it needs. These will be combined into a SchedClass.
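+//
+// For example, the first entry below reads: ADJDYNALLOC has latency 1
+// (WLat1), occupies one FXa execution unit, and has no special decoder
+// grouping constraint (NormalGr).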
+
+//===----------------------------------------------------------------------===//
+// Stack allocation
+//===----------------------------------------------------------------------===//
+
+// Pseudo -> LA / LAY
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ADJDYNALLOC$")>;
+
+//===----------------------------------------------------------------------===//
+// Branch instructions
+//===----------------------------------------------------------------------===//
+
+// Branch
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?BRC(L)?(Asm.*)?$")>;
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Call)?J(G)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?BC(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "(Call)?B(R)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "BI(C)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXa, EndGroup], (instregex "BRCT(G)?$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BRCTH$")>;
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BCT(G)?(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb2, GroupAlone2],
+ (instregex "B(R)?X(H|L).*$")>;
+
+// Compare and branch
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?(G)?(I|R)J(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb2, GroupAlone],
+ (instregex "C(L)?(G)?(I|R)B(Call|Return|Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Trap instructions
+//===----------------------------------------------------------------------===//
+
+// Trap
+def : InstRW<[WLat1, VBU, NormalGr], (instregex "(Cond)?Trap$")>;
+
+// Compare and trap
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?(I|R)T(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(G)?RT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CL(F|G)IT(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "CL(G)?T(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Call and return instructions
+//===----------------------------------------------------------------------===//
+
+// Call
+def : InstRW<[WLat1, VBU, FXa2, GroupAlone], (instregex "(Call)?BRAS$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BRASL$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "(Call)?BAS(R)?$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "TLS_(G|L)DCALL$")>;
+
+// Return
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Return$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CondReturn$")>;
+
+//===----------------------------------------------------------------------===//
+// Move instructions
+//===----------------------------------------------------------------------===//
+
+// Moves
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MV(G|H)?HI$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "MVI(Y)?$")>;
+
+// Move character
+def : InstRW<[WLat1, FXb, LSU3, GroupAlone], (instregex "MVC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVCL(E|U)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "MVCRL$")>;
+
+// Pseudo -> reg move
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "COPY(_TO_REGCLASS)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "EXTRACT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "INSERT_SUBREG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "REG_SEQUENCE$")>;
+
+// Loads
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L(Y|FH|RL|Mux)?$")>;
+def : InstRW<[LSULatency, LSULatency, LSU, NormalGr], (instregex "LCBB$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LG(RL)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "L128$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIH(F|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLIL(F|H|L)$")>;
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(F|H)I$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LHI(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LR(Mux)?$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LZR(F|G)$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "L(FH|G)?AT$")>;
+
+// Load and test
+def : InstRW<[WLat1LSU, WLat1LSU, LSU, FXa, NormalGr], (instregex "LT(G)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LT(G)?R$")>;
+
+// Stores
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STG(RL)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST128$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(Y|FH|RL|Mux)?$")>;
+
+// String moves.
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "MVST$")>;
+
+//===----------------------------------------------------------------------===//
+// Conditional move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOCRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|FH)?R(Asm.*)?$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "LOC(G|H)?HI(Mux|(Asm.*))?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "LOC(G|FH|Mux)?(Asm.*)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr],
+ (instregex "STOC(G|FH|Mux)?(Asm.*)?$")>;
+
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SELRMux$")>;
+def : InstRW<[WLat2, FXa, NormalGr], (instregex "SEL(G|FH)?R(Asm.*)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Sign extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "L(B|H|G)R$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LG(B|H|F)R$")>;
+
+def : InstRW<[WLat1LSU, WLat1LSU, FXa, LSU, NormalGr], (instregex "LTGF$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LTGFR$")>;
+
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LB(H|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(Y)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LH(H|Mux|RL)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(B|H|F)$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LG(H|F)RL$")>;
+
+//===----------------------------------------------------------------------===//
+// Zero extensions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLCR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLHR(Mux)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LLG(C|H|F|T)R$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLC(Mux)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLH(Mux)?$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LL(C|H)H$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLHRL$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLG(C|H|F|T|HRL|FRL)$")>;
+
+// Load and zero rightmost byte
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLZRGF$")>;
+
+// Load and trap
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "LLG(F|T)?AT$")>;
+
+//===----------------------------------------------------------------------===//
+// Truncations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STC(H|Y|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STH(H|Y|RL|Mux)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STCM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Multi-register moves
+//===----------------------------------------------------------------------===//
+
+// Load multiple (estimated average of 5 ops)
+def : InstRW<[WLat10, WLat10, LSU5, GroupAlone], (instregex "LM(H|Y|G)?$")>;
+
+// Load multiple disjoint
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LMD$")>;
+
+// Store multiple
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone], (instregex "STM(G|H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Byte swaps
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LRV(G)?R$")>;
+def : InstRW<[WLat1LSU, FXa, LSU, NormalGr], (instregex "LRV(G|H)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STRV(G|H)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCIN$")>;
+
+//===----------------------------------------------------------------------===//
+// Load address instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LA(Y|RL)?$")>;
+
+// Load the Global Offset Table address ( -> larl )
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "GOT$")>;
+
+//===----------------------------------------------------------------------===//
+// Absolute and Negation
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LP(G)?R$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "L(N|P)GFR$")>;
+def : InstRW<[WLat1, WLat1, FXa, NormalGr], (instregex "LN(R|GR)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "LC(R|GR)$")>;
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "LCGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Insertion
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "IC(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "IC32(Y)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, WLat1LSU, FXa, LSU, NormalGr],
+ (instregex "ICM(H|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "II(F|H|L)Mux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "IILL(64)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Addition
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "A(Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AIH$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AG$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGFI$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHI(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AHIMux(K)?$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AL(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AL(FI|HSIK)$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "ALG(F)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGHSIK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "AR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "A(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "A(L)?HHLR$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ALSIH(N)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "A(L)?(G)?SI$")>;
+
+// Logical addition with carry
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+ (instregex "ALC(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "ALC(G)?R$")>;
+
+// Add with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "AG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "AGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// Subtraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "S(G|Y)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SH(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLFI$")>;
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SL(G|GF|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGF(I|R)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "S(L)?HHHR$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "S(L)?HHLR$")>;
+
+// Subtraction with borrow
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, GroupAlone],
+ (instregex "SLB(G)?$")>;
+def : InstRW<[WLat2, WLat2, FXa, GroupAlone], (instregex "SLB(G)?R$")>;
+
+// Subtraction with sign extension (16/32 -> 64)
+def : InstRW<[WLat2LSU, WLat2LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "SG(F|H)$")>;
+def : InstRW<[WLat2, WLat2, FXa, NormalGr], (instregex "SGFR$")>;
+
+//===----------------------------------------------------------------------===//
+// AND
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "N(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "NI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "NC$")>;
+
+//===----------------------------------------------------------------------===//
+// OR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "O(G|Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OGR(K)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "OI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OI(FMux|HMux|LMux)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OIHL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILH(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OILL(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "OC$")>;
+
+//===----------------------------------------------------------------------===//
+// XOR
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, WLat1LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "X(G|Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "XI(Y)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIFMux$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XGR(K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XIHF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XILF(64)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "XR(K)?$")>;
+def : InstRW<[WLat3LSU, LSU2, FXb, Cracked], (instregex "XC$")>;
+
+//===----------------------------------------------------------------------===//
+// Combined logical operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "OC(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NN(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NO(G)?RK$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "NX(G)?RK$")>;
+
+//===----------------------------------------------------------------------===//
+// Multiplication
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat5LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MS(GF|Y)?$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MS(R|FI)$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MSG$")>;
+def : InstRW<[WLat7, FXa, NormalGr], (instregex "MSGR$")>;
+def : InstRW<[WLat5, FXa, NormalGr], (instregex "MSGF(I|R)$")>;
+def : InstRW<[WLat8LSU, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MLG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MLGR$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MGHI$")>;
+def : InstRW<[WLat4, FXa, NormalGr], (instregex "MHI$")>;
+def : InstRW<[WLat4LSU, RegReadAdv, FXa, LSU, NormalGr], (instregex "MH(Y)?$")>;
+def : InstRW<[WLat6, FXa2, GroupAlone], (instregex "M(L)?R$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, FXa2, LSU, GroupAlone],
+ (instregex "M(FY|L)?$")>;
+def : InstRW<[WLat8, RegReadAdv, FXa, LSU, NormalGr], (instregex "MGH$")>;
+def : InstRW<[WLat12, RegReadAdv, FXa2, LSU, GroupAlone], (instregex "MG$")>;
+def : InstRW<[WLat8, FXa2, GroupAlone], (instregex "MGRK$")>;
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MSC$")>;
+def : InstRW<[WLat8LSU, WLat8LSU, RegReadAdv, FXa, LSU, NormalGr],
+ (instregex "MSGC$")>;
+def : InstRW<[WLat6, WLat6, FXa, NormalGr], (instregex "MSRKC$")>;
+def : InstRW<[WLat8, WLat8, FXa, NormalGr], (instregex "MSGRKC$")>;
+
+//===----------------------------------------------------------------------===//
+// Division and remainder
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2], (instregex "D$")>;
+def : InstRW<[WLat30, FXa2, GroupAlone], (instregex "DSG(F)?R$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa2, LSU, GroupAlone2],
+ (instregex "DSG(F)?$")>;
+def : InstRW<[WLat20, FXa4, GroupAlone], (instregex "DLR$")>;
+def : InstRW<[WLat30, FXa4, GroupAlone], (instregex "DLGR$")>;
+def : InstRW<[WLat30, RegReadAdv, FXa4, LSU, GroupAlone2],
+ (instregex "DL(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Shifts
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRL(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SRA(G|K)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "SLA(G|K)?$")>;
+def : InstRW<[WLat5LSU, WLat5LSU, FXa4, LSU, GroupAlone2],
+ (instregex "S(L|R)D(A|L)$")>;
+
+// Rotate
+def : InstRW<[WLat2LSU, FXa, LSU, NormalGr], (instregex "RLL(G)?$")>;
+
+// Rotate and insert
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBG(N|32)?$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBH(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBL(G|H|L)$")>;
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "RISBMux$")>;
+
+// Rotate and Select
+def : InstRW<[WLat2, WLat2, FXa2, Cracked], (instregex "R(N|O|X)SBG$")>;
+
+//===----------------------------------------------------------------------===//
+// Comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "C(G|Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(F|H)I(Mux)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CG(F|H)I$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CG(HSI|RL)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(G)?R$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CIH$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CHSI$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "CL(Y|Mux)?$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLFHSI$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLFI(Mux)?$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLG$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLG(HRL|HSI)$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLGF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGFRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGF(I|R)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLGR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLGRL$")>;
+def : InstRW<[WLat1LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CLHF$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLH(RL|HSI)$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLIH$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLI(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "CLR$")>;
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "CLRL$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "C(L)?HHR$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "C(L)?HLR$")>;
+
+// Compare halfword
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CH(Y)?$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CHRL$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGH$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGHRL$")>;
+def : InstRW<[WLat2LSU, FXa, FXb, LSU, Cracked], (instregex "CHHSI$")>;
+
+// Compare with sign extension (32 -> 64)
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr], (instregex "CGF$")>;
+def : InstRW<[WLat2LSU, FXb, LSU, NormalGr], (instregex "CGFRL$")>;
+def : InstRW<[WLat2, FXb, NormalGr], (instregex "CGFR$")>;
+
+// Compare logical character
+def : InstRW<[WLat6, FXb, LSU2, Cracked], (instregex "CLC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLCL(E|U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CLST$")>;
+
+// Test under mask
+def : InstRW<[WLat1LSU, FXb, LSU, NormalGr], (instregex "TM(Y)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TM(H|L)Mux$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMHL(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLH(64)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TMLL(64)?$")>;
+
+// Compare logical characters under mask
+def : InstRW<[WLat2LSU, RegReadAdv, FXb, LSU, NormalGr],
+ (instregex "CLM(H|Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Prefetch and execution hint
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU, NormalGr], (instregex "PFD(RL)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "BPP$")>;
+def : InstRW<[FXb, EndGroup], (instregex "BPRP$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "NIAI$")>;
+
+//===----------------------------------------------------------------------===//
+// Atomic operations
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "Serialize$")>;
+
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAA(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAAL(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAN(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAO(G)?$")>;
+def : InstRW<[WLat2LSU, WLat2LSU, FXb, LSU, NormalGr], (instregex "LAX(G)?$")>;
+
+// Test and set
+def : InstRW<[WLat2LSU, FXb, LSU, EndGroup], (instregex "TS$")>;
+
+// Compare and swap
+def : InstRW<[WLat3LSU, WLat3LSU, FXa, FXb, LSU, GroupAlone],
+ (instregex "CS(G|Y)?$")>;
+
+// Compare double and swap
+def : InstRW<[WLat6LSU, WLat6LSU, FXa3, FXb2, LSU, GroupAlone2],
+ (instregex "CDS(Y)?$")>;
+def : InstRW<[WLat15, WLat15, FXa2, FXb4, LSU3,
+ GroupAlone3], (instregex "CDSG$")>;
+
+// Compare and swap and store
+def : InstRW<[WLat30, MCD], (instregex "CSST$")>;
+
+// Perform locked operation
+def : InstRW<[WLat30, MCD], (instregex "PLO$")>;
+
+// Load/store pair from/to quadword
+def : InstRW<[WLat4LSU, LSU2, GroupAlone], (instregex "LPQ$")>;
+def : InstRW<[WLat1, FXb2, LSU, GroupAlone], (instregex "STPQ$")>;
+
+// Load pair disjoint
+def : InstRW<[WLat1LSU, WLat1LSU, LSU2, GroupAlone], (instregex "LPD(G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Translate and convert
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "TR$")>;
+def : InstRW<[WLat30, WLat30, WLat30, FXa3, LSU2, GroupAlone2],
+ (instregex "TRT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRTR$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TRE$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TRT(R)?E(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "TR(T|O)(T|O)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "CU(12|14|21|24|41|42)(Opt)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "(CUUTF|CUTFU)(Opt)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Message-security assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD],
+ (instregex "KM(C|F|O|CTR|A)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "(KIMD|KLMD|KMAC|KDSA)$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD],
+ (instregex "(PCC|PPNO|PRNO)$")>;
+
+//===----------------------------------------------------------------------===//
+// Guarded storage
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LGG$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LLGFSG$")>;
+def : InstRW<[WLat30, MCD], (instregex "(L|ST)GSC$")>;
+
+//===----------------------------------------------------------------------===//
+// Decimal arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF2, LSU2, GroupAlone2],
+ (instregex "CVBG$")>;
+def : InstRW<[WLat20, RegReadAdv, FXb, VecDF, LSU, GroupAlone2],
+ (instregex "CVB(Y)?$")>;
+def : InstRW<[WLat1, FXb3, VecDF4, LSU, GroupAlone3], (instregex "CVDG$")>;
+def : InstRW<[WLat1, FXb2, VecDF, LSU, GroupAlone2], (instregex "CVD(Y)?$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "MV(N|O|Z)$")>;
+def : InstRW<[WLat1, LSU5, GroupAlone], (instregex "(PACK|PKA|PKU)$")>;
+def : InstRW<[WLat12, LSU5, GroupAlone], (instregex "UNPK(A|U)$")>;
+def : InstRW<[WLat1, FXb, LSU2, Cracked], (instregex "UNPK$")>;
+
+def : InstRW<[WLat5LSU, FXb, VecDFX, LSU3, GroupAlone2],
+ (instregex "(A|S|ZA)P$")>;
+def : InstRW<[WLat1, FXb, VecDFX2, LSU3, GroupAlone2], (instregex "MP$")>;
+def : InstRW<[WLat1, FXb, VecDFX4, LSU3, GroupAlone2], (instregex "DP$")>;
+def : InstRW<[WLat15, FXb, VecDFX2, LSU2, GroupAlone3], (instregex "SRP$")>;
+def : InstRW<[WLat8, VecDFX, LSU, LSU, GroupAlone], (instregex "CP$")>;
+def : InstRW<[WLat3LSU, VecDFX, LSU, Cracked], (instregex "TP$")>;
+def : InstRW<[WLat30, MCD], (instregex "ED(MK)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Access registers
+//===----------------------------------------------------------------------===//
+
+// Extract/set/copy access register
+def : InstRW<[WLat3, LSU, NormalGr], (instregex "(EAR|SAR|CPYA)$")>;
+
+// Load address extended
+def : InstRW<[WLat5, LSU, FXa, Cracked], (instregex "LAE(Y)?$")>;
+
+// Load/store access multiple (not modeled precisely)
+def : InstRW<[WLat20, WLat20, LSU5, GroupAlone], (instregex "LAM(Y)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STAM(Y)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Program mask and addressing mode
+//===----------------------------------------------------------------------===//
+
+// Insert Program Mask
+def : InstRW<[WLat3, FXa, EndGroup], (instregex "IPM$")>;
+
+// Set Program Mask
+def : InstRW<[WLat3, LSU, EndGroup], (instregex "SPM$")>;
+
+// Branch and link
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BAL(R)?$")>;
+
+// Test addressing mode
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TAM$")>;
+
+// Set addressing mode
+def : InstRW<[WLat1, FXb, EndGroup], (instregex "SAM(24|31|64)$")>;
+
+// Branch (and save) and set mode.
+def : InstRW<[WLat1, FXa, FXb, GroupAlone], (instregex "BSM$")>;
+def : InstRW<[WLat1, FXa2, FXb, GroupAlone], (instregex "BASSM$")>;
+
+//===----------------------------------------------------------------------===//
+// Transactional execution
+//===----------------------------------------------------------------------===//
+
+// Transaction begin
+def : InstRW<[WLat9, LSU2, FXb5, GroupAlone2], (instregex "TBEGIN(C)?$")>;
+
+// Transaction end
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "TEND$")>;
+
+// Transaction abort
+def : InstRW<[WLat30, MCD], (instregex "TABORT$")>;
+
+// Extract Transaction Nesting Depth
+def : InstRW<[WLat1, FXa, NormalGr], (instregex "ETND$")>;
+
+// Nontransactional store
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "NTSTG$")>;
+
+//===----------------------------------------------------------------------===//
+// Processor assist
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "PPA$")>;
+
+//===----------------------------------------------------------------------===//
+// Miscellaneous Instructions.
+//===----------------------------------------------------------------------===//
+
+// Find leftmost one
+def : InstRW<[WLat5, WLat5, FXa2, GroupAlone], (instregex "FLOGR$")>;
+
+// Population count
+def : InstRW<[WLat3, WLat3, FXa, NormalGr], (instregex "POPCNT(Opt)?$")>;
+
+// String instructions
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "SRST(U)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CUSE$")>;
+
+// Various complex instructions
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CFC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, WLat30, WLat30, MCD],
+ (instregex "UPT$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "CKSM$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "CMPSC$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "SORTL$")>;
+def : InstRW<[WLat30, WLat30, WLat30, WLat30, MCD], (instregex "DFLTCC$")>;
+
+// Execute
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "EX(RL)?$")>;
+
+//===----------------------------------------------------------------------===//
+// .insn directive instructions
+//===----------------------------------------------------------------------===//
+
+// An "empty" sched-class will be assigned instead of the "invalid sched-class".
+// getNumDecoderSlots() will then return 1 instead of 0.
+def : InstRW<[], (instregex "Insn.*")>;
+
+
+// ----------------------------- Floating point ----------------------------- //
+
+//===----------------------------------------------------------------------===//
+// FP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load zero
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LZ(DR|ER)$")>;
+def : InstRW<[WLat2, FXb2, Cracked], (instregex "LZXR$")>;
+
+// Load
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "LER$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LD(R|R32|GR)$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LGDR$")>;
+def : InstRW<[WLat2, FXb2, GroupAlone], (instregex "LXR$")>;
+
+// Load and Test
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BR$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)BRCompare$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone],
+ (instregex "LTXBR(Compare)?$")>;
+
+// Copy sign
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "CPSDR(d|s)(d|s)$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Load instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2LSU, VecXsPm, LSU, NormalGr], (instregex "LE(Y)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LD(Y|E32)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Store instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "ST(E|D)(Y)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "STX$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEDBR(A)?$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "L(E|D)XBR(A)?$")>;
+
+// Load lengthened
+def : InstRW<[WLat6LSU, VecBF, LSU, NormalGr], (instregex "LDEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LDEBR$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)B$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)BR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)BR(A)?$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)BR(A)?$")>;
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)L(F|G)BR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CXL(F|G)BR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked],
+ (instregex "C(F|G)(E|D)BR(A)?$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked],
+ (instregex "C(F|G)XBR(A)?$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, GroupAlone], (instregex "CLFEBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLFDBR$")>;
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "CLG(E|D)BR$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "CL(F|G)XBR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)BR$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "L(C|N|P)DFR(_32)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XBR$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)B$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)BR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXBR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)BR(A)?$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXBR(A)?$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "A(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXBR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "S(E|D)B$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D)BR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXBR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "M(D|DE|EE)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|EE)BR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MXDB$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDBR$")>;
+def : InstRW<[WLat15, VecDF4, GroupAlone], (instregex "MXBR$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)EB$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)EBR$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(A|S)DBR$")>;
+
+// Division
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr],
+ (instregex "D(E|D)B$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)BR$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXBR$")>;
+
+// Divide to integer
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "DI(E|D)BR$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat3LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "(K|C)(E|D)B$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "(K|C)(E|D)BR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XBR$")>;
+
+// Test Data Class
+def : InstRW<[WLat5, LSU, VecXsPm, NormalGr], (instregex "TC(E|D)B$")>;
+def : InstRW<[WLat10, LSU, VecDF4, GroupAlone], (instregex "TCXB$")>;
+
+//===----------------------------------------------------------------------===//
+// FP: Floating-point control register instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4, FXa, LSU, GroupAlone], (instregex "EFPC$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "STFPC$")>;
+def : InstRW<[WLat3, LSU, GroupAlone], (instregex "SFPC$")>;
+def : InstRW<[WLat3LSU, LSU2, GroupAlone], (instregex "LFPC$")>;
+def : InstRW<[WLat30, MCD], (instregex "SFASR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LFAS$")>;
+def : InstRW<[WLat3, FXb, GroupAlone], (instregex "SRNM(B|T)?$")>;
+
+
+// --------------------- Hexadecimal floating point ------------------------- //
+
+//===----------------------------------------------------------------------===//
+// HFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "LT(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "(LEDR|LRER)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "LEXR$")>;
+def : InstRW<[WLat9, VecDF2, NormalGr], (instregex "(LDXR|LRDR)$")>;
+
+// Load lengthened
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "LDE$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LDER$")>;
+def : InstRW<[WLat7LSU, VecBF4, LSU, GroupAlone], (instregex "LX(E|D)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "LX(E|D)R$")>;
+
+// Convert from fixed
+def : InstRW<[WLat7, FXb, VecBF, Cracked], (instregex "C(E|D)(F|G)R$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "CX(F|G)R$")>;
+
+// Convert to fixed
+def : InstRW<[WLat9, WLat9, FXb, VecBF, Cracked], (instregex "C(F|G)(E|D)R$")>;
+def : InstRW<[WLat12, WLat12, FXb, VecDF2, Cracked], (instregex "C(F|G)XR$")>;
+
+// Convert BFP to HFP / HFP to BFP.
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "THD(E)?R$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "TB(E)?DR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load Complement / Negative / Positive
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "L(C|N|P)(E|D)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "L(C|N|P)XR$")>;
+
+// Halve
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "H(E|D)R$")>;
+
+// Square root
+def : InstRW<[WLat30, VecFPd, LSU, NormalGr], (instregex "SQ(E|D)$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "SQ(E|D)R$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "SQXR$")>;
+
+// Load FP integer
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "FI(E|D)R$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "A(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "A(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXR$")>;
+
+// Subtraction
+def : InstRW<[WLat6LSU, WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "S(E|D|U|W)$")>;
+def : InstRW<[WLat6, WLat6, VecBF, NormalGr], (instregex "S(E|D|U|W)R$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXR$")>;
+
+// Multiply
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "M(D|DE|E|EE)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "M(D|DE|E|EE)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MXD$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MXDR$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXR$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, VecBF4, LSU, GroupAlone], (instregex "MY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "MY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MY(H|L)R$")>;
+
+// Multiply and add / subtract
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "M(A|S)(E|D)$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "M(A|S)(E|D)R$")>;
+def : InstRW<[WLat7LSU, RegReadAdv, RegReadAdv, VecBF4, LSU, GroupAlone],
+ (instregex "MAY$")>;
+def : InstRW<[WLat6LSU, RegReadAdv, RegReadAdv, VecBF2, LSU, GroupAlone],
+ (instregex "MAY(H|L)$")>;
+def : InstRW<[WLat7, VecBF4, GroupAlone], (instregex "MAYR$")>;
+def : InstRW<[WLat6, VecBF, GroupAlone], (instregex "MAY(H|L)R$")>;
+
+// Division
+def : InstRW<[WLat30, RegReadAdv, VecFPd, LSU, NormalGr], (instregex "D(E|D)$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "D(E|D)R$")>;
+def : InstRW<[WLat30, VecFPd, GroupAlone], (instregex "DXR$")>;
+
+//===----------------------------------------------------------------------===//
+// HFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat6LSU, RegReadAdv, VecBF, LSU, NormalGr],
+ (instregex "C(E|D)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "C(E|D)R$")>;
+def : InstRW<[WLat10, VecDF2, GroupAlone], (instregex "CXR$")>;
+
+
+// ------------------------ Decimal floating point -------------------------- //
+
+//===----------------------------------------------------------------------===//
+// DFP: Move instructions
+//===----------------------------------------------------------------------===//
+
+// Load and Test
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "LTDTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "LTXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Conversion instructions
+//===----------------------------------------------------------------------===//
+
+// Load rounded
+def : InstRW<[WLat15, VecDF, NormalGr], (instregex "LEDTR$")>;
+def : InstRW<[WLat15, VecDF2, NormalGr], (instregex "LDXTR$")>;
+
+// Load lengthened
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "LDETR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "LXDTR$")>;
+
+// Convert from fixed / logical
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDFTR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXFTR(A)?$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXGTR(A)?$")>;
+def : InstRW<[WLat15, FXb, VecDF, Cracked], (instregex "CDLFTR$")>;
+def : InstRW<[WLat30, FXb, VecDF, Cracked], (instregex "CDLGTR$")>;
+def : InstRW<[WLat15, FXb, VecDF4, GroupAlone2], (instregex "CXLFTR$")>;
+def : InstRW<[WLat30, FXb, VecDF4, GroupAlone2], (instregex "CXLGTR$")>;
+
+// Convert to fixed / logical
+def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked],
+ (instregex "C(F|G)DTR(A)?$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked],
+ (instregex "C(F|G)XTR(A)?$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF, Cracked], (instregex "CL(F|G)DTR$")>;
+def : InstRW<[WLat30, WLat30, FXb, VecDF2, Cracked], (instregex "CL(F|G)XTR$")>;
+
+// Convert from / to signed / unsigned packed
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "CD(S|U)TR$")>;
+def : InstRW<[WLat12, FXb2, VecDF4, GroupAlone2], (instregex "CX(S|U)TR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "C(S|U)DTR$")>;
+def : InstRW<[WLat15, FXb2, VecDF4, GroupAlone2], (instregex "C(S|U)XTR$")>;
+
+// Convert from / to zoned
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDZT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXZT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CZDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CZXT$")>;
+
+// Convert from / to packed
+def : InstRW<[WLat8LSU, LSU, VecDF, Cracked], (instregex "CDPT$")>;
+def : InstRW<[WLat16LSU, LSU2, VecDF4, GroupAlone3], (instregex "CXPT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF, Cracked], (instregex "CPDT$")>;
+def : InstRW<[WLat1, FXb, LSU, VecDF2, GroupAlone], (instregex "CPXT$")>;
+
+// Perform floating-point operation
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "PFPO$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Unary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Load FP integer
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "FIDTR$")>;
+def : InstRW<[WLat10, VecDF4, GroupAlone], (instregex "FIXTR$")>;
+
+// Extract biased exponent
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "EEXTR$")>;
+
+// Extract significance
+def : InstRW<[WLat11, FXb, VecDF, Cracked], (instregex "ESDTR$")>;
+def : InstRW<[WLat12, FXb, VecDF2, Cracked], (instregex "ESXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Binary arithmetic
+//===----------------------------------------------------------------------===//
+
+// Addition
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "ADTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "AXTR(A)?$")>;
+
+// Subtraction
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "SDTR(A)?$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "SXTR(A)?$")>;
+
+// Multiply
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "MDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "MXTR(A)?$")>;
+
+// Division
+def : InstRW<[WLat30, VecDF, NormalGr], (instregex "DDTR(A)?$")>;
+def : InstRW<[WLat30, VecDF4, GroupAlone], (instregex "DXTR(A)?$")>;
+
+// Quantize
+def : InstRW<[WLat8, WLat8, VecDF, NormalGr], (instregex "QADTR$")>;
+def : InstRW<[WLat10, WLat10, VecDF4, GroupAlone], (instregex "QAXTR$")>;
+
+// Reround
+def : InstRW<[WLat9, WLat9, FXb, VecDF, Cracked], (instregex "RRDTR$")>;
+def : InstRW<[WLat11, WLat11, FXb, VecDF4, GroupAlone2], (instregex "RRXTR$")>;
+
+// Shift significand left/right
+def : InstRW<[WLat11LSU, LSU, VecDF, GroupAlone], (instregex "S(L|R)DT$")>;
+def : InstRW<[WLat11LSU, LSU, VecDF4, GroupAlone], (instregex "S(L|R)XT$")>;
+
+// Insert biased exponent
+def : InstRW<[WLat9, FXb, VecDF, Cracked], (instregex "IEDTR$")>;
+def : InstRW<[WLat11, FXb, VecDF4, GroupAlone2], (instregex "IEXTR$")>;
+
+//===----------------------------------------------------------------------===//
+// DFP: Comparisons
+//===----------------------------------------------------------------------===//
+
+// Compare
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "(K|C)DTR$")>;
+def : InstRW<[WLat9, VecDF2, GroupAlone], (instregex "(K|C)XTR$")>;
+
+// Compare biased exponent
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEDTR$")>;
+def : InstRW<[WLat8, VecDF, NormalGr], (instregex "CEXTR$")>;
+
+// Test Data Class/Group
+def : InstRW<[WLat15, LSU, VecDF, NormalGr], (instregex "TD(C|G)(E|D)T$")>;
+def : InstRW<[WLat15, LSU, VecDF2, GroupAlone], (instregex "TD(C|G)XT$")>;
+
+
+// --------------------------------- Vector --------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// Vector: Move instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLR(32|64)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLGV(B|F|G|H)?$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "VLVG(B|F|G|H)?$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "VLVGP(32)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Immediate instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VZERO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VONE$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGBM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VGM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREPI(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLEI(B|F|G|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Loads
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(L|BB)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VL(32|64)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEZ(B|F|G|H|LF)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "VLE(B|F|G|H)$")>;
+def : InstRW<[WLat5LSU, RegReadAdv, FXb, LSU, VecXsPm, Cracked],
+ (instregex "VGE(F|G)$")>;
+def : InstRW<[WLat4LSU, WLat4LSU, LSU5, GroupAlone],
+ (instregex "VLM(Align)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Stores
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VST(Align|L|32|64)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTE(B|H)$")>;
+def : InstRW<[WLat1, LSU2, FXb3, GroupAlone2], (instregex "VSTM(Align)?$")>;
+def : InstRW<[WLat1, FXb2, LSU, Cracked], (instregex "VSCE(F|G)$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTRL(R)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Byte swaps
+//===----------------------------------------------------------------------===//
+
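+// (The byte- and element-reversing loads and stores below -- the VLBR,
+// VLER, VSTBR and VSTER families -- are new with the vector-enhancements
+// facility 2.)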
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBR(H|F|G|Q)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLER(H|F|G)?$")>;
+def : InstRW<[WLat2LSU, RegReadAdv, VecXsPm, LSU, NormalGr],
+ (instregex "VLEBR(H|F|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLLEBRZ(H|F|G|E)?$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "VLBRREP(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTBR(H|F|G|Q)?$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTER(H|F|G)?$")>;
+def : InstRW<[WLat1, FXb, LSU, VecXsPm, Cracked], (instregex "VSTEBRH$")>;
+def : InstRW<[WLat1, FXb, LSU, NormalGr], (instregex "VSTEBR(F|G)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Selects and permutes
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRH(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPDI$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VBPERM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VREP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEL$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Widening and narrowing
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPK(F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VPKLS(F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSEG(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPH(B|F|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPL(B|F)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLH(B|F|H|W)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VUPLL(B|F|H)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer arithmetic
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VA(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VACC(B|F|G|H|Q|C|CQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVG(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VAVGL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VN(C|O|N|X)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VO(C)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VCKSM$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCLZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCTZ(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VX$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFMA(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VGFM(B|F|G|H)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLC(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VLP(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMX(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMXL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMN(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VMNL(B|F|G|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAL(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMALO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMAH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VME(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMH(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VML(B|F)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLE(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLH(B|F|H|W)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMLO(B|F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VMO(B|F|H)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VMSL(G)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VPOPCT(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERLLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VERIM(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESLV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRA(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRAV(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VESRLV(B|F|G|H)?$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSL(DB)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSR(A|L)B$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSLD$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSRD$")>;
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSB(I|IQ|CBI|CBIQ)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VSCBI(B|F|G|H|Q)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VS(F|G|H|Q)?$")>;
+
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUM(B|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMG(F|H)?$")>;
+def : InstRW<[WLat4, VecMul, NormalGr], (instregex "VSUMQ(F|G)?$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Integer comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VEC(B|F|G|H)?$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "VECL(B|F|G|H)?$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCEQ(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCH(B|F|G|H)S$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)?$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VCHL(B|F|G|H)S$")>;
+def : InstRW<[WLat4, VecStr, NormalGr], (instregex "VTM$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point arithmetic
+//===----------------------------------------------------------------------===//
+
+// Conversion and rounding
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCFP(S|L)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?G$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCD(L)?GB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WCE(L)?FB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(S|L)FP$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GD$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?GDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WC(L)?FEB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WL(DE|ED)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(L|R)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFL(LS|RD)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFLLD$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFLRX$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFI(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFIDB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFISB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFISB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WFIXB$")>;
+
+// Sign operations
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VFPSO$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSODB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FPSOSB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFPSOXB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "(V|W)FL(C|N|P)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFL(C|N|P)XB$")>;
+
+// Minimum / maximum
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WF(MAX|MIN)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WF(MAX|MIN)XB$")>;
+
+// Test data class
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFTCI$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCIDB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "(V|W)FTCISB$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFTCIXB$")>;
+
+// Add / subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(A|S)SB$")>;
+def : InstRW<[WLat10, VecDF2, NormalGr], (instregex "WF(A|S)XB$")>;
+
+// Multiply / multiply-and-add/subtract
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFM(DB)?$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WFM(D|S)B$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VFMSB$")>;
+def : InstRW<[WLat20, VecDF2, NormalGr], (instregex "WFMXB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)DB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "VF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat6, VecBF, NormalGr], (instregex "WF(N)?M(A|S)SB$")>;
+def : InstRW<[WLat30, VecDF2, NormalGr], (instregex "WF(N)?M(A|S)XB$")>;
+
+// Divide / square root
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFD$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDDB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FDSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFDXB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "VFSQ$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQDB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "(V|W)FSQSB$")>;
+def : InstRW<[WLat30, VecFPd, NormalGr], (instregex "WFSQXB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point comparison
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)DB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "VF(C|K)(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XB$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XB$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFC(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "VFK(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+ (instregex "WF(C|K)(E|H|HE)DBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr],
+ (instregex "VF(C|K)(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFC(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecXsPm, NormalGr], (instregex "WFK(E|H|HE)SBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFC(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, WLat3, VecDFX, NormalGr], (instregex "WFK(E|H|HE)XBS$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)DB$")>;
+def : InstRW<[WLat3, VecXsPm, NormalGr], (instregex "WF(C|K)SB$")>;
+def : InstRW<[WLat3, VecDFX, NormalGr], (instregex "WF(C|K)XB$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Floating-point insertion and extraction
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LEFR$")>;
+def : InstRW<[WLat3, FXb, NormalGr], (instregex "LFER$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: String instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(B)?$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAE(F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAE(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VFAEZ(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFEE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+ (instregex "VFEE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VFENE(B|F|H|ZB|ZF|ZH)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr],
+ (instregex "VFENE(B|F|H|ZB|ZF|ZH)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VISTR(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VISTR(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRC(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRC(B|F|H)S$")>;
+def : InstRW<[WLat3, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRCZ(B|F|H)S$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRS(B|F|H)?$")>;
+def : InstRW<[WLat4, WLat4, VecStr, NormalGr], (instregex "VSTRSZ(B|F|H)$")>;
+
+//===----------------------------------------------------------------------===//
+// Vector: Packed-decimal instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "VLIP$")>;
+def : InstRW<[WLat6, VecDFX, LSU, GroupAlone2], (instregex "VPKZ$")>;
+def : InstRW<[WLat1, VecDFX, FXb, LSU2, GroupAlone2], (instregex "VUPKZ$")>;
+def : InstRW<[WLat20, WLat20, VecDF2, FXb, GroupAlone],
+ (instregex "VCVB(G)?(Opt)?$")>;
+def : InstRW<[WLat15, WLat15, VecDF2, FXb, GroupAlone],
+ (instregex "VCVD(G)?$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "V(A|S)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VM(S)?P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "V(D|R)P$")>;
+def : InstRW<[WLat30, WLat30, VecDF2, GroupAlone], (instregex "VSDP$")>;
+def : InstRW<[WLat10, WLat10, VecDF2, NormalGr], (instregex "VSRP$")>;
+def : InstRW<[WLat4, WLat4, VecDFX, NormalGr], (instregex "VPSOP$")>;
+def : InstRW<[WLat2, VecDFX, NormalGr], (instregex "V(T|C)P$")>;
+
+
+// -------------------------------- System ---------------------------------- //
+
+//===----------------------------------------------------------------------===//
+// System: Program-Status Word Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, WLat30, MCD], (instregex "EPSW$")>;
+def : InstRW<[WLat20, GroupAlone3], (instregex "LPSW(E)?$")>;
+def : InstRW<[WLat3, FXa, GroupAlone], (instregex "IPK$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SPKA$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SSM$")>;
+def : InstRW<[WLat1, FXb, LSU, GroupAlone], (instregex "ST(N|O)SM$")>;
+def : InstRW<[WLat3, FXa, NormalGr], (instregex "IAC$")>;
+def : InstRW<[WLat1, LSU, EndGroup], (instregex "SAC(F)?$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Control Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, WLat4LSU, LSU2, GroupAlone], (instregex "LCTL(G)?$")>;
+def : InstRW<[WLat1, LSU5, FXb, GroupAlone2], (instregex "STCT(L|G)$")>;
+def : InstRW<[LSULatency, LSU, NormalGr], (instregex "E(P|S)A(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSA(I)?R$")>;
+def : InstRW<[WLat30, MCD], (instregex "ESEA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Prefix-Register Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "S(T)?PX$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Storage-Key and Real Memory Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "ISKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "IVSK$")>;
+def : InstRW<[WLat30, MCD], (instregex "SSKE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RRB(E|M)$")>;
+def : InstRW<[WLat30, MCD], (instregex "IRBM$")>;
+def : InstRW<[WLat30, MCD], (instregex "PFMF$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "TB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGIN$")>;
+def : InstRW<[WLat30, MCD], (instregex "PGOUT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Dynamic-Address-Translation Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "IPTE(Opt)?(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "IDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "CRDTE(Opt)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTLB$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "CSP(G)?$")>;
+def : InstRW<[WLat30, WLat30, WLat30, MCD], (instregex "LPTEA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "LRA(Y|G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STRAG$")>;
+def : InstRW<[WLat30, MCD], (instregex "LURA(G)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "STUR(A|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPROT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Memory-move Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat4LSU, FXa2, FXb, LSU5, GroupAlone2], (instregex "MVC(K|P|S)$")>;
+def : InstRW<[WLat1, FXa, LSU5, GroupAlone2], (instregex "MVC(S|D)K$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVCOS$")>;
+def : InstRW<[WLat30, MCD], (instregex "MVPG$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Address-Space Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "LASP$")>;
+def : InstRW<[WLat1, LSU, GroupAlone], (instregex "PALB$")>;
+def : InstRW<[WLat30, MCD], (instregex "PC$")>;
+def : InstRW<[WLat30, MCD], (instregex "PR$")>;
+def : InstRW<[WLat30, MCD], (instregex "PT(I)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "RP$")>;
+def : InstRW<[WLat30, MCD], (instregex "BS(G|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TAR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Linkage-Stack Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "BAKR$")>;
+def : InstRW<[WLat30, MCD], (instregex "EREG(G)?$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "(E|M)STA$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Time-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "PTFF$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCK(PF|C)?$")>;
+def : InstRW<[WLat1, LSU2, GroupAlone], (instregex "SPT$")>;
+def : InstRW<[WLat15, LSU3, FXa2, FXb, GroupAlone2], (instregex "STCK(F)?$")>;
+def : InstRW<[WLat20, LSU4, FXa2, FXb2, GroupAlone3], (instregex "STCKE$")>;
+def : InstRW<[WLat30, MCD], (instregex "STCKC$")>;
+def : InstRW<[WLat1, LSU2, FXb, Cracked], (instregex "STPT$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Related Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "STAP$")>;
+def : InstRW<[WLat30, MCD], (instregex "STIDP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STSI$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "STFL(E)?$")>;
+def : InstRW<[WLat30, MCD], (instregex "ECAG$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECTG$")>;
+def : InstRW<[WLat30, MCD], (instregex "PTF$")>;
+def : InstRW<[WLat30, MCD], (instregex "PCKMO$")>;
+
+//===----------------------------------------------------------------------===//
+// System: Miscellaneous Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "SVC$")>;
+def : InstRW<[WLat1, FXb, GroupAlone], (instregex "MC$")>;
+def : InstRW<[WLat30, MCD], (instregex "DIAG$")>;
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "TRAC(E|G)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TRAP(2|4)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIG(P|A)$")>;
+def : InstRW<[WLat30, MCD], (instregex "SIE$")>;
+
+//===----------------------------------------------------------------------===//
+// System: CPU-Measurement Facility Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat1, FXb, NormalGr], (instregex "LPP$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "ECPGA$")>;
+def : InstRW<[WLat30, WLat30, MCD], (instregex "E(C|P)CTR$")>;
+def : InstRW<[WLat30, MCD], (instregex "LCCTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "L(P|S)CTL$")>;
+def : InstRW<[WLat30, MCD], (instregex "Q(S|CTR)I$")>;
+def : InstRW<[WLat30, MCD], (instregex "S(C|P)CTR$")>;
+
+//===----------------------------------------------------------------------===//
+// System: I/O Instructions
+//===----------------------------------------------------------------------===//
+
+def : InstRW<[WLat30, MCD], (instregex "(C|H|R|X)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "(M|S|ST|T)SCH$")>;
+def : InstRW<[WLat30, MCD], (instregex "RCHP$")>;
+def : InstRW<[WLat30, MCD], (instregex "SCHM$")>;
+def : InstRW<[WLat30, MCD], (instregex "STC(PS|RW)$")>;
+def : InstRW<[WLat30, MCD], (instregex "TPI$")>;
+def : InstRW<[WLat30, MCD], (instregex "SAL$")>;
+
+}
+
Modified: llvm/trunk/lib/Target/SystemZ/SystemZShortenInst.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZShortenInst.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZShortenInst.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZShortenInst.cpp Fri Jul 12 11:13:16 2019
@@ -46,6 +46,7 @@ private:
bool shortenOn001(MachineInstr &MI, unsigned Opcode);
bool shortenOn001AddCC(MachineInstr &MI, unsigned Opcode);
bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
+ bool shortenSelect(MachineInstr &MI, unsigned Opcode);
const SystemZInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -175,6 +176,23 @@ bool SystemZShortenInst::shortenFPConv(M
return false;
}
+// MI is a three-operand select instruction. If one of the sources matches
+// the destination, convert it to the equivalent load-on-condition.
+bool SystemZShortenInst::shortenSelect(MachineInstr &MI, unsigned Opcode) {
+ if (MI.getOperand(0).getReg() == MI.getOperand(1).getReg()) {
+ MI.setDesc(TII->get(Opcode));
+ MI.tieOperands(0, 1);
+ return true;
+ }
+ if (MI.getOperand(0).getReg() == MI.getOperand(2).getReg()) {
+ TII->commuteInstruction(MI, false, 1, 2);
+ MI.setDesc(TII->get(Opcode));
+ MI.tieOperands(0, 1);
+ return true;
+ }
+ return false;
+}
+
// Process all instructions in MBB. Return true if something changed.
bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
@@ -195,6 +213,18 @@ bool SystemZShortenInst::processBlock(Ma
Changed |= shortenIIF(MI, SystemZ::LLIHL, SystemZ::LLIHH);
break;
+ case SystemZ::SELR:
+ Changed |= shortenSelect(MI, SystemZ::LOCR);
+ break;
+
+ case SystemZ::SELFHR:
+ Changed |= shortenSelect(MI, SystemZ::LOCFHR);
+ break;
+
+ case SystemZ::SELGR:
+ Changed |= shortenSelect(MI, SystemZ::LOCGR);
+ break;
+
case SystemZ::WFADB:
Changed |= shortenOn001AddCC(MI, SystemZ::ADBR);
break;
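
Note: the rewrite done by shortenSelect() can be pictured outside the
MachineInstr API. Below is a minimal, self-contained C++ sketch on a toy
instruction representation (ToySelect and the CCInverted flag are
illustrative stand-ins, not LLVM types; in the real code the operand swap
and condition-mask inversion are performed by TII->commuteInstruction):

  #include <iostream>
  #include <string>
  #include <utility>

  // Toy three-operand select: Dst = CC ? TrueVal : FalseVal.
  struct ToySelect {
    std::string Opcode = "SELR";
    std::string Dst, TrueVal, FalseVal;
    bool CCInverted = false;  // stands in for flipping the condition mask
  };

  // Shorten to a two-operand load-on-condition, Dst = CC ? Src : Dst, when
  // the destination register already aliases one of the two sources.
  bool shortenSelectSketch(ToySelect &MI) {
    if (MI.Dst == MI.FalseVal) {  // CC-false already leaves Dst unchanged
      MI.Opcode = "LOCR";
      MI.FalseVal.clear();
      return true;
    }
    if (MI.Dst == MI.TrueVal) {   // commute the sources and invert CC first
      std::swap(MI.TrueVal, MI.FalseVal);
      MI.CCInverted = !MI.CCInverted;
      MI.Opcode = "LOCR";
      MI.FalseVal.clear();
      return true;
    }
    return false;
  }

  int main() {
    ToySelect S;
    S.Dst = "%r2"; S.TrueVal = "%r3"; S.FalseVal = "%r2";
    if (shortenSelectSketch(S))
      std::cout << S.Opcode << " " << S.Dst << "," << S.TrueVal
                << (S.CCInverted ? "  (cc inverted)" : "") << "\n";
    return 0;
  }
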
Modified: llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.cpp Fri Jul 12 11:13:16 2019
@@ -55,6 +55,9 @@ SystemZSubtarget::SystemZSubtarget(const
HasMessageSecurityAssist7(false), HasMessageSecurityAssist8(false),
HasVectorEnhancements1(false), HasVectorPackedDecimal(false),
HasInsertReferenceBitsMultiple(false),
+ HasMiscellaneousExtensions3(false), HasMessageSecurityAssist9(false),
+ HasVectorEnhancements2(false), HasVectorPackedDecimalEnhancement(false),
+ HasEnhancedSort(false), HasDeflateConversion(false),
TargetTriple(TT), InstrInfo(initializeSubtargetDependencies(CPU, FS)),
TLInfo(TM, *this), TSInfo(), FrameLowering() {}
Modified: llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZSubtarget.h Fri Jul 12 11:13:16 2019
@@ -62,6 +62,12 @@ protected:
bool HasVectorEnhancements1;
bool HasVectorPackedDecimal;
bool HasInsertReferenceBitsMultiple;
+ bool HasMiscellaneousExtensions3;
+ bool HasMessageSecurityAssist9;
+ bool HasVectorEnhancements2;
+ bool HasVectorPackedDecimalEnhancement;
+ bool HasEnhancedSort;
+ bool HasDeflateConversion;
private:
Triple TargetTriple;
@@ -209,6 +215,30 @@ public:
return HasInsertReferenceBitsMultiple;
}
+ // Return true if the target has the miscellaneous-extensions facility 3.
+ bool hasMiscellaneousExtensions3() const {
+ return HasMiscellaneousExtensions3;
+ }
+
+ // Return true if the target has the message-security-assist
+ // extension facility 9.
+ bool hasMessageSecurityAssist9() const { return HasMessageSecurityAssist9; }
+
+ // Return true if the target has the vector-enhancements facility 2.
+ bool hasVectorEnhancements2() const { return HasVectorEnhancements2; }
+
+ // Return true if the target has the vector-packed-decimal
+ // enhancement facility.
+ bool hasVectorPackedDecimalEnhancement() const {
+ return HasVectorPackedDecimalEnhancement;
+ }
+
+ // Return true if the target has the enhanced-sort facility.
+ bool hasEnhancedSort() const { return HasEnhancedSort; }
+
+ // Return true if the target has the deflate-conversion facility.
+ bool hasDeflateConversion() const { return HasDeflateConversion; }
+
// Return true if GV can be accessed using LARL for code model CM.
bool isPC32DBLSymbol(const GlobalValue *GV, CodeModel::Model CM) const;
Modified: llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp Fri Jul 12 11:13:16 2019
@@ -466,6 +466,27 @@ int SystemZTTIImpl::getArithmeticInstrCo
if (Opcode == Instruction::FRem)
return LIBCALL_COST;
+ // Give a discount for some combined logical operations if supported.
+ if (Args.size() == 2 && ST->hasMiscellaneousExtensions3()) {
+ if (Opcode == Instruction::Xor) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() &&
+ (I->getOpcode() == Instruction::And ||
+ I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::Xor))
+ return 0;
+ }
+ }
+ else if (Opcode == Instruction::Or || Opcode == Instruction::And) {
+ for (const Value *A : Args) {
+ if (const Instruction *I = dyn_cast<Instruction>(A))
+ if (I->hasOneUse() && I->getOpcode() == Instruction::Xor)
+ return 0;
+ }
+ }
+ }
+
// Or requires one instruction, although it has custom handling for i64.
if (Opcode == Instruction::Or)
return 1;
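
Note: the zero cost reflects that with miscellaneous-extensions-3 these
two-operation patterns are expected to collapse into the new combined
logical instructions (NAND, NOR, NOT XOR, AND/OR WITH COMPLEMENT). A
sketch of C++ source shapes that should hit the discount -- the ~x below
lowers to the one-use 'xor x, -1' that the code above looks for, and the
mnemonics in the comments are the arch13 instructions each pair is
expected to become:

  #include <cstdint>

  // Each body is two logical operations in IR; on arch13 the pair is
  // expected to become a single instruction, hence the cost of 0 above.
  uint64_t nand_(uint64_t A, uint64_t B) { return ~(A & B); }  // NNGRK
  uint64_t nor_ (uint64_t A, uint64_t B) { return ~(A | B); }  // NOGRK
  uint64_t nxor_(uint64_t A, uint64_t B) { return ~(A ^ B); }  // NXGRK
  uint64_t andc_(uint64_t A, uint64_t B) { return A & ~B; }    // NCGRK
  uint64_t orc_ (uint64_t A, uint64_t B) { return A | ~B; }    // OCGRK
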
@@ -686,9 +707,9 @@ int SystemZTTIImpl::getCastInstrCost(uns
// TODO: Fix base implementation which could simplify things a bit here
// (seems to miss on differentiating on scalar/vector types).
- // Only 64 bit vector conversions are natively supported.
- if (DstScalarBits == 64) {
- if (SrcScalarBits == 64)
+ // Only 64-bit vector conversions are natively supported before arch13.
+ if (DstScalarBits == 64 || ST->hasVectorEnhancements2()) {
+ if (SrcScalarBits == DstScalarBits)
return NumDstVectors;
if (SrcScalarBits == 1)
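
Note: vector-enhancements-2 adds native 32-bit vector conversions (the
VCEFB / VCFEB forms and friends scheduled above), so any same-width
element conversion is now one instruction per vector register instead of
being costed as scalarized. A sketch of the newly cheap case, assuming
Clang's vector extensions and __builtin_convertvector:

  typedef float v4f32 __attribute__((vector_size(16)));
  typedef int   v4i32 __attribute__((vector_size(16)));

  // Same-width fp <-> integer conversions: costed as scalarized before
  // arch13, one vector instruction afterwards (see fp-cast.ll below).
  v4i32 FToI(v4f32 V) { return __builtin_convertvector(V, v4i32); }
  v4f32 IToF(v4i32 V) { return __builtin_convertvector(V, v4f32); }
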
@@ -856,7 +877,7 @@ int SystemZTTIImpl::getCmpSelInstrCost(u
case Instruction::Select:
if (ValTy->isFloatingPointTy())
return 4; // No load on condition for FP - costs a conditional jump.
- return 1; // Load On Condition.
+ return 1; // Load On Condition / Select Register.
}
}
@@ -1009,7 +1030,8 @@ int SystemZTTIImpl::getMemoryOpCost(unsi
(Src->isVectorTy() ? getNumVectorRegs(Src) : getNumberOfParts(Src));
// Store/Load reversed saves one instruction.
- if (!Src->isVectorTy() && NumOps == 1 && I != nullptr) {
+ if (((!Src->isVectorTy() && NumOps == 1) || ST->hasVectorEnhancements2()) &&
+ I != nullptr) {
if (Opcode == Instruction::Load && I->hasOneUse()) {
const Instruction *LdUser = cast<Instruction>(*I->user_begin());
// In case of load -> bswap -> store, return normal cost for the load.
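
Note: the relaxed guard models the new byte-reversing vector memory
instructions (the VLBR / VSTBR families): with vector-enhancements-2 a
vector load feeding a bswap, or a bswap feeding a store, now folds into a
single instruction as well, so the memory operation is costed as free (see
the @bswap_v2i64_mem test below). The scalar shape, which already folded
into a reversing load/store (LRVG / STRVG), looks like this, assuming the
GCC/Clang __builtin_bswap64 builtin:

  #include <cstdint>

  // load -> bswap folds into one byte-reversed load, and bswap -> store
  // into one byte-reversed store, so the load/store itself costs nothing.
  uint64_t LoadRev(const uint64_t *P)        { return __builtin_bswap64(*P); }
  void     StoreRev(uint64_t *P, uint64_t V) { *P = __builtin_bswap64(V); }
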
Modified: llvm/trunk/test/Analysis/CostModel/SystemZ/fp-cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/SystemZ/fp-cast.ll?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/SystemZ/fp-cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/SystemZ/fp-cast.ll Fri Jul 12 11:13:16 2019
@@ -1,4 +1,7 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,AR13
;
; Note: The scalarized vector instructions costs are not including any
; extracts, due to the undef operands.
@@ -114,7 +117,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptosi <2 x double> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptosi <2 x double> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptosi <2 x float> undef to <2 x i64>
-; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
+; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptosi <2 x float> undef to <2 x i32>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptosi <2 x float> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptosi <2 x float> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptosi <4 x fp128> undef to <4 x i64>
@@ -126,7 +130,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptosi <4 x double> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptosi <4 x double> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptosi <4 x float> undef to <4 x i64>
-; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
+; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptosi <4 x float> undef to <4 x i32>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptosi <4 x float> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptosi <4 x float> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptosi <8 x fp128> undef to <8 x i64>
@@ -138,7 +143,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptosi <8 x double> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptosi <8 x double> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptosi <8 x float> undef to <8 x i64>
-; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
+; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
+; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptosi <8 x float> undef to <8 x i32>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptosi <8 x float> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptosi <8 x float> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptosi <16 x double> undef to <16 x i64>
@@ -146,7 +152,8 @@ define void @fptosi() {
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptosi <16 x double> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptosi <16 x double> undef to <16 x i8>
; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptosi <16 x float> undef to <16 x i64>
-; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
+; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
+; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptosi <16 x float> undef to <16 x i32>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptosi <16 x float> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptosi <16 x float> undef to <16 x i8>
@@ -233,7 +240,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v18 = fptoui <2 x double> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v19 = fptoui <2 x double> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v20 = fptoui <2 x float> undef to <2 x i64>
-; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
+; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v21 = fptoui <2 x float> undef to <2 x i32>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v22 = fptoui <2 x float> undef to <2 x i16>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v23 = fptoui <2 x float> undef to <2 x i8>
; CHECK: Cost Model: Found an estimated cost of 6 for instruction: %v24 = fptoui <4 x fp128> undef to <4 x i64>
@@ -245,7 +253,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v30 = fptoui <4 x double> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v31 = fptoui <4 x double> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 10 for instruction: %v32 = fptoui <4 x float> undef to <4 x i64>
-; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
+; Z13: Cost Model: Found an estimated cost of 12 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v33 = fptoui <4 x float> undef to <4 x i32>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v34 = fptoui <4 x float> undef to <4 x i16>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v35 = fptoui <4 x float> undef to <4 x i8>
; CHECK: Cost Model: Found an estimated cost of 12 for instruction: %v36 = fptoui <8 x fp128> undef to <8 x i64>
@@ -257,7 +266,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v42 = fptoui <8 x double> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v43 = fptoui <8 x double> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 20 for instruction: %v44 = fptoui <8 x float> undef to <8 x i64>
-; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
+; Z13: Cost Model: Found an estimated cost of 24 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
+; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v45 = fptoui <8 x float> undef to <8 x i32>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v46 = fptoui <8 x float> undef to <8 x i16>
; CHECK: Cost Model: Found an estimated cost of 24 for instruction: %v47 = fptoui <8 x float> undef to <8 x i8>
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = fptoui <16 x double> undef to <16 x i64>
@@ -265,7 +275,8 @@ define void @fptoui() {
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v50 = fptoui <16 x double> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v51 = fptoui <16 x double> undef to <16 x i8>
; CHECK: Cost Model: Found an estimated cost of 40 for instruction: %v52 = fptoui <16 x float> undef to <16 x i64>
-; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
+; Z13: Cost Model: Found an estimated cost of 48 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
+; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v53 = fptoui <16 x float> undef to <16 x i32>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v54 = fptoui <16 x float> undef to <16 x i16>
; CHECK: Cost Model: Found an estimated cost of 48 for instruction: %v55 = fptoui <16 x float> undef to <16 x i8>
@@ -379,7 +390,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = sitofp <2 x i64> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = sitofp <2 x i32> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = sitofp <2 x i32> undef to <2 x double>
-; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
+; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = sitofp <2 x i32> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = sitofp <2 x i16> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = sitofp <2 x i16> undef to <2 x double>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = sitofp <2 x i16> undef to <2 x float>
@@ -391,7 +403,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = sitofp <4 x i64> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = sitofp <4 x i32> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = sitofp <4 x i32> undef to <4 x double>
-; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
+; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = sitofp <4 x i32> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = sitofp <4 x i16> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = sitofp <4 x i16> undef to <4 x double>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = sitofp <4 x i16> undef to <4 x float>
@@ -403,7 +416,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = sitofp <8 x i64> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = sitofp <8 x i32> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = sitofp <8 x i32> undef to <8 x double>
-; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
+; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
+; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = sitofp <8 x i32> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = sitofp <8 x i16> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = sitofp <8 x i16> undef to <8 x double>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = sitofp <8 x i16> undef to <8 x float>
@@ -413,7 +427,8 @@ define void @sitofp() {
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = sitofp <16 x i64> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = sitofp <16 x i64> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = sitofp <16 x i32> undef to <16 x double>
-; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
+; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
+; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = sitofp <16 x i32> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = sitofp <16 x i16> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = sitofp <16 x i16> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = sitofp <16 x i8> undef to <16 x double>
@@ -497,7 +512,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v14 = uitofp <2 x i64> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 5 for instruction: %v15 = uitofp <2 x i32> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v16 = uitofp <2 x i32> undef to <2 x double>
-; CHECK: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
+; Z13: Cost Model: Found an estimated cost of 14 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v17 = uitofp <2 x i32> undef to <2 x float>
; CHECK: Cost Model: Found an estimated cost of 7 for instruction: %v18 = uitofp <2 x i16> undef to <2 x fp128>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v19 = uitofp <2 x i16> undef to <2 x double>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v20 = uitofp <2 x i16> undef to <2 x float>
@@ -509,7 +525,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v26 = uitofp <4 x i64> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 9 for instruction: %v27 = uitofp <4 x i32> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v28 = uitofp <4 x i32> undef to <4 x double>
-; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
+; Z13: Cost Model: Found an estimated cost of 13 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
+; AR13: Cost Model: Found an estimated cost of 1 for instruction: %v29 = uitofp <4 x i32> undef to <4 x float>
; CHECK: Cost Model: Found an estimated cost of 13 for instruction: %v30 = uitofp <4 x i16> undef to <4 x fp128>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v31 = uitofp <4 x i16> undef to <4 x double>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v32 = uitofp <4 x i16> undef to <4 x float>
@@ -521,7 +538,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v38 = uitofp <8 x i64> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 17 for instruction: %v39 = uitofp <8 x i32> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v40 = uitofp <8 x i32> undef to <8 x double>
-; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
+; Z13: Cost Model: Found an estimated cost of 25 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
+; AR13: Cost Model: Found an estimated cost of 2 for instruction: %v41 = uitofp <8 x i32> undef to <8 x float>
; CHECK: Cost Model: Found an estimated cost of 25 for instruction: %v42 = uitofp <8 x i16> undef to <8 x fp128>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v43 = uitofp <8 x i16> undef to <8 x double>
; CHECK: Cost Model: Found an estimated cost of 33 for instruction: %v44 = uitofp <8 x i16> undef to <8 x float>
@@ -531,7 +549,8 @@ define void @uitofp() {
; CHECK: Cost Model: Found an estimated cost of 8 for instruction: %v48 = uitofp <16 x i64> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v49 = uitofp <16 x i64> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v50 = uitofp <16 x i32> undef to <16 x double>
-; CHECK: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
+; Z13: Cost Model: Found an estimated cost of 49 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
+; AR13: Cost Model: Found an estimated cost of 4 for instruction: %v51 = uitofp <16 x i32> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v52 = uitofp <16 x i16> undef to <16 x double>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v53 = uitofp <16 x i16> undef to <16 x float>
; CHECK: Cost Model: Found an estimated cost of 65 for instruction: %v54 = uitofp <16 x i8> undef to <16 x double>
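A quick sanity check on the uitofp changes above: only the conversions
between 32-bit integers and float change, and the new arch13 costs are
consistent with one instruction per 4-element vector register, i.e.
<2 x i32> and <4 x i32> cost 1, <8 x i32> costs 8/4 = 2, and <16 x i32>
costs 16/4 = 4. (This presumes arch13 provides direct vector conversions
between i32 and float; the costs themselves are what the test encodes.)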
Modified: llvm/trunk/test/Analysis/CostModel/SystemZ/intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/SystemZ/intrinsics.ll?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/SystemZ/intrinsics.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/SystemZ/intrinsics.ll Fri Jul 12 11:13:16 2019
@@ -1,4 +1,7 @@
-; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 | FileCheck %s
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,AR13
define void @bswap_i64(i64 %arg, <2 x i64> %arg2) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i64':
@@ -63,6 +66,32 @@ define void @bswap_i64_mem(i64* %src, i6
ret void
}
+define void @bswap_v2i64_mem(<2 x i64>* %src, <2 x i64> %arg, <2 x i64>* %dst) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v2i64_mem':
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <2 x i64>, <2 x i64>* %src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp2, <2 x i64>* %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <2 x i64>, <2 x i64>* %src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <2 x i64> %swp3, <2 x i64>* %dst
+
+ %Ld1 = load <2 x i64>, <2 x i64>* %src
+ %swp1 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld1)
+
+ %swp2 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %arg)
+ store <2 x i64> %swp2, <2 x i64>* %dst
+
+ %Ld2 = load <2 x i64>, <2 x i64>* %src
+ %swp3 = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %Ld2)
+ store <2 x i64> %swp3, <2 x i64>* %dst
+
+ ret void
+}
+
define void @bswap_i32_mem(i32* %src, i32 %arg, i32* %dst) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i32_mem':
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i32, i32* %src
@@ -85,6 +114,31 @@ define void @bswap_i32_mem(i32* %src, i3
ret void
}
+define void @bswap_v4i32_mem(<4 x i32>* %src, <4 x i32> %arg, <4 x i32>* %dst) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v4i32_mem':
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <4 x i32>, <4 x i32>* %src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp2, <4 x i32>* %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <4 x i32>, <4 x i32>* %src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <4 x i32> %swp3, <4 x i32>* %dst
+ %Ld1 = load <4 x i32>, <4 x i32>* %src
+ %swp1 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld1)
+
+ %swp2 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %arg)
+ store <4 x i32> %swp2, <4 x i32>* %dst
+
+ %Ld2 = load <4 x i32>, <4 x i32>* %src
+ %swp3 = tail call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %Ld2)
+ store <4 x i32> %swp3, <4 x i32>* %dst
+
+ ret void
+}
+
define void @bswap_i16_mem(i16* %src, i16 %arg, i16* %dst) {
; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_i16_mem':
; CHECK: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load i16, i16* %src
@@ -107,6 +161,30 @@ define void @bswap_i16_mem(i16* %src, i1
ret void
}
+define void @bswap_v8i16_mem(<8 x i16>* %src, <8 x i16> %arg, <8 x i16>* %dst) {
+; CHECK: Printing analysis 'Cost Model Analysis' for function 'bswap_v8i16_mem':
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %Ld1 = load <8 x i16>, <8 x i16>* %src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp2, <8 x i16>* %dst
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %Ld2 = load <8 x i16>, <8 x i16>* %src
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: store <8 x i16> %swp3, <8 x i16>* %dst
+ %Ld1 = load <8 x i16>, <8 x i16>* %src
+ %swp1 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld1)
+
+ %swp2 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %arg)
+ store <8 x i16> %swp2, <8 x i16>* %dst
+
+ %Ld2 = load <8 x i16>, <8 x i16>* %src
+ %swp3 = tail call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %Ld2)
+ store <8 x i16> %swp3, <8 x i16>* %dst
+
+ ret void
+}
declare i64 @llvm.bswap.i64(i64)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
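Note the pattern in the new *_mem tests above: on arch13 the loads and
stores adjacent to a vector bswap are costed at 0, on the expectation that
the memory access folds into a byte-swapping vector load or store. The
vec-bswap-* CodeGen tests added further down check the corresponding
vlbr*/vstbr* instructions.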
Added: llvm/trunk/test/Analysis/CostModel/SystemZ/logic-miscext3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/SystemZ/logic-miscext3.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/SystemZ/logic-miscext3.ll (added)
+++ llvm/trunk/test/Analysis/CostModel/SystemZ/logic-miscext3.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,97 @@
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=z13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,Z13
+; RUN: opt < %s -cost-model -analyze -mtriple=systemz-unknown -mcpu=arch13 \
+; RUN: | FileCheck %s -check-prefixes=CHECK,AR13
+
+define void @fun0(i32 %a) {
+; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun0':
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i32 %l0, -1
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i32 %a, %c0
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i32 %a, %c0
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i32 %l1, -1
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i32 %a, %c1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i32 %a, %c1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i32 %l2, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i32 %c2, -1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i32 %c2, -1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i32 %l3, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i32 %c3, -1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i32 %c3, -1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i32 %l4, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i32 %c4, -1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i32 %c4, -1
+
+entry:
+ %l0 = load i32, i32* undef
+ %c0 = xor i32 %l0, -1
+ %res0 = or i32 %a, %c0
+ store i32 %res0, i32* undef
+
+ %l1 = load i32, i32* undef
+ %c1 = xor i32 %l1, -1
+ %res1 = and i32 %a, %c1
+ store i32 %res1, i32* undef
+
+ %l2 = load i32, i32* undef
+ %c2 = and i32 %l2, %a
+ %res2 = xor i32 %c2, -1
+ store i32 %res2, i32* undef
+
+ %l3 = load i32, i32* undef
+ %c3 = or i32 %l3, %a
+ %res3 = xor i32 %c3, -1
+ store i32 %res3, i32* undef
+
+ %l4 = load i32, i32* undef
+ %c4 = xor i32 %l4, %a
+ %res4 = xor i32 %c4, -1
+ store i32 %res4, i32* undef
+
+ ret void
+}
+
+define void @fun1(i64 %a) {
+; CHECK-LABEL: Printing analysis 'Cost Model Analysis' for function 'fun1':
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c0 = xor i64 %l0, -1
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res0 = or i64 %a, %c0
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res0 = or i64 %a, %c0
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c1 = xor i64 %l1, -1
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res1 = and i64 %a, %c1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res1 = and i64 %a, %c1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c2 = and i64 %l2, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res2 = xor i64 %c2, -1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res2 = xor i64 %c2, -1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c3 = or i64 %l3, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res3 = xor i64 %c3, -1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res3 = xor i64 %c3, -1
+; CHECK: Cost Model: Found an estimated cost of 1 for instruction: %c4 = xor i64 %l4, %a
+; Z13: Cost Model: Found an estimated cost of 1 for instruction: %res4 = xor i64 %c4, -1
+; AR13: Cost Model: Found an estimated cost of 0 for instruction: %res4 = xor i64 %c4, -1
+entry:
+ %l0 = load i64, i64* undef
+ %c0 = xor i64 %l0, -1
+ %res0 = or i64 %a, %c0
+ store i64 %res0, i64* undef
+
+ %l1 = load i64, i64* undef
+ %c1 = xor i64 %l1, -1
+ %res1 = and i64 %a, %c1
+ store i64 %res1, i64* undef
+
+ %l2 = load i64, i64* undef
+ %c2 = and i64 %l2, %a
+ %res2 = xor i64 %c2, -1
+ store i64 %res2, i64* undef
+
+ %l3 = load i64, i64* undef
+ %c3 = or i64 %l3, %a
+ %res3 = xor i64 %c3, -1
+ store i64 %res3, i64* undef
+
+ %l4 = load i64, i64* undef
+ %c4 = xor i64 %l4, %a
+ %res4 = xor i64 %c4, -1
+ store i64 %res4, i64* undef
+
+ ret void
+}
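For readers decoding the cost-model lines above: each pair in fun0/fun1
combines a logical operation with a complement, and on arch13 the second
operation of each pair is costed at 0, presumably because the pair maps
onto a single combined logic instruction. A minimal C sketch of the five
patterns (function names are illustrative, not ISA mnemonics):

  #include <stdint.h>

  /* The five op pairs from fun0 above, 32-bit forms; fun1 is the same
     at 64 bits.  Each whole function is one operation on arch13. */
  uint32_t or_with_complement(uint32_t a, uint32_t l)  { return a | ~l; } /* res0 */
  uint32_t and_with_complement(uint32_t a, uint32_t l) { return a & ~l; } /* res1 */
  uint32_t not_and(uint32_t a, uint32_t l) { return ~(l & a); }           /* res2 */
  uint32_t not_or(uint32_t a, uint32_t l)  { return ~(l | a); }           /* res3 */
  uint32_t not_xor(uint32_t a, uint32_t l) { return ~(l ^ a); }           /* res4 */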
Modified: llvm/trunk/test/CodeGen/SystemZ/cond-move-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-move-01.ll?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-move-01.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-move-01.ll Fri Jul 12 11:13:16 2019
@@ -5,6 +5,9 @@
; Run the test again to make sure it still works the same even
; in the presence of the load-store-on-condition-2 facility.
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
+;
+; And again in the presence of the select instructions.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s
; Test LOCR.
define i32 @f1(i32 %a, i32 %b, i32 %limit) {
Modified: llvm/trunk/test/CodeGen/SystemZ/cond-move-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-move-02.ll?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-move-02.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-move-02.ll Fri Jul 12 11:13:16 2019
@@ -1,6 +1,11 @@
; Test LOCHI and LOCGHI.
;
; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -verify-machineinstrs | FileCheck %s
+;
+; Run the test again to make sure it still works the same even
+; in the presence of the select instructions.
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s
+
define i32 @f1(i32 %x) {
; CHECK-LABEL: f1:
Modified: llvm/trunk/test/CodeGen/SystemZ/cond-move-03.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-move-03.ll?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-move-03.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-move-03.ll Fri Jul 12 11:13:16 2019
@@ -3,31 +3,36 @@
;
; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=z13 \
; RUN: -no-integrated-as | FileCheck %s
+;
+; Run the test again to make sure it still works the same even
+; in the presence of the select instructions.
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \
+; RUN: -no-integrated-as | FileCheck %s
define void @f1(i32 %limit) {
; CHECK-LABEL: f1:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 42
-; CHECK: locfhrl [[REG2]], [[REG1]]
-; CHECK: stepc [[REG2]]
+; CHECK: locfhrhe [[REG1]], [[REG2]]
+; CHECK: stepc [[REG1]]
; CHECK: br %r14
%a = call i32 asm sideeffect "stepa $0", "=h"()
%b = call i32 asm sideeffect "stepb $0", "=h"()
%cond = icmp ult i32 %limit, 42
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
-; FIXME: We should commute the LOCRMux to save one move.
define void @f2(i32 %limit) {
; CHECK-LABEL: f2:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
-; CHECK-DAG: clijhe %r2, 42,
-; CHECK: risblg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]]
; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: [[LABEL]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
@@ -37,16 +42,18 @@ define void @f2(i32 %limit) {
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ call void asm sideeffect "use $0", "r"(i32 %b)
ret void
}
define void @f3(i32 %limit) {
; CHECK-LABEL: f3:
-; CHECK-DAG: stepa [[REG2:%r[0-5]]]
-; CHECK-DAG: stepb [[REG1:%r[0-5]]]
-; CHECK-DAG: clijhe %r2, 42,
-; CHECK: risbhg [[REG1]], [[REG2]], 0, 159, 32
-; CHECK: stepc [[REG1]]
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]]
+; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK: [[LABEL]]
+; CHECK: stepc [[REG2]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
%a = call i32 asm sideeffect "stepa $0", "=r"()
@@ -55,17 +62,17 @@ define void @f3(i32 %limit) {
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "h"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ call void asm sideeffect "use $0", "r"(i32 %a)
ret void
}
-; FIXME: We should commute the LOCRMux to save one move.
define void @f4(i32 %limit) {
; CHECK-LABEL: f4:
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
-; CHECK-DAG: clijhe %r2, 42,
-; CHECK: risbhg [[REG2]], [[REG1]], 0, 159, 32
+; CHECK-DAG: clijl %r2, 42, [[LABEL:.LBB[0-9_]+]]
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: [[LABEL]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
@@ -75,6 +82,7 @@ define void @f4(i32 %limit) {
%res = select i1 %cond, i32 %a, i32 %b
call void asm sideeffect "stepc $0", "r"(i32 %res)
call void asm sideeffect "dummy $0", "h"(i32 %dummy)
+ call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
@@ -82,8 +90,9 @@ define void @f5(i32 %limit) {
; CHECK-LABEL: f5:
; CHECK-DAG: stepa [[REG2:%r[0-5]]]
; CHECK-DAG: stepb [[REG1:%r[0-5]]]
-; CHECK-DAG: clijhe %r2, 42,
+; CHECK-DAG: clijhe %r2, 42, [[LABEL:.LBB[0-9_]+]]
; CHECK: risblg [[REG1]], [[REG2]], 0, 159, 32
+; CHECK: [[LABEL]]
; CHECK: stepc [[REG1]]
; CHECK: br %r14
%dummy = call i32 asm sideeffect "dummy $0", "=h"()
@@ -102,8 +111,8 @@ define void @f6(i32 %limit) {
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
-; CHECK: locfhrle [[REG2]], [[REG1]]
-; CHECK: stepc [[REG2]]
+; CHECK: locfhrh [[REG1]], [[REG2]]
+; CHECK: stepc [[REG1]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
@@ -117,6 +126,7 @@ if.then:
return:
%res = phi i32 [ %a, %if.then ], [ %b, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
@@ -126,8 +136,8 @@ define void @f7(i32 %limit) {
; CHECK-DAG: stepa [[REG1:%r[0-5]]]
; CHECK-DAG: stepb [[REG2:%r[0-5]]]
; CHECK-DAG: clfi %r2, 41
-; CHECK: locfhrh [[REG2]], [[REG1]]
-; CHECK: stepc [[REG2]]
+; CHECK: locfhrle [[REG1]], [[REG2]]
+; CHECK: stepc [[REG1]]
; CHECK: br %r14
entry:
%a = call i32 asm sideeffect "stepa $0", "=h"()
@@ -141,6 +151,7 @@ if.then:
return:
%res = phi i32 [ %b, %if.then ], [ %a, %entry ]
call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "use $0", "h"(i32 %b)
ret void
}
Added: llvm/trunk/test/CodeGen/SystemZ/cond-move-06.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-move-06.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-move-06.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-move-06.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,121 @@
+; Test SELR and SELGR.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 -verify-machineinstrs | FileCheck %s
+
+; Test SELR.
+define i32 @f1(i32 %limit, i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: clfi %r2, 42
+; CHECK: selrl %r2, %r3, %r4
+; CHECK: br %r14
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ ret i32 %res
+}
+
+; Test SELGR.
+define i64 @f2(i64 %limit, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: clgfi %r2, 42
+; CHECK: selgrl %r2, %r3, %r4
+; CHECK: br %r14
+ %cond = icmp ult i64 %limit, 42
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Test SELR in a case that could use COMPARE AND BRANCH. We prefer using
+; SELR if possible.
+define i32 @f3(i32 %limit, i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: chi %r2, 42
+; CHECK: selre %r2, %r3, %r4
+; CHECK: br %r14
+ %cond = icmp eq i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ ret i32 %res
+}
+
+; ...and again for SELGR.
+define i64 @f4(i64 %limit, i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: cghi %r2, 42
+; CHECK: selgre %r2, %r3, %r4
+; CHECK: br %r14
+ %cond = icmp eq i64 %limit, 42
+ %res = select i1 %cond, i64 %a, i64 %b
+ ret i64 %res
+}
+
+; Check that we also get SELR as a result of early if-conversion.
+define i32 @f5(i32 %limit, i32 %a, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: clfi %r2, 41
+; CHECK: selrh %r2, %r4, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+ ret i32 %res
+}
+
+; ... and likewise for SELGR.
+define i64 @f6(i64 %limit, i64 %a, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: clgfi %r2, 41
+; CHECK: selgrh %r2, %r4, %r3
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i64 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %a, %if.then ], [ %b, %entry ]
+ ret i64 %res
+}
+
+; Check that inverting the condition works as well.
+define i32 @f7(i32 %limit, i32 %a, i32 %b) {
+; CHECK-LABEL: f7:
+; CHECK: clfi %r2, 41
+; CHECK: selrh %r2, %r3, %r4
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+ ret i32 %res
+}
+
+; ... and likewise for SELGR.
+define i64 @f8(i64 %limit, i64 %a, i64 %b) {
+; CHECK-LABEL: f8:
+; CHECK: clgfi %r2, 41
+; CHECK: selgrh %r2, %r3, %r4
+; CHECK: br %r14
+entry:
+ %cond = icmp ult i64 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i64 [ %b, %if.then ], [ %a, %entry ]
+ ret i64 %res
+}
+
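A rough model of what these tests expect from SELR/SELGR, as far as the
checks show: a three-operand register select on the condition code, so
neither source register is clobbered and no extra copies are needed.
A sketch in C (cc_matches stands for the condition encoded in the
mnemonic suffix, e.g. the trailing "l" in selrl meaning "low" after an
unsigned compare):

  #include <stdbool.h>
  #include <stdint.h>

  /* Rough semantics of "selrl %r1, %r2, %r3" after a compare:
     pick the second operand when the condition holds, else the third. */
  uint32_t selr(bool cc_matches, uint32_t r2, uint32_t r3) {
    return cc_matches ? r2 : r3;
  }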
Added: llvm/trunk/test/CodeGen/SystemZ/cond-move-07.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-move-07.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-move-07.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-move-07.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,76 @@
+; Test SELFHR.
+; See comments in asm-18.ll about testing high-word operations.
+;
+; RUN: llc < %s -verify-machineinstrs -mtriple=s390x-linux-gnu -mcpu=arch13 \
+; RUN: -no-integrated-as | FileCheck %s
+
+define void @f1(i32 %limit) {
+; CHECK-LABEL: f1:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 42
+; CHECK: selfhrl [[REG3:%r[0-5]]], [[REG1]], [[REG2]]
+; CHECK: stepc [[REG3]]
+; CHECK: br %r14
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ %res = select i1 %cond, i32 %a, i32 %b
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "use $0", "h"(i32 %a)
+ call void asm sideeffect "use $0", "h"(i32 %b)
+ ret void
+}
+
+; Check that we also get SELFHR as a result of early if-conversion.
+define void @f2(i32 %limit) {
+; CHECK-LABEL: f2:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG2]], [[REG1]]
+; CHECK: stepc [[REG3]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %a, %if.then ], [ %b, %entry ]
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "use $0", "h"(i32 %a)
+ call void asm sideeffect "use $0", "h"(i32 %b)
+ ret void
+}
+
+; Check that inverting the condition works as well.
+define void @f3(i32 %limit) {
+; CHECK-LABEL: f3:
+; CHECK-DAG: stepa [[REG1:%r[0-5]]]
+; CHECK-DAG: stepb [[REG2:%r[0-5]]]
+; CHECK-DAG: clfi %r2, 41
+; CHECK: selfhrh [[REG3:%r[0-5]]], [[REG1]], [[REG2]]
+; CHECK: stepc [[REG3]]
+; CHECK: br %r14
+entry:
+ %a = call i32 asm sideeffect "stepa $0", "=h"()
+ %b = call i32 asm sideeffect "stepb $0", "=h"()
+ %cond = icmp ult i32 %limit, 42
+ br i1 %cond, label %if.then, label %return
+
+if.then:
+ br label %return
+
+return:
+ %res = phi i32 [ %b, %if.then ], [ %a, %entry ]
+ call void asm sideeffect "stepc $0", "h"(i32 %res)
+ call void asm sideeffect "use $0", "h"(i32 %a)
+ call void asm sideeffect "use $0", "h"(i32 %b)
+ ret void
+}
+
Added: llvm/trunk/test/CodeGen/SystemZ/cond-move-08.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/cond-move-08.mir?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/cond-move-08.mir (added)
+++ llvm/trunk/test/CodeGen/SystemZ/cond-move-08.mir Fri Jul 12 11:13:16 2019
@@ -0,0 +1,179 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=arch13 -start-before=greedy %s -o - \
+# RUN: | FileCheck %s
+#
+# Test that regalloc manages (via regalloc hints) to avoid a LOCRMux
+# jump-sequence expansion, so that a SELR instruction is emitted.
+
+--- |
+ ; ModuleID = 'tc.ll'
+ source_filename = "tc.ll"
+ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+
+ @globvar = external global i32
+
+ declare void @fun() #0
+
+ define void @fun1() #0 {
+ bb5:
+ br label %bb6
+
+ bb6: ; preds = %bb33, %bb5
+ %tmp = phi i1 [ %tmp34, %bb33 ], [ undef, %bb5 ]
+ br label %bb7
+
+ bb7: ; preds = %bb7, %bb6
+ %lsr.iv1 = phi [512 x i32]* [ %0, %bb7 ], [ undef, %bb6 ]
+ %tmp8 = phi i32 [ %tmp27, %bb7 ], [ -1000000, %bb6 ]
+ %tmp9 = phi i64 [ %tmp28, %bb7 ], [ 0, %bb6 ]
+ %lsr3 = trunc i64 %tmp9 to i32
+ %lsr.iv12 = bitcast [512 x i32]* %lsr.iv1 to i32*
+ %tmp11 = load i32, i32* %lsr.iv12
+ %tmp12 = icmp sgt i32 %tmp11, undef
+ %tmp13 = trunc i64 %tmp9 to i32
+ %tmp14 = select i1 %tmp12, i32 %lsr3, i32 0
+ %tmp15 = select i1 %tmp12, i32 %tmp13, i32 %tmp8
+ %tmp16 = load i32, i32* undef
+ %tmp17 = select i1 false, i32 undef, i32 %tmp14
+ %tmp18 = select i1 false, i32 undef, i32 %tmp15
+ %tmp19 = select i1 false, i32 %tmp16, i32 undef
+ %tmp20 = select i1 undef, i32 undef, i32 %tmp17
+ %tmp21 = select i1 undef, i32 undef, i32 %tmp18
+ %tmp22 = select i1 undef, i32 undef, i32 %tmp19
+ %tmp23 = or i64 %tmp9, 3
+ %tmp24 = icmp sgt i32 undef, %tmp22
+ %tmp25 = trunc i64 %tmp23 to i32
+ %tmp26 = select i1 %tmp24, i32 %tmp25, i32 %tmp20
+ %tmp27 = select i1 %tmp24, i32 %tmp25, i32 %tmp21
+ %tmp28 = add nuw nsw i64 %tmp9, 4
+ %tmp29 = icmp eq i64 undef, 0
+ %scevgep = getelementptr [512 x i32], [512 x i32]* %lsr.iv1, i64 0, i64 4
+ %0 = bitcast i32* %scevgep to [512 x i32]*
+ br i1 %tmp29, label %bb30, label %bb7
+
+ bb30: ; preds = %bb7
+ %tmp32 = icmp sgt i32 %tmp27, -1000000
+ br i1 %tmp32, label %bb33, label %bb35
+
+ bb33: ; preds = %bb30
+ call void @fun()
+ store i32 %tmp26, i32* @globvar
+ %tmp34 = icmp ugt i32 undef, 1
+ br label %bb6
+
+ bb35: ; preds = %bb30
+ br i1 %tmp, label %bb37, label %bb38
+
+ bb37: ; preds = %bb35
+ unreachable
+
+ bb38: ; preds = %bb35
+ unreachable
+ }
+
+ ; Function Attrs: nounwind
+ declare void @llvm.stackprotector(i8*, i8**) #1
+
+ attributes #0 = { "target-cpu"="arch13" }
+ attributes #1 = { nounwind }
+
+...
+
+# CHECK: selr
+# CHECK-NOT: risblg
+
+---
+name: fun1
+alignment: 4
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: grx32bit }
+ - { id: 1, class: addr64bit }
+ - { id: 2, class: grx32bit }
+ - { id: 3, class: addr64bit }
+ - { id: 4, class: gr32bit }
+ - { id: 5, class: grx32bit }
+ - { id: 6, class: gr64bit }
+ - { id: 7, class: gr64bit }
+ - { id: 8, class: grx32bit }
+ - { id: 9, class: grx32bit }
+ - { id: 10, class: gr64bit }
+ - { id: 11, class: grx32bit }
+ - { id: 12, class: gr64bit }
+ - { id: 13, class: grx32bit }
+ - { id: 14, class: gr32bit }
+ - { id: 15, class: gr32bit }
+ - { id: 16, class: grx32bit }
+ - { id: 17, class: grx32bit }
+ - { id: 18, class: gr32bit }
+ - { id: 19, class: addr64bit }
+ - { id: 20, class: grx32bit }
+ - { id: 21, class: gr32bit }
+ - { id: 22, class: gr64bit }
+ - { id: 23, class: grx32bit }
+ - { id: 24, class: grx32bit }
+ - { id: 25, class: grx32bit }
+ - { id: 26, class: addr64bit }
+ - { id: 27, class: grx32bit }
+ - { id: 28, class: addr64bit }
+frameInfo:
+ hasCalls: true
+body: |
+ bb.0.bb5:
+ %25:grx32bit = IMPLICIT_DEF
+
+ bb.1.bb6:
+ %28:addr64bit = LGHI 0
+ %27:grx32bit = IIFMux 4293967296
+ %26:addr64bit = IMPLICIT_DEF
+
+ bb.2.bb7:
+ successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+
+ %14:gr32bit = LMux %26, 0, $noreg :: (load 4 from %ir.lsr.iv12)
+ CR %14, undef %15:gr32bit, implicit-def $cc
+ %16:grx32bit = COPY %28.subreg_l32
+ %16:grx32bit = LOCHIMux %16, 0, 14, 12, implicit $cc
+ %17:grx32bit = SELRMux %27, %28.subreg_l32, 14, 2, implicit killed $cc
+ %18:gr32bit = LMux undef %19:addr64bit, 0, $noreg :: (load 4 from `i32* undef`)
+ %20:grx32bit = COPY %28.subreg_l32
+ %20:grx32bit = OILMux %20, 3, implicit-def dead $cc
+ CR undef %21:gr32bit, %18, implicit-def $cc
+ %4:gr32bit = SELRMux %16, %20, 14, 2, implicit $cc
+ %27:grx32bit = SELRMux %17, %20, 14, 2, implicit killed $cc
+ %28:addr64bit = nuw nsw LA %28, 4, $noreg
+ %26:addr64bit = LA %26, 16, $noreg
+ CGHI undef %22:gr64bit, 0, implicit-def $cc
+ BRC 14, 6, %bb.2, implicit killed $cc
+ J %bb.3
+
+ bb.3.bb30:
+ successors: %bb.4(0x7fffffff), %bb.5(0x00000001)
+
+ CFIMux %27, -999999, implicit-def $cc
+ BRC 14, 4, %bb.5, implicit killed $cc
+ J %bb.4
+
+ bb.4.bb33:
+ ADJCALLSTACKDOWN 0, 0
+ CallBRASL @fun, csr_systemz, implicit-def dead $r14d, implicit-def dead $cc
+ ADJCALLSTACKUP 0, 0
+ STRL %4, @globvar :: (store 4 into @globvar)
+ CLFIMux undef %23:grx32bit, 1, implicit-def $cc
+ %25:grx32bit = LHIMux 0
+ %25:grx32bit = LOCHIMux %25, 1, 14, 2, implicit killed $cc
+ J %bb.1
+
+ bb.5.bb35:
+ successors: %bb.6, %bb.7
+
+ TMLMux %25, 1, implicit-def $cc
+ BRC 15, 8, %bb.7, implicit killed $cc
+ J %bb.6
+
+ bb.6.bb37:
+ successors:
+
+
+ bb.7.bb38:
+
+...
Added: llvm/trunk/test/CodeGen/SystemZ/ctpop-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/ctpop-02.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/ctpop-02.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/ctpop-02.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,74 @@
+; Test the population-count instruction on arch13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare i32 @llvm.ctpop.i32(i32 %a)
+declare i64 @llvm.ctpop.i64(i64 %a)
+
+define i32 @f1(i32 %a) {
+; CHECK-LABEL: f1:
+; CHECK: llgfr %r0, %r2
+; CHECK: popcnt %r2, %r0, 8
+; CHECK: br %r14
+
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %a)
+ ret i32 %popcnt
+}
+
+define i32 @f2(i32 %a) {
+; CHECK-LABEL: f2:
+; CHECK: llghr %r0, %r2
+; CHECK: popcnt %r2, %r0, 8
+; CHECK: br %r14
+ %and = and i32 %a, 65535
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i32 @f3(i32 %a) {
+; CHECK-LABEL: f3:
+; CHECK: llgcr %r0, %r2
+; CHECK: popcnt %r2, %r0, 8
+; CHECK: br %r14
+ %and = and i32 %a, 255
+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
+ ret i32 %popcnt
+}
+
+define i64 @f4(i64 %a) {
+; CHECK-LABEL: f4:
+; CHECK: popcnt %r2, %r2, 8
+; CHECK: br %r14
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %a)
+ ret i64 %popcnt
+}
+
+define i64 @f5(i64 %a) {
+; CHECK-LABEL: f5:
+; CHECK: llgfr %r0, %r2
+; CHECK: popcnt %r2, %r0, 8
+ %and = and i64 %a, 4294967295
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f6(i64 %a) {
+; CHECK-LABEL: f6:
+; CHECK: llghr %r0, %r2
+; CHECK: popcnt %r2, %r0, 8
+; CHECK: br %r14
+ %and = and i64 %a, 65535
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
+define i64 @f7(i64 %a) {
+; CHECK-LABEL: f7:
+; CHECK: llgcr %r0, %r2
+; CHECK: popcnt %r2, %r0, 8
+; CHECK: br %r14
+ %and = and i64 %a, 255
+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
+ ret i64 %popcnt
+}
+
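The llgfr/llghr/llgcr zero-extensions above only make sense if the M3=8
form of POPCNT on arch13 counts set bits across the entire 64-bit register
(the older per-byte form would not need them). Assumed semantics, as a
C sketch:

  #include <stdint.h>

  /* Assumed semantics of "popcnt %rD, %rS, 8" on arch13: the total
     number of 1 bits in the full 64-bit source register. */
  uint64_t popcnt_m3_8(uint64_t rS) {
    uint64_t count = 0;
    for (int i = 0; i < 64; i++)
      count += (rS >> i) & 1;
    return count;
  }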
Added: llvm/trunk/test/CodeGen/SystemZ/not-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/not-01.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/not-01.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/not-01.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,126 @@
+; Test combined logical operations involving complement on arch13.
+;
+; RUN: llc -mcpu=arch13 < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+; And-with-complement 32-bit.
+define i32 @f1(i32 %dummy, i32 %a, i32 %b) {
+; CHECK-LABEL: f1:
+; CHECK: ncrk %r2, %r3, %r4
+; CHECK: br %r14
+ %neg = xor i32 %b, -1
+ %ret = and i32 %neg, %a
+ ret i32 %ret
+}
+
+; And-with-complement 64-bit.
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f2:
+; CHECK: ncgrk %r2, %r3, %r4
+; CHECK: br %r14
+ %neg = xor i64 %b, -1
+ %ret = and i64 %neg, %a
+ ret i64 %ret
+}
+
+; Or-with-complement 32-bit.
+define i32 @f3(i32 %dummy, i32 %a, i32 %b) {
+; CHECK-LABEL: f3:
+; CHECK: ocrk %r2, %r3, %r4
+; CHECK: br %r14
+ %neg = xor i32 %b, -1
+ %ret = or i32 %neg, %a
+ ret i32 %ret
+}
+
+; Or-with-complement 64-bit.
+define i64 @f4(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f4:
+; CHECK: ocgrk %r2, %r3, %r4
+; CHECK: br %r14
+ %neg = xor i64 %b, -1
+ %ret = or i64 %neg, %a
+ ret i64 %ret
+}
+
+; NAND 32-bit.
+define i32 @f5(i32 %dummy, i32 %a, i32 %b) {
+; CHECK-LABEL: f5:
+; CHECK: nnrk %r2, %r3, %r4
+; CHECK: br %r14
+ %tmp = and i32 %a, %b
+ %ret = xor i32 %tmp, -1
+ ret i32 %ret
+}
+
+; NAND 64-bit.
+define i64 @f6(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f6:
+; CHECK: nngrk %r2, %r3, %r4
+; CHECK: br %r14
+ %tmp = and i64 %a, %b
+ %ret = xor i64 %tmp, -1
+ ret i64 %ret
+}
+
+; NOR 32-bit.
+define i32 @f7(i32 %dummy, i32 %a, i32 %b) {
+; CHECK-LABEL: f7:
+; CHECK: nork %r2, %r3, %r4
+; CHECK: br %r14
+ %tmp = or i32 %a, %b
+ %ret = xor i32 %tmp, -1
+ ret i32 %ret
+}
+
+; NOR 64-bit.
+define i64 @f8(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f8:
+; CHECK: nogrk %r2, %r3, %r4
+; CHECK: br %r14
+ %tmp = or i64 %a, %b
+ %ret = xor i64 %tmp, -1
+ ret i64 %ret
+}
+
+; NXOR 32-bit.
+define i32 @f9(i32 %dummy, i32 %a, i32 %b) {
+; CHECK-LABEL: f9:
+; CHECK: nxrk %r2, %r3, %r4
+; CHECK: br %r14
+ %tmp = xor i32 %a, %b
+ %ret = xor i32 %tmp, -1
+ ret i32 %ret
+}
+
+; NXOR 64-bit.
+define i64 @f10(i64 %dummy, i64 %a, i64 %b) {
+; CHECK-LABEL: f10:
+; CHECK: nxgrk %r2, %r3, %r4
+; CHECK: br %r14
+ %tmp = xor i64 %a, %b
+ %ret = xor i64 %tmp, -1
+ ret i64 %ret
+}
+
+; Or-with-complement 32-bit of a constant.
+define i32 @f11(i32 %a) {
+; CHECK-LABEL: f11:
+; CHECK: lhi [[REG:%r[0-5]]], -256
+; CHECK: ocrk %r2, [[REG]], %r2
+; CHECK: br %r14
+ %neg = xor i32 %a, -1
+ %ret = or i32 %neg, -256
+ ret i32 %ret
+}
+
+; Or-with-complement 64-bit of a constant.
+define i64 @f12(i64 %a) {
+; CHECK-LABEL: f12:
+; CHECK: lghi [[REG:%r[0-5]]], -256
+; CHECK: ocgrk %r2, [[REG]], %r2
+; CHECK: br %r14
+ %neg = xor i64 %a, -1
+ %ret = or i64 %neg, -256
+ ret i64 %ret
+}
+
Added: llvm/trunk/test/CodeGen/SystemZ/vec-bswap-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-bswap-01.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-bswap-01.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-bswap-01.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,97 @@
+; Test loads of byte-swapped vector elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; Test v8i16 loads.
+define <8 x i16> @f1(<8 x i16> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vlbrh %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <8 x i16>, <8 x i16> *%ptr
+ %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %load)
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 loads.
+define <4 x i32> @f2(<4 x i32> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vlbrf %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 loads.
+define <2 x i64> @f3(<2 x i64> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vlbrg %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <2 x i64>, <2 x i64> *%ptr
+ %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %load)
+ ret <2 x i64> %ret
+}
+
+; Test the highest aligned in-range offset.
+define <4 x i32> @f4(<4 x i32> *%base) {
+; CHECK-LABEL: f4:
+; CHECK: vlbrf %v24, 4080(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
+ ret <4 x i32> %ret
+}
+
+; Test the highest unaligned in-range offset.
+define <4 x i32> @f5(i8 *%base) {
+; CHECK-LABEL: f5:
+; CHECK: vlbrf %v24, 4095(%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 4095
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
+ ret <4 x i32> %ret
+}
+
+; Test the next offset up, which requires separate address logic.
+define <4 x i32> @f6(<4 x i32> *%base) {
+; CHECK-LABEL: f6:
+; CHECK: aghi %r2, 4096
+; CHECK: vlbrf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
+ ret <4 x i32> %ret
+}
+
+; Test negative offsets, which also require separate address logic.
+define <4 x i32> @f7(<4 x i32> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: aghi %r2, -16
+; CHECK: vlbrf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
+ ret <4 x i32> %ret
+}
+
+; Check that indexes are allowed.
+define <4 x i32> @f8(i8 *%base, i64 %index) {
+; CHECK-LABEL: f8:
+; CHECK: vlbrf %v24, 0(%r3,%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 %index
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %load)
+ ret <4 x i32> %ret
+}
+
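The offset cases above (f4 through f7) probe the addressing range: with a
16-byte element stride, 255 * 16 = 4080 and the unaligned byte offset 4095
still encode directly, consistent with a 12-bit unsigned displacement
(0 to 4095), while 256 * 16 = 4096 and the negative -16 do not, hence the
explicit aghi adjustments.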
Added: llvm/trunk/test/CodeGen/SystemZ/vec-bswap-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-bswap-02.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-bswap-02.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-bswap-02.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,97 @@
+; Test stores of byte-swapped vector elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; Test v8i16 stores.
+define void @f1(<8 x i16> %val, <8 x i16> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vstbrh %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
+ store <8 x i16> %swap, <8 x i16> *%ptr
+ ret void
+}
+
+; Test v4i32 stores.
+define void @f2(<4 x i32> %val, <4 x i32> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vstbrf %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Test v2i64 stores.
+define void @f3(<2 x i64> %val, <2 x i64> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vstbrg %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
+ store <2 x i64> %swap, <2 x i64> *%ptr
+ ret void
+}
+
+; Test the highest aligned in-range offset.
+define void @f4(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f4:
+; CHECK: vstbrf %v24, 4080(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Test the highest unaligned in-range offset.
+define void @f5(<4 x i32> %val, i8 *%base) {
+; CHECK-LABEL: f5:
+; CHECK: vstbrf %v24, 4095(%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 4095
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ store <4 x i32> %swap, <4 x i32> *%ptr, align 1
+ ret void
+}
+
+; Test the next offset up, which requires separate address logic.
+define void @f6(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f6:
+; CHECK: aghi %r2, 4096
+; CHECK: vstbrf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Test negative offsets, which also require separate address logic.
+define void @f7(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: aghi %r2, -16
+; CHECK: vstbrf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Check that indexes are allowed.
+define void @f8(<4 x i32> %val, i8 *%base, i64 %index) {
+; CHECK-LABEL: f8:
+; CHECK: vstbrf %v24, 0(%r3,%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 %index
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ store <4 x i32> %swap, <4 x i32> *%ptr, align 1
+ ret void
+}
+
Added: llvm/trunk/test/CodeGen/SystemZ/vec-bswap-03.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-bswap-03.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-bswap-03.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-bswap-03.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,220 @@
+; Test vector insertion of byte-swapped memory values.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; Test v8i16 insertion into the first element.
+define <8 x i16> @f1(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vlebrh %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ %ret = insertelement <8 x i16> %val, i16 %swap, i32 0
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into the last element.
+define <8 x i16> @f2(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vlebrh %v24, 0(%r2), 7
+; CHECK: br %r14
+ %element = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ %ret = insertelement <8 x i16> %val, i16 %swap, i32 7
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion with the highest in-range offset.
+define <8 x i16> @f3(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f3:
+; CHECK: vlebrh %v24, 4094(%r2), 5
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2047
+ %element = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ %ret = insertelement <8 x i16> %val, i16 %swap, i32 5
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion with the first out-of-range offset.
+define <8 x i16> @f4(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f4:
+; CHECK: aghi %r2, 4096
+; CHECK: vlebrh %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2048
+ %element = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ %ret = insertelement <8 x i16> %val, i16 %swap, i32 1
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into a variable element.
+define <8 x i16> @f5(<8 x i16> %val, i16 *%ptr, i32 %index) {
+; CHECK-LABEL: f5:
+; CHECK-NOT: vlebrh
+; CHECK: br %r14
+ %element = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ %ret = insertelement <8 x i16> %val, i16 %swap, i32 %index
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion using a pair of vector bswaps.
+define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vlebrh %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i16, i16 *%ptr
+ %swapval = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
+ %insert = insertelement <8 x i16> %swapval, i16 %element, i32 0
+ %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 insertion into the first element.
+define <4 x i32> @f7(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vlebrf %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ %ret = insertelement <4 x i32> %val, i32 %swap, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into the last element.
+define <4 x i32> @f8(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vlebrf %v24, 0(%r2), 3
+; CHECK: br %r14
+ %element = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ %ret = insertelement <4 x i32> %val, i32 %swap, i32 3
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the highest in-range offset.
+define <4 x i32> @f9(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f9:
+; CHECK: vlebrf %v24, 4092(%r2), 2
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1023
+ %element = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ %ret = insertelement <4 x i32> %val, i32 %swap, i32 2
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion with the first out-of-range offset.
+define <4 x i32> @f10(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f10:
+; CHECK: aghi %r2, 4096
+; CHECK: vlebrf %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1024
+ %element = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ %ret = insertelement <4 x i32> %val, i32 %swap, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into a variable element.
+define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr, i32 %index) {
+; CHECK-LABEL: f11:
+; CHECK-NOT: vlebrf
+; CHECK: br %r14
+ %element = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ %ret = insertelement <4 x i32> %val, i32 %swap, i32 %index
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion using a pair of vector bswaps.
+define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vlebrf %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i32, i32 *%ptr
+ %swapval = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ %insert = insertelement <4 x i32> %swapval, i32 %element, i32 0
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into the first element.
+define <2 x i64> @f13(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f13:
+; CHECK: vlebrg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ %ret = insertelement <2 x i64> %val, i64 %swap, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into the last element.
+define <2 x i64> @f14(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vlebrg %v24, 0(%r2), 1
+; CHECK: br %r14
+ %element = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ %ret = insertelement <2 x i64> %val, i64 %swap, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the highest in-range offset.
+define <2 x i64> @f15(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f15:
+; CHECK: vlebrg %v24, 4088(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 511
+ %element = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ %ret = insertelement <2 x i64> %val, i64 %swap, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion with the first out-of-range offset.
+define <2 x i64> @f16(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f16:
+; CHECK: aghi %r2, 4096
+; CHECK: vlebrg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 512
+ %element = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ %ret = insertelement <2 x i64> %val, i64 %swap, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into a variable element.
+define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr, i32 %index) {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vlebrg
+; CHECK: br %r14
+ %element = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ %ret = insertelement <2 x i64> %val, i64 %swap, i32 %index
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion using a pair of vector bswaps.
+define <2 x i64> @f18(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f18:
+; CHECK: vlebrg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = load i64, i64 *%ptr
+ %swapval = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
+ %insert = insertelement <2 x i64> %swapval, i64 %element, i32 0
+ %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
+ ret <2 x i64> %ret
+}
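A note on the checks above: the final operand of vlebrh/vlebrf/vlebrg is
a constant element index (0-7, 0-3 and 0-1 respectively). Since the index
is an immediate, the variable-element cases (f5, f11, f17) check via
CHECK-NOT that no vlebr* is emitted at all.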
Added: llvm/trunk/test/CodeGen/SystemZ/vec-bswap-04.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-bswap-04.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-bswap-04.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-bswap-04.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,254 @@
+; Test vector extraction of byte-swapped values to memory.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; Test v8i16 extraction from the first element.
+define void @f1(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vstebrh %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 0
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ store i16 %swap, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction from the last element.
+define void @f2(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vstebrh %v24, 0(%r2), 7
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 7
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ store i16 %swap, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f3(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK-NOT: vstebrh %v24, 0(%r2), 8
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 8
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ store i16 %swap, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction with the highest in-range offset.
+define void @f4(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f4:
+; CHECK: vstebrh %v24, 4094(%r2), 5
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2047
+ %element = extractelement <8 x i16> %val, i32 5
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ store i16 %swap, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction with the first out-of-range offset.
+define void @f5(<8 x i16> %val, i16 *%base) {
+; CHECK-LABEL: f5:
+; CHECK: aghi %r2, 4096
+; CHECK: vstebrh %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i32 2048
+ %element = extractelement <8 x i16> %val, i32 1
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ store i16 %swap, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction from a variable element.
+define void @f6(<8 x i16> %val, i16 *%ptr, i32 %index) {
+; CHECK-LABEL: f6:
+; CHECK-NOT: vstebrh
+; CHECK: br %r14
+ %element = extractelement <8 x i16> %val, i32 %index
+ %swap = call i16 @llvm.bswap.i16(i16 %element)
+ store i16 %swap, i16 *%ptr
+ ret void
+}
+
+; Test v8i16 extraction using a vector bswap.
+define void @f7(<8 x i16> %val, i16 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vstebrh %v24, 0(%r2), 0
+; CHECK: br %r14
+ %swap = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %val)
+ %element = extractelement <8 x i16> %swap, i32 0
+ store i16 %element, i16 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction from the first element.
+define void @f8(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vstebrf %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 0
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ store i32 %swap, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction from the last element.
+define void @f9(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vstebrf %v24, 0(%r2), 3
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 3
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ store i32 %swap, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f10(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f10:
+; CHECK-NOT: vstebrf %v24, 0(%r2), 4
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 4
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ store i32 %swap, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction with the highest in-range offset.
+define void @f11(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f11:
+; CHECK: vstebrf %v24, 4092(%r2), 2
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1023
+ %element = extractelement <4 x i32> %val, i32 2
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ store i32 %swap, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction with the first out-of-range offset.
+define void @f12(<4 x i32> %val, i32 *%base) {
+; CHECK-LABEL: f12:
+; CHECK: aghi %r2, 4096
+; CHECK: vstebrf %v24, 0(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i32 1024
+ %element = extractelement <4 x i32> %val, i32 1
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ store i32 %swap, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction from a variable element.
+define void @f13(<4 x i32> %val, i32 *%ptr, i32 %index) {
+; CHECK-LABEL: f13:
+; CHECK-NOT: vstebrf
+; CHECK: br %r14
+ %element = extractelement <4 x i32> %val, i32 %index
+ %swap = call i32 @llvm.bswap.i32(i32 %element)
+ store i32 %swap, i32 *%ptr
+ ret void
+}
+
+; Test v4i32 extraction using a vector bswap.
+define void @f14(<4 x i32> %val, i32 *%ptr) {
+; CHECK-LABEL: f14:
+; CHECK: vstebrf %v24, 0(%r2), 0
+; CHECK: br %r14
+ %swap = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %val)
+ %element = extractelement <4 x i32> %swap, i32 0
+ store i32 %element, i32 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction from the first element.
+define void @f15(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f15:
+; CHECK: vstebrg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 0
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ store i64 %swap, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction from the last element.
+define void @f16(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f16:
+; CHECK: vstebrg %v24, 0(%r2), 1
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 1
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ store i64 %swap, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction of an invalid element. This must compile,
+; but we don't care what it does.
+define void @f17(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f17:
+; CHECK-NOT: vstebrg %v24, 0(%r2), 2
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 2
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ store i64 %swap, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction with the highest in-range offset.
+define void @f18(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f18:
+; CHECK: vstebrg %v24, 4088(%r2), 1
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 511
+ %element = extractelement <2 x i64> %val, i32 1
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ store i64 %swap, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction with the first out-of-range offset.
+define void @f19(<2 x i64> %val, i64 *%base) {
+; CHECK-LABEL: f19:
+; CHECK: aghi %r2, 4096
+; CHECK: vstebrg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 512
+ %element = extractelement <2 x i64> %val, i32 0
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ store i64 %swap, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction from a variable element.
+define void @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
+; CHECK-LABEL: f20:
+; CHECK-NOT: vstebrg
+; CHECK: br %r14
+ %element = extractelement <2 x i64> %val, i32 %index
+ %swap = call i64 @llvm.bswap.i64(i64 %element)
+ store i64 %swap, i64 *%ptr
+ ret void
+}
+
+; Test v2i64 extraction using a vector bswap.
+define void @f21(<2 x i64> %val, i64 *%ptr) {
+; CHECK-LABEL: f21:
+; CHECK: vstebrg %v24, 0(%r2), 0
+; CHECK: br %r14
+ %swap = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %val)
+ %element = extractelement <2 x i64> %swap, i32 0
+ store i64 %element, i64 *%ptr
+ ret void
+}
+
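The file above exercises both IR shapes that fold into a single VSTEBR element store: a scalar bswap applied after the extractelement (f10 through f13 and f15 through f20) and a vector bswap applied before it (f14 and f21). As a minimal sketch outside the test suite (repro.ll is a hypothetical file name), the following should compile to one vstebrf:

declare i32 @llvm.bswap.i32(i32)

define void @repro(<4 x i32> %val, i32 *%ptr) {
  %element = extractelement <4 x i32> %val, i32 2
  %swap = call i32 @llvm.bswap.i32(i32 %element)
  store i32 %swap, i32 *%ptr
  ret void
}

Fed to llc -mtriple=s390x-linux-gnu -mcpu=arch13, this is expected to emit vstebrf %v24, 0(%r2), 2 and then br %r14, matching the in-range cases above.
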
Added: llvm/trunk/test/CodeGen/SystemZ/vec-bswap-05.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-bswap-05.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-bswap-05.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-bswap-05.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,136 @@
+; Test vector insertions of byte-swapped memory values into 0.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; Test VLLEBRZH.
+define <8 x i16> @f1(i16 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vllebrzh %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %val)
+ %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
+ ret <8 x i16> %ret
+}
+
+; Test VLLEBRZH using a vector bswap.
+define <8 x i16> @f2(i16 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vllebrzh %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i16, i16 *%ptr
+ %insert = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
+ %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
+ ret <8 x i16> %ret
+}
+
+; Test VLLEBRZF.
+define <4 x i32> @f3(i32 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vllebrzf %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %val)
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 1
+ ret <4 x i32> %ret
+}
+
+; Test VLLEBRZF using a vector bswap.
+define <4 x i32> @f4(i32 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vllebrzf %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i32, i32 *%ptr
+ %insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
+ ret <4 x i32> %ret
+}
+
+; Test VLLEBRZG.
+define <2 x i64> @f5(i64 *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vllebrzg %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %val)
+ %ret = insertelement <2 x i64> zeroinitializer, i64 %swap, i32 0
+ ret <2 x i64> %ret
+}
+
+; Test VLLEBRZG using a vector bswap.
+define <2 x i64> @f6(i64 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vllebrzg %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i64, i64 *%ptr
+ %insert = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
+ %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
+ ret <2 x i64> %ret
+}
+
+; Test VLLEBRZE.
+define <4 x i32> @f7(i32 *%ptr) {
+; CHECK-LABEL: f7:
+; CHECK: vllebrze %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %val)
+ %ret = insertelement <4 x i32> zeroinitializer, i32 %swap, i32 0
+ ret <4 x i32> %ret
+}
+
+; Test VLLEBRZE using a vector bswap.
+define <4 x i32> @f8(i32 *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vllebrze %v24, 0(%r2)
+; CHECK: br %r14
+ %val = load i32, i32 *%ptr
+ %insert = insertelement <4 x i32> zeroinitializer, i32 %val, i32 0
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
+ ret <4 x i32> %ret
+}
+
+; Test VLLEBRZH with the highest in-range offset.
+define <8 x i16> @f9(i16 *%base) {
+; CHECK-LABEL: f9:
+; CHECK: vllebrzh %v24, 4094(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i64 2047
+ %val = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %val)
+ %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
+ ret <8 x i16> %ret
+}
+
+; Test VLLEBRZH with the next highest offset.
+define <8 x i16> @f10(i16 *%base) {
+; CHECK-LABEL: f10:
+; CHECK-NOT: vllebrzh %v24, 4096(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i64 2048
+ %val = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %val)
+ %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
+ ret <8 x i16> %ret
+}
+
+; Test that VLLEBRZH allows an index.
+define <8 x i16> @f11(i16 *%base, i64 %index) {
+; CHECK-LABEL: f11:
+; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
+; CHECK: vllebrzh %v24, 0([[REG]],%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i64 %index
+ %val = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %val)
+ %ret = insertelement <8 x i16> zeroinitializer, i16 %swap, i32 3
+ ret <8 x i16> %ret
+}
+
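The element indices in the file above are not arbitrary: VLLEBRZH only matches an insertion at halfword element 3, VLLEBRZF at word element 1, and VLLEBRZG at doubleword element 0, because the instruction zeroes the vector and places the byte-reversed value right-aligned in the leftmost doubleword (mirroring VLLEZ), while the VLLEBRZE form left-aligns a word, hence the element 0 insertions in f7 and f8. An insertion at any other index cannot use these instructions and falls back to a longer sequence.
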
Added: llvm/trunk/test/CodeGen/SystemZ/vec-bswap-06.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-bswap-06.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-bswap-06.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-bswap-06.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,77 @@
+; Test insertions of byte-swapped memory values into a nonzero index of an undef.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; Test v8i16 insertion into an undef, with an arbitrary index.
+define <8 x i16> @f1(i16 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vlbrreph %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %val)
+ %ret = insertelement <8 x i16> undef, i16 %swap, i32 5
+ ret <8 x i16> %ret
+}
+
+; Test v8i16 insertion into an undef, using a vector bswap.
+define <8 x i16> @f2(i16 *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vlbrreph %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i16, i16 *%ptr
+ %insert = insertelement <8 x i16> undef, i16 %val, i32 5
+ %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %insert)
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 insertion into an undef, with an arbitrary index.
+define <4 x i32> @f3(i32 *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vlbrrepf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %val)
+ %ret = insertelement <4 x i32> undef, i32 %swap, i32 2
+ ret <4 x i32> %ret
+}
+
+; Test v4i32 insertion into an undef, using a vector bswap.
+define <4 x i32> @f4(i32 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vlbrrepf %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i32, i32 *%ptr
+ %insert = insertelement <4 x i32> undef, i32 %val, i32 2
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %insert)
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 insertion into an undef, with an arbitrary index.
+define <2 x i64> @f5(i64 *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vlbrrepg %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %val)
+ %ret = insertelement <2 x i64> undef, i64 %swap, i32 1
+ ret <2 x i64> %ret
+}
+
+; Test v2i64 insertion into an undef, using a vector bswap.
+define <2 x i64> @f6(i64 *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vlbrrepg %v24, 0(%r2)
+; CHECK-NEXT: br %r14
+ %val = load i64, i64 *%ptr
+ %insert = insertelement <2 x i64> undef, i64 %val, i32 1
+ %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %insert)
+ ret <2 x i64> %ret
+}
+
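Because every other lane is undef, the insertion index is irrelevant in the file above: replicating the byte-reversed scalar into all lanes with VLBRREP trivially satisfies the one defined lane, which is why f1 through f6 use assorted nonzero indices. A sketch with yet another index (hypothetical, by analogy with f3) should lower the same way:

declare i32 @llvm.bswap.i32(i32)

define <4 x i32> @sketch(i32 *%ptr) {
  %val = load i32, i32 *%ptr
  %swap = call i32 @llvm.bswap.i32(i32 %val)
  %ret = insertelement <4 x i32> undef, i32 %swap, i32 3
  ret <4 x i32> %ret
}

with vlbrrepf %v24, 0(%r2) expected as the entire body before the return.
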
Added: llvm/trunk/test/CodeGen/SystemZ/vec-bswap-07.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-bswap-07.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-bswap-07.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-bswap-07.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,192 @@
+; Test replications of a byte-swapped scalar memory value.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare i16 @llvm.bswap.i16(i16)
+declare i32 @llvm.bswap.i32(i32)
+declare i64 @llvm.bswap.i64(i64)
+declare <8 x i16> @llvm.bswap.v8i16(<8 x i16>)
+declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)
+declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
+
+; Test a v8i16 replicating load with no offset.
+define <8 x i16> @f1(i16 *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vlbrreph %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %scalar)
+ %val = insertelement <8 x i16> undef, i16 %swap, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 replicating load with the maximum in-range offset.
+define <8 x i16> @f2(i16 *%base) {
+; CHECK-LABEL: f2:
+; CHECK: vlbrreph %v24, 4094(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i64 2047
+ %scalar = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %scalar)
+ %val = insertelement <8 x i16> undef, i16 %swap, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 replicating load with the first out-of-range offset.
+define <8 x i16> @f3(i16 *%base) {
+; CHECK-LABEL: f3:
+; CHECK: aghi %r2, 4096
+; CHECK: vlbrreph %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i16, i16 *%base, i64 2048
+ %scalar = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %scalar)
+ %val = insertelement <8 x i16> undef, i16 %swap, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
+; Test a v8i16 replicating load using a vector bswap.
+define <8 x i16> @f4(i16 *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vlbrreph %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i16, i16 *%ptr
+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
+ %rep = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ %ret = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> %rep)
+ ret <8 x i16> %ret
+}
+
+; Test a v4i32 replicating load with no offset.
+define <4 x i32> @f5(i32 *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vlbrrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %scalar)
+ %val = insertelement <4 x i32> undef, i32 %swap, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 replicating load with the maximum in-range offset.
+define <4 x i32> @f6(i32 *%base) {
+; CHECK-LABEL: f6:
+; CHECK: vlbrrepf %v24, 4092(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i64 1023
+ %scalar = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %scalar)
+ %val = insertelement <4 x i32> undef, i32 %swap, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 replicating load with the first out-of-range offset.
+define <4 x i32> @f7(i32 *%base) {
+; CHECK-LABEL: f7:
+; CHECK: aghi %r2, 4096
+; CHECK: vlbrrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i32, i32 *%base, i64 1024
+ %scalar = load i32, i32 *%ptr
+ %swap = call i32 @llvm.bswap.i32(i32 %scalar)
+ %val = insertelement <4 x i32> undef, i32 %swap, i32 0
+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ ret <4 x i32> %ret
+}
+
+; Test a v4i32 replicating load using a vector bswap.
+define <4 x i32> @f8(i32 *%ptr) {
+; CHECK-LABEL: f8:
+; CHECK: vlbrrepf %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i32, i32 *%ptr
+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %rep = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+ %ret = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %rep)
+ ret <4 x i32> %ret
+}
+
+; Test a v2i64 replicating load with no offset.
+define <2 x i64> @f9(i64 *%ptr) {
+; CHECK-LABEL: f9:
+; CHECK: vlbrrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %scalar)
+ %val = insertelement <2 x i64> undef, i64 %swap, i32 0
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 replicating load with the maximum in-range offset.
+define <2 x i64> @f10(i64 *%base) {
+; CHECK-LABEL: f10:
+; CHECK: vlbrrepg %v24, 4088(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 511
+ %scalar = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %scalar)
+ %val = insertelement <2 x i64> undef, i64 %swap, i32 0
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 replicating load with the first out-of-range offset.
+define <2 x i64> @f11(i64 *%base) {
+; CHECK-LABEL: f11:
+; CHECK: aghi %r2, 4096
+; CHECK: vlbrrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr i64, i64 *%base, i32 512
+ %scalar = load i64, i64 *%ptr
+ %swap = call i64 @llvm.bswap.i64(i64 %scalar)
+ %val = insertelement <2 x i64> undef, i64 %swap, i32 0
+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ ret <2 x i64> %ret
+}
+
+; Test a v2i64 replicating load using a vector bswap.
+define <2 x i64> @f12(i64 *%ptr) {
+; CHECK-LABEL: f12:
+; CHECK: vlbrrepg %v24, 0(%r2)
+; CHECK: br %r14
+ %scalar = load i64, i64 *%ptr
+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
+ %rep = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+ %ret = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %rep)
+ ret <2 x i64> %ret
+}
+
+; Test a v8i16 replicating load with an index.
+define <8 x i16> @f13(i16 *%base, i64 %index) {
+; CHECK-LABEL: f13:
+; CHECK: sllg [[REG:%r[1-5]]], %r3, 1
+; CHECK: vlbrreph %v24, 2046([[REG]],%r2)
+; CHECK: br %r14
+ %ptr1 = getelementptr i16, i16 *%base, i64 %index
+ %ptr = getelementptr i16, i16 *%ptr1, i64 1023
+ %scalar = load i16, i16 *%ptr
+ %swap = call i16 @llvm.bswap.i16(i16 %scalar)
+ %val = insertelement <8 x i16> undef, i16 %swap, i32 0
+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+ ret <8 x i16> %ret
+}
+
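The f13 addressing is worth unpacking: the halfword index is scaled to a byte offset with sllg ..., %r3, 1 (shift left by one, i.e. multiply by 2), and the constant part of the address, 1023 halfwords * 2 = 2046 bytes, folds into the displacement. That keeps the address within the 12-bit unsigned displacement range (0 to 4095) these vector memory forms accept; f3 shows the failure mode, where 2048 halfwords * 2 = 4096 bytes no longer fits and an aghi must adjust the base first.
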
Added: llvm/trunk/test/CodeGen/SystemZ/vec-conv-03.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-conv-03.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-conv-03.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-conv-03.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,40 @@
+; Test conversions between integer and float elements on arch13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+; Test conversion of f32s to signed i32s.
+define <4 x i32> @f1(<4 x float> %floats) {
+; CHECK-LABEL: f1:
+; CHECK: vcfeb %v24, %v24, 0, 5
+; CHECK: br %r14
+ %dwords = fptosi <4 x float> %floats to <4 x i32>
+ ret <4 x i32> %dwords
+}
+
+; Test conversion of f32s to unsigned i32s.
+define <4 x i32> @f2(<4 x float> %floats) {
+; CHECK-LABEL: f2:
+; CHECK: vclfeb %v24, %v24, 0, 5
+; CHECK: br %r14
+ %dwords = fptoui <4 x float> %floats to <4 x i32>
+ ret <4 x i32> %dwords
+}
+
+; Test conversion of signed i32s to f32s.
+define <4 x float> @f3(<4 x i32> %dwords) {
+; CHECK-LABEL: f3:
+; CHECK: vcefb %v24, %v24, 0, 0
+; CHECK: br %r14
+ %floats = sitofp <4 x i32> %dwords to <4 x float>
+ ret <4 x float> %floats
+}
+
+; Test conversion of unsigned i32s to f32s.
+define <4 x float> @f4(<4 x i32> %dwords) {
+; CHECK-LABEL: f4:
+; CHECK: vcelfb %v24, %v24, 0, 0
+; CHECK: br %r14
+ %floats = uitofp <4 x i32> %dwords to <4 x float>
+ ret <4 x float> %floats
+}
+
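The two trailing operands in these CHECK lines are the instruction mask fields, and the final one selects the rounding method: 5 (round toward zero) matches the truncating semantics LLVM requires for fptosi and fptoui, while 0 (round according to the current FP mode) is used for the int-to-fp direction. This reading is inferred from the z/Architecture mask definitions and the analogous z13 VCGDB/VCDGB handling rather than stated anywhere in this patch.
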
Added: llvm/trunk/test/CodeGen/SystemZ/vec-eswap-01.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-eswap-01.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-eswap-01.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-eswap-01.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,138 @@
+; Test loads of element-swapped vector elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+; Test v16i8 loads.
+define <16 x i8> @f1(<16 x i8> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vlbrq %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <16 x i8>, <16 x i8> *%ptr
+ %ret = shufflevector <16 x i8> %load, <16 x i8> undef,
+ <16 x i32> <i32 15, i32 14, i32 13, i32 12,
+ i32 11, i32 10, i32 9, i32 8,
+ i32 7, i32 6, i32 5, i32 4,
+ i32 3, i32 2, i32 1, i32 0>
+ ret <16 x i8> %ret
+}
+
+; Test v8i16 loads.
+define <8 x i16> @f2(<8 x i16> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vlerh %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <8 x i16>, <8 x i16> *%ptr
+ %ret = shufflevector <8 x i16> %load, <8 x i16> undef,
+ <8 x i32> <i32 7, i32 6, i32 5, i32 4,
+ i32 3, i32 2, i32 1, i32 0>
+ ret <8 x i16> %ret
+}
+
+; Test v4i32 loads.
+define <4 x i32> @f3(<4 x i32> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vlerf %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %ret
+}
+
+; Test v2i64 loads.
+define <2 x i64> @f4(<2 x i64> *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vlerg %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <2 x i64>, <2 x i64> *%ptr
+ %ret = shufflevector <2 x i64> %load, <2 x i64> undef,
+ <2 x i32> <i32 1, i32 0>
+ ret <2 x i64> %ret
+}
+
+; Test v4f32 loads.
+define <4 x float> @f5(<4 x float> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vlerf %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <4 x float>, <4 x float> *%ptr
+ %ret = shufflevector <4 x float> %load, <4 x float> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x float> %ret
+}
+
+; Test v2f64 loads.
+define <2 x double> @f6(<2 x double> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vlerg %v24, 0(%r2)
+; CHECK: br %r14
+ %load = load <2 x double>, <2 x double> *%ptr
+ %ret = shufflevector <2 x double> %load, <2 x double> undef,
+ <2 x i32> <i32 1, i32 0>
+ ret <2 x double> %ret
+}
+
+; Test the highest aligned in-range offset.
+define <4 x i32> @f7(<4 x i32> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: vlerf %v24, 4080(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %ret
+}
+
+; Test the highest unaligned in-range offset.
+define <4 x i32> @f8(i8 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vlerf %v24, 4095(%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 4095
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %ret
+}
+
+; Test the next offset up, which requires separate address logic.
+define <4 x i32> @f9(<4 x i32> *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vlerf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %ret
+}
+
+; Test negative offsets, which also require separate address logic.
+define <4 x i32> @f10(<4 x i32> *%base) {
+; CHECK-LABEL: f10:
+; CHECK: aghi %r2, -16
+; CHECK: vlerf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %ret
+}
+
+; Check that indexes are allowed.
+define <4 x i32> @f11(i8 *%base, i64 %index) {
+; CHECK-LABEL: f11:
+; CHECK: vlerf %v24, 0(%r3,%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 %index
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %load = load <4 x i32>, <4 x i32> *%ptr
+ %ret = shufflevector <4 x i32> %load, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ ret <4 x i32> %ret
+}
+
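Unlike the vec-bswap tests, VLER reverses whole elements rather than the bytes within them, so the shufflevector masks here are complete reversals, and f8 shows that no particular alignment is required. A sketch of the v8i16 case with a register index (hypothetical, by analogy with f11):

define <8 x i16> @sketch(i8 *%base, i64 %index) {
  %addr = getelementptr i8, i8 *%base, i64 %index
  %ptr = bitcast i8 *%addr to <8 x i16> *
  %load = load <8 x i16>, <8 x i16> *%ptr
  %ret = shufflevector <8 x i16> %load, <8 x i16> undef,
                       <8 x i32> <i32 7, i32 6, i32 5, i32 4,
                                  i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %ret
}

which is expected to emit vlerh %v24, 0(%r3,%r2) followed by the return.
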
Added: llvm/trunk/test/CodeGen/SystemZ/vec-eswap-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-eswap-02.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-eswap-02.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-eswap-02.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,138 @@
+; Test stores of element-swapped vector elements.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+; Test v16i8 stores.
+define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: vstbrq %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = shufflevector <16 x i8> %val, <16 x i8> undef,
+ <16 x i32> <i32 15, i32 14, i32 13, i32 12,
+ i32 11, i32 10, i32 9, i32 8,
+ i32 7, i32 6, i32 5, i32 4,
+ i32 3, i32 2, i32 1, i32 0>
+ store <16 x i8> %swap, <16 x i8> *%ptr
+ ret void
+}
+
+; Test v8i16 stores.
+define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: vsterh %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = shufflevector <8 x i16> %val, <8 x i16> undef,
+ <8 x i32> <i32 7, i32 6, i32 5, i32 4,
+ i32 3, i32 2, i32 1, i32 0>
+ store <8 x i16> %swap, <8 x i16> *%ptr
+ ret void
+}
+
+; Test v4i32 stores.
+define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Test v2i64 stores.
+define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK: vsterg %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = shufflevector <2 x i64> %val, <2 x i64> undef,
+ <2 x i32> <i32 1, i32 0>
+ store <2 x i64> %swap, <2 x i64> *%ptr
+ ret void
+}
+
+; Test v4f32 stores.
+define void @f5(<4 x float> %val, <4 x float> *%ptr) {
+; CHECK-LABEL: f5:
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = shufflevector <4 x float> %val, <4 x float> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ store <4 x float> %swap, <4 x float> *%ptr
+ ret void
+}
+
+; Test v2f64 stores.
+define void @f6(<2 x double> %val, <2 x double> *%ptr) {
+; CHECK-LABEL: f6:
+; CHECK: vsterg %v24, 0(%r2)
+; CHECK: br %r14
+ %swap = shufflevector <2 x double> %val, <2 x double> undef,
+ <2 x i32> <i32 1, i32 0>
+ store <2 x double> %swap, <2 x double> *%ptr
+ ret void
+}
+
+; Test the highest aligned in-range offset.
+define void @f7(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f7:
+; CHECK: vsterf %v24, 4080(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 255
+ %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Test the highest unaligned in-range offset.
+define void @f8(<4 x i32> %val, i8 *%base) {
+; CHECK-LABEL: f8:
+; CHECK: vsterf %v24, 4095(%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 4095
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ store <4 x i32> %swap, <4 x i32> *%ptr, align 1
+ ret void
+}
+
+; Test the next offset up, which requires separate address logic.
+define void @f9(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f9:
+; CHECK: aghi %r2, 4096
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 256
+ %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Test negative offsets, which also require separate address logic.
+define void @f10(<4 x i32> %val, <4 x i32> *%base) {
+; CHECK-LABEL: f10:
+; CHECK: aghi %r2, -16
+; CHECK: vsterf %v24, 0(%r2)
+; CHECK: br %r14
+ %ptr = getelementptr <4 x i32>, <4 x i32> *%base, i64 -1
+ %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ store <4 x i32> %swap, <4 x i32> *%ptr
+ ret void
+}
+
+; Check that indexes are allowed.
+define void @f11(<4 x i32> %val, i8 *%base, i64 %index) {
+; CHECK-LABEL: f11:
+; CHECK: vsterf %v24, 0(%r3,%r2)
+; CHECK: br %r14
+ %addr = getelementptr i8, i8 *%base, i64 %index
+ %ptr = bitcast i8 *%addr to <4 x i32> *
+ %swap = shufflevector <4 x i32> %val, <4 x i32> undef,
+ <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ store <4 x i32> %swap, <4 x i32> *%ptr, align 1
+ ret void
+}
+
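This store file mirrors the load tests one-for-one; the only wrinkle is that f8 and f11 mark the store align 1 so that the deliberately unaligned and indexed addresses remain well-defined IR, while the CHECK lines confirm that vsterf itself imposes no alignment requirement.
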
Added: llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics-03.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics-03.ll?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics-03.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-intrinsics-03.ll Fri Jul 12 11:13:16 2019
@@ -0,0 +1,154 @@
+; Test vector intrinsics added with arch13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=arch13 | FileCheck %s
+
+declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
+declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)
+
+declare {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16>, <8 x i16>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32>, <4 x i32>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16>, <8 x i16>, <16 x i8>)
+declare {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32>, <4 x i32>, <16 x i8>)
+
+
+; VSLD with the minimum useful value.
+define <16 x i8> @test_vsld_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsld_1:
+; CHECK: vsld %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 1)
+ ret <16 x i8> %res
+}
+
+; VSLD with the maximum value.
+define <16 x i8> @test_vsld_7(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsld_7:
+; CHECK: vsld %v24, %v24, %v26, 7
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 7)
+ ret <16 x i8> %res
+}
+
+; VSRD with the minimum useful value.
+define <16 x i8> @test_vsrd_1(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrd_1:
+; CHECK: vsrd %v24, %v24, %v26, 1
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 1)
+ ret <16 x i8> %res
+}
+
+; VSRD with the maximum value.
+define <16 x i8> @test_vsrd_7(<16 x i8> %a, <16 x i8> %b) {
+; CHECK-LABEL: test_vsrd_7:
+; CHECK: vsrd %v24, %v24, %v26, 7
+; CHECK: br %r14
+ %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 7)
+ ret <16 x i8> %res
+}
+
+
+; VSTRSB.
+define <16 x i8> @test_vstrsb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrsb:
+; CHECK: vstrsb %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrsb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VSTRSH.
+define <16 x i8> @test_vstrsh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrsh:
+; CHECK: vstrsh %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrsh(<8 x i16> %a, <8 x i16> %b,
+ <16 x i8> %c)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VSTRSF.
+define <16 x i8> @test_vstrsf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrsf:
+; CHECK: vstrsf %v24, %v24, %v26, %v28, 0
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrsf(<4 x i32> %a, <4 x i32> %b,
+ <16 x i8> %c)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VSTRSZB.
+define <16 x i8> @test_vstrszb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrszb:
+; CHECK: vstrszb %v24, %v24, %v26, %v28
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrszb(<16 x i8> %a, <16 x i8> %b,
+ <16 x i8> %c)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VSTRSZH.
+define <16 x i8> @test_vstrszh(<8 x i16> %a, <8 x i16> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrszh:
+; CHECK: vstrszh %v24, %v24, %v26, %v28
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrszh(<8 x i16> %a, <8 x i16> %b,
+ <16 x i8> %c)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
+; VSTRSZF.
+define <16 x i8> @test_vstrszf(<4 x i32> %a, <4 x i32> %b, <16 x i8> %c,
+ i32 *%ccptr) {
+; CHECK-LABEL: test_vstrszf:
+; CHECK: vstrszf %v24, %v24, %v26, %v28
+; CHECK: ipm [[REG:%r[0-5]]]
+; CHECK: srl [[REG]], 28
+; CHECK: st [[REG]], 0(%r2)
+; CHECK: br %r14
+ %call = call {<16 x i8>, i32} @llvm.s390.vstrszf(<4 x i32> %a, <4 x i32> %b,
+ <16 x i8> %c)
+ %res = extractvalue {<16 x i8>, i32} %call, 0
+ %cc = extractvalue {<16 x i8>, i32} %call, 1
+ store i32 %cc, i32 *%ccptr
+ ret <16 x i8> %res
+}
+
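A recurring idiom in the CHECK lines above is how the condition code becomes the i32 member of each returned struct: ipm inserts the CC into bits 2-3 of the low word of a general register, and the srl by 28 brings those bits down to the two least significant positions, yielding the 0-3 value stored through %ccptr. This is the standard SystemZ CC-materialization sequence, not something introduced by arch13.
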
Added: llvm/trunk/test/MC/Disassembler/SystemZ/insns-arch13.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/Disassembler/SystemZ/insns-arch13.txt?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/MC/Disassembler/SystemZ/insns-arch13.txt (added)
+++ llvm/trunk/test/MC/Disassembler/SystemZ/insns-arch13.txt Fri Jul 12 11:13:16 2019
@@ -0,0 +1,1479 @@
+# Test arch13 instructions that don't have PC-relative operands.
+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=arch13 \
+# RUN: | FileCheck %s
+
+# CHECK: dfltcc %r2, %r2, %r2
+0xb9 0x39 0x20 0x22
+
+# CHECK: dfltcc %r2, %r8, %r15
+0xb9 0x39 0xf0 0x28
+
+# CHECK: dfltcc %r14, %r8, %r2
+0xb9 0x39 0x20 0xe8
+
+# CHECK: dfltcc %r6, %r8, %r10
+0xb9 0x39 0xa0 0x68
+
+# CHECK: kdsa %r0, %r2
+0xb9 0x3a 0x00 0x02
+
+# CHECK: kdsa %r0, %r14
+0xb9 0x3a 0x00 0x0e
+
+# CHECK: kdsa %r15, %r2
+0xb9 0x3a 0x00 0xf2
+
+# CHECK: kdsa %r7, %r10
+0xb9 0x3a 0x00 0x7a
+
+# CHECK: mvcrl 0, 0
+0xe5 0x0a 0x00 0x00 0x00 0x00
+
+# CHECK: mvcrl 0(%r1), 0(%r2)
+0xe5 0x0a 0x10 0x00 0x20 0x00
+
+# CHECK: mvcrl 160(%r1), 320(%r15)
+0xe5 0x0a 0x10 0xa0 0xf1 0x40
+
+# CHECK: mvcrl 0(%r1), 4095
+0xe5 0x0a 0x10 0x00 0x0f 0xff
+
+# CHECK: mvcrl 0(%r1), 4095(%r2)
+0xe5 0x0a 0x10 0x00 0x2f 0xff
+
+# CHECK: mvcrl 0(%r1), 4095(%r15)
+0xe5 0x0a 0x10 0x00 0xff 0xff
+
+# CHECK: mvcrl 0(%r1), 0
+0xe5 0x0a 0x10 0x00 0x00 0x00
+
+# CHECK: mvcrl 0(%r15), 0
+0xe5 0x0a 0xf0 0x00 0x00 0x00
+
+# CHECK: mvcrl 4095(%r1), 0
+0xe5 0x0a 0x1f 0xff 0x00 0x00
+
+# CHECK: mvcrl 4095(%r15), 0
+0xe5 0x0a 0xff 0xff 0x00 0x00
+
+# CHECK: ncgrk %r0, %r0, %r0
+0xb9 0xe5 0x00 0x00
+
+# CHECK: ncgrk %r0, %r0, %r15
+0xb9 0xe5 0xf0 0x00
+
+# CHECK: ncgrk %r0, %r15, %r0
+0xb9 0xe5 0x00 0x0f
+
+# CHECK: ncgrk %r15, %r0, %r0
+0xb9 0xe5 0x00 0xf0
+
+# CHECK: ncgrk %r7, %r8, %r9
+0xb9 0xe5 0x90 0x78
+
+# CHECK: ncrk %r0, %r0, %r0
+0xb9 0xf5 0x00 0x00
+
+# CHECK: ncrk %r0, %r0, %r15
+0xb9 0xf5 0xf0 0x00
+
+# CHECK: ncrk %r0, %r15, %r0
+0xb9 0xf5 0x00 0x0f
+
+# CHECK: ncrk %r15, %r0, %r0
+0xb9 0xf5 0x00 0xf0
+
+# CHECK: ncrk %r7, %r8, %r9
+0xb9 0xf5 0x90 0x78
+
+# CHECK: nngrk %r0, %r0, %r0
+0xb9 0x64 0x00 0x00
+
+# CHECK: nngrk %r0, %r0, %r15
+0xb9 0x64 0xf0 0x00
+
+# CHECK: nngrk %r0, %r15, %r0
+0xb9 0x64 0x00 0x0f
+
+# CHECK: nngrk %r15, %r0, %r0
+0xb9 0x64 0x00 0xf0
+
+# CHECK: nngrk %r7, %r8, %r9
+0xb9 0x64 0x90 0x78
+
+# CHECK: nnrk %r0, %r0, %r0
+0xb9 0x74 0x00 0x00
+
+# CHECK: nnrk %r0, %r0, %r15
+0xb9 0x74 0xf0 0x00
+
+# CHECK: nnrk %r0, %r15, %r0
+0xb9 0x74 0x00 0x0f
+
+# CHECK: nnrk %r15, %r0, %r0
+0xb9 0x74 0x00 0xf0
+
+# CHECK: nnrk %r7, %r8, %r9
+0xb9 0x74 0x90 0x78
+
+# CHECK: nogrk %r0, %r0, %r0
+0xb9 0x66 0x00 0x00
+
+# CHECK: nogrk %r0, %r0, %r15
+0xb9 0x66 0xf0 0x00
+
+# CHECK: nogrk %r0, %r15, %r0
+0xb9 0x66 0x00 0x0f
+
+# CHECK: nogrk %r15, %r0, %r0
+0xb9 0x66 0x00 0xf0
+
+# CHECK: nogrk %r7, %r8, %r9
+0xb9 0x66 0x90 0x78
+
+# CHECK: nork %r0, %r0, %r0
+0xb9 0x76 0x00 0x00
+
+# CHECK: nork %r0, %r0, %r15
+0xb9 0x76 0xf0 0x00
+
+# CHECK: nork %r0, %r15, %r0
+0xb9 0x76 0x00 0x0f
+
+# CHECK: nork %r15, %r0, %r0
+0xb9 0x76 0x00 0xf0
+
+# CHECK: nork %r7, %r8, %r9
+0xb9 0x76 0x90 0x78
+
+# CHECK: nxgrk %r0, %r0, %r0
+0xb9 0x67 0x00 0x00
+
+# CHECK: nxgrk %r0, %r0, %r15
+0xb9 0x67 0xf0 0x00
+
+# CHECK: nxgrk %r0, %r15, %r0
+0xb9 0x67 0x00 0x0f
+
+# CHECK: nxgrk %r15, %r0, %r0
+0xb9 0x67 0x00 0xf0
+
+# CHECK: nxgrk %r7, %r8, %r9
+0xb9 0x67 0x90 0x78
+
+# CHECK: nxrk %r0, %r0, %r0
+0xb9 0x77 0x00 0x00
+
+# CHECK: nxrk %r0, %r0, %r15
+0xb9 0x77 0xf0 0x00
+
+# CHECK: nxrk %r0, %r15, %r0
+0xb9 0x77 0x00 0x0f
+
+# CHECK: nxrk %r15, %r0, %r0
+0xb9 0x77 0x00 0xf0
+
+# CHECK: nxrk %r7, %r8, %r9
+0xb9 0x77 0x90 0x78
+
+# CHECK: ocgrk %r0, %r0, %r0
+0xb9 0x65 0x00 0x00
+
+# CHECK: ocgrk %r0, %r0, %r15
+0xb9 0x65 0xf0 0x00
+
+# CHECK: ocgrk %r0, %r15, %r0
+0xb9 0x65 0x00 0x0f
+
+# CHECK: ocgrk %r15, %r0, %r0
+0xb9 0x65 0x00 0xf0
+
+# CHECK: ocgrk %r7, %r8, %r9
+0xb9 0x65 0x90 0x78
+
+# CHECK: ocrk %r0, %r0, %r0
+0xb9 0x75 0x00 0x00
+
+# CHECK: ocrk %r0, %r0, %r15
+0xb9 0x75 0xf0 0x00
+
+# CHECK: ocrk %r0, %r15, %r0
+0xb9 0x75 0x00 0x0f
+
+# CHECK: ocrk %r15, %r0, %r0
+0xb9 0x75 0x00 0xf0
+
+# CHECK: ocrk %r7, %r8, %r9
+0xb9 0x75 0x90 0x78
+
+# CHECK: popcnt %r0, %r0
+0xb9 0xe1 0x00 0x00
+
+# CHECK: popcnt %r0, %r15
+0xb9 0xe1 0x00 0x0f
+
+# CHECK: popcnt %r14, %r0
+0xb9 0xe1 0x00 0xe0
+
+# CHECK: popcnt %r6, %r8
+0xb9 0xe1 0x00 0x68
+
+# CHECK: popcnt %r4, %r13, 1
+0xb9 0xe1 0x10 0x4d
+
+# CHECK: popcnt %r4, %r13, 15
+0xb9 0xe1 0xf0 0x4d
+
+# CHECK: selgr %r0, %r0, %r0, 0
+0xb9 0xe3 0x00 0x00
+
+# CHECK: selgr %r0, %r0, %r0, 15
+0xb9 0xe3 0x0f 0x00
+
+# CHECK: selgr %r0, %r0, %r15, 0
+0xb9 0xe3 0xf0 0x00
+
+# CHECK: selgr %r0, %r15, %r0, 0
+0xb9 0xe3 0x00 0x0f
+
+# CHECK: selgr %r15, %r0, %r0, 0
+0xb9 0xe3 0x00 0xf0
+
+# CHECK: selgro %r1, %r2, %r3
+0xb9 0xe3 0x31 0x12
+
+# CHECK: selgrh %r1, %r2, %r3
+0xb9 0xe3 0x32 0x12
+
+# CHECK: selgrnle %r1, %r2, %r3
+0xb9 0xe3 0x33 0x12
+
+# CHECK: selgrl %r1, %r2, %r3
+0xb9 0xe3 0x34 0x12
+
+# CHECK: selgrnhe %r1, %r2, %r3
+0xb9 0xe3 0x35 0x12
+
+# CHECK: selgrlh %r1, %r2, %r3
+0xb9 0xe3 0x36 0x12
+
+# CHECK: selgrne %r1, %r2, %r3
+0xb9 0xe3 0x37 0x12
+
+# CHECK: selgre %r1, %r2, %r3
+0xb9 0xe3 0x38 0x12
+
+# CHECK: selgrnlh %r1, %r2, %r3
+0xb9 0xe3 0x39 0x12
+
+# CHECK: selgrhe %r1, %r2, %r3
+0xb9 0xe3 0x3a 0x12
+
+# CHECK: selgrnl %r1, %r2, %r3
+0xb9 0xe3 0x3b 0x12
+
+# CHECK: selgrle %r1, %r2, %r3
+0xb9 0xe3 0x3c 0x12
+
+# CHECK: selgrnh %r1, %r2, %r3
+0xb9 0xe3 0x3d 0x12
+
+# CHECK: selgrno %r1, %r2, %r3
+0xb9 0xe3 0x3e 0x12
+
+# CHECK: selfhr %r0, %r0, %r0, 0
+0xb9 0xc0 0x00 0x00
+
+# CHECK: selfhr %r0, %r0, %r0, 15
+0xb9 0xc0 0x0f 0x00
+
+# CHECK: selfhr %r0, %r0, %r15, 0
+0xb9 0xc0 0xf0 0x00
+
+# CHECK: selfhr %r0, %r15, %r0, 0
+0xb9 0xc0 0x00 0x0f
+
+# CHECK: selfhr %r15, %r0, %r0, 0
+0xb9 0xc0 0x00 0xf0
+
+# CHECK: selfhro %r1, %r2, %r3
+0xb9 0xc0 0x31 0x12
+
+# CHECK: selfhrh %r1, %r2, %r3
+0xb9 0xc0 0x32 0x12
+
+# CHECK: selfhrnle %r1, %r2, %r3
+0xb9 0xc0 0x33 0x12
+
+# CHECK: selfhrl %r1, %r2, %r3
+0xb9 0xc0 0x34 0x12
+
+# CHECK: selfhrnhe %r1, %r2, %r3
+0xb9 0xc0 0x35 0x12
+
+# CHECK: selfhrlh %r1, %r2, %r3
+0xb9 0xc0 0x36 0x12
+
+# CHECK: selfhrne %r1, %r2, %r3
+0xb9 0xc0 0x37 0x12
+
+# CHECK: selfhre %r1, %r2, %r3
+0xb9 0xc0 0x38 0x12
+
+# CHECK: selfhrnlh %r1, %r2, %r3
+0xb9 0xc0 0x39 0x12
+
+# CHECK: selfhrhe %r1, %r2, %r3
+0xb9 0xc0 0x3a 0x12
+
+# CHECK: selfhrnl %r1, %r2, %r3
+0xb9 0xc0 0x3b 0x12
+
+# CHECK: selfhrle %r1, %r2, %r3
+0xb9 0xc0 0x3c 0x12
+
+# CHECK: selfhrnh %r1, %r2, %r3
+0xb9 0xc0 0x3d 0x12
+
+# CHECK: selfhrno %r1, %r2, %r3
+0xb9 0xc0 0x3e 0x12
+
+# CHECK: selr %r0, %r0, %r0, 0
+0xb9 0xf0 0x00 0x00
+
+# CHECK: selr %r0, %r0, %r0, 15
+0xb9 0xf0 0x0f 0x00
+
+# CHECK: selr %r0, %r0, %r15, 0
+0xb9 0xf0 0xf0 0x00
+
+# CHECK: selr %r0, %r15, %r0, 0
+0xb9 0xf0 0x00 0x0f
+
+# CHECK: selr %r15, %r0, %r0, 0
+0xb9 0xf0 0x00 0xf0
+
+# CHECK: selro %r1, %r2, %r3
+0xb9 0xf0 0x31 0x12
+
+# CHECK: selrh %r1, %r2, %r3
+0xb9 0xf0 0x32 0x12
+
+# CHECK: selrnle %r1, %r2, %r3
+0xb9 0xf0 0x33 0x12
+
+# CHECK: selrl %r1, %r2, %r3
+0xb9 0xf0 0x34 0x12
+
+# CHECK: selrnhe %r1, %r2, %r3
+0xb9 0xf0 0x35 0x12
+
+# CHECK: selrlh %r1, %r2, %r3
+0xb9 0xf0 0x36 0x12
+
+# CHECK: selrne %r1, %r2, %r3
+0xb9 0xf0 0x37 0x12
+
+# CHECK: selre %r1, %r2, %r3
+0xb9 0xf0 0x38 0x12
+
+# CHECK: selrnlh %r1, %r2, %r3
+0xb9 0xf0 0x39 0x12
+
+# CHECK: selrhe %r1, %r2, %r3
+0xb9 0xf0 0x3a 0x12
+
+# CHECK: selrnl %r1, %r2, %r3
+0xb9 0xf0 0x3b 0x12
+
+# CHECK: selrle %r1, %r2, %r3
+0xb9 0xf0 0x3c 0x12
+
+# CHECK: selrnh %r1, %r2, %r3
+0xb9 0xf0 0x3d 0x12
+
+# CHECK: selrno %r1, %r2, %r3
+0xb9 0xf0 0x3e 0x12
+
+# CHECK: sortl %r2, %r2
+0xb9 0x38 0x00 0x22
+
+# CHECK: sortl %r2, %r14
+0xb9 0x38 0x00 0x2e
+
+# CHECK: sortl %r14, %r2
+0xb9 0x38 0x00 0xe2
+
+# CHECK: sortl %r6, %r10
+0xb9 0x38 0x00 0x6a
+
+# CHECK: vcefb %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x20 0xc3
+
+# CHECK: vcefb %v0, %v0, 0, 15
+0xe7 0x00 0x00 0xf0 0x20 0xc3
+
+# CHECK: vcefb %v0, %v0, 4, 0
+0xe7 0x00 0x00 0x04 0x20 0xc3
+
+# CHECK: vcefb %v0, %v31, 0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xc3
+
+# CHECK: vcefb %v31, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xc3
+
+# CHECK: vcefb %v14, %v17, 4, 10
+0xe7 0xe1 0x00 0xa4 0x24 0xc3
+
+# CHECK: vcelfb %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x20 0xc1
+
+# CHECK: vcelfb %v0, %v0, 0, 15
+0xe7 0x00 0x00 0xf0 0x20 0xc1
+
+# CHECK: vcelfb %v0, %v0, 4, 0
+0xe7 0x00 0x00 0x04 0x20 0xc1
+
+# CHECK: vcelfb %v0, %v31, 0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xc1
+
+# CHECK: vcelfb %v31, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xc1
+
+# CHECK: vcelfb %v14, %v17, 4, 10
+0xe7 0xe1 0x00 0xa4 0x24 0xc1
+
+# CHECK: vcfeb %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x20 0xc2
+
+# CHECK: vcfeb %v0, %v0, 0, 15
+0xe7 0x00 0x00 0xf0 0x20 0xc2
+
+# CHECK: vcfeb %v0, %v0, 4, 0
+0xe7 0x00 0x00 0x04 0x20 0xc2
+
+# CHECK: vcfeb %v0, %v31, 0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xc2
+
+# CHECK: vcfeb %v31, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xc2
+
+# CHECK: vcfeb %v14, %v17, 4, 10
+0xe7 0xe1 0x00 0xa4 0x24 0xc2
+
+# CHECK: vclfeb %v0, %v0, 0, 0
+0xe7 0x00 0x00 0x00 0x20 0xc0
+
+# CHECK: vclfeb %v0, %v0, 0, 15
+0xe7 0x00 0x00 0xf0 0x20 0xc0
+
+# CHECK: vclfeb %v0, %v0, 4, 0
+0xe7 0x00 0x00 0x04 0x20 0xc0
+
+# CHECK: vclfeb %v0, %v31, 0, 0
+0xe7 0x0f 0x00 0x00 0x24 0xc0
+
+# CHECK: vclfeb %v31, %v0, 0, 0
+0xe7 0xf0 0x00 0x00 0x28 0xc0
+
+# CHECK: vclfeb %v14, %v17, 4, 10
+0xe7 0xe1 0x00 0xa4 0x24 0xc0
+
+# CHECK: vcvb %r0, %v0, 0, 15
+0xe6 0x00 0x00 0x0f 0x00 0x50
+
+# CHECK: vcvb %r3, %v18, 4, 6
+0xe6 0x32 0x00 0x46 0x04 0x50
+
+# CHECK: vcvbg %r0, %v0, 0, 15
+0xe6 0x00 0x00 0x0f 0x00 0x52
+
+# CHECK: vcvbg %r3, %v18, 4, 6
+0xe6 0x32 0x00 0x46 0x04 0x52
+
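+# As a worked example of the VRX format shared by many instructions below,
+# consider "vlbr %v18, 1383(%r3,%r4), 11" and its bytes
+# 0xe6 0x23 0x45 0x67 0xb8 0x06: bytes 0 and 5 form the split opcode
+# (E6 ... 06); byte 1 holds V1 = 2 and index X2 = 3 (%r3); bytes 2-3 hold
+# base B2 = 4 (%r4) and displacement D2 = 0x567 = 1383; byte 4 holds
+# M3 = 0xb = 11 plus RXB = 8, whose leading bit supplies the fifth bit of
+# V1 and extends it to %v18. (Field layout per the z/Architecture VRX
+# description.)
+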
+# CHECK: vlbr %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x06
+
+# CHECK: vlbr %v0, 0, 15
+0xe6 0x00 0x00 0x00 0xf0 0x06
+
+# CHECK: vlbr %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x06
+
+# CHECK: vlbr %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x06
+
+# CHECK: vlbr %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x06
+
+# CHECK: vlbr %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x06
+
+# CHECK: vlbr %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x06
+
+# CHECK: vlbr %v18, 1383(%r3,%r4), 11
+0xe6 0x23 0x45 0x67 0xb8 0x06
+
+# CHECK: vlbrf %v0, 0
+0xe6 0x00 0x00 0x00 0x20 0x06
+
+# CHECK: vlbrf %v0, 4095
+0xe6 0x00 0x0f 0xff 0x20 0x06
+
+# CHECK: vlbrf %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x20 0x06
+
+# CHECK: vlbrf %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x20 0x06
+
+# CHECK: vlbrf %v15, 0
+0xe6 0xf0 0x00 0x00 0x20 0x06
+
+# CHECK: vlbrf %v31, 0
+0xe6 0xf0 0x00 0x00 0x28 0x06
+
+# CHECK: vlbrf %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x28 0x06
+
+# CHECK: vlbrg %v0, 0
+0xe6 0x00 0x00 0x00 0x30 0x06
+
+# CHECK: vlbrg %v0, 4095
+0xe6 0x00 0x0f 0xff 0x30 0x06
+
+# CHECK: vlbrg %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x30 0x06
+
+# CHECK: vlbrg %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x30 0x06
+
+# CHECK: vlbrg %v15, 0
+0xe6 0xf0 0x00 0x00 0x30 0x06
+
+# CHECK: vlbrg %v31, 0
+0xe6 0xf0 0x00 0x00 0x38 0x06
+
+# CHECK: vlbrg %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x38 0x06
+
+# CHECK: vlbrh %v0, 0
+0xe6 0x00 0x00 0x00 0x10 0x06
+
+# CHECK: vlbrh %v0, 4095
+0xe6 0x00 0x0f 0xff 0x10 0x06
+
+# CHECK: vlbrh %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x10 0x06
+
+# CHECK: vlbrh %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x10 0x06
+
+# CHECK: vlbrh %v15, 0
+0xe6 0xf0 0x00 0x00 0x10 0x06
+
+# CHECK: vlbrh %v31, 0
+0xe6 0xf0 0x00 0x00 0x18 0x06
+
+# CHECK: vlbrh %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x18 0x06
+
+# CHECK: vlbrq %v0, 0
+0xe6 0x00 0x00 0x00 0x40 0x06
+
+# CHECK: vlbrq %v0, 4095
+0xe6 0x00 0x0f 0xff 0x40 0x06
+
+# CHECK: vlbrq %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x40 0x06
+
+# CHECK: vlbrq %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x40 0x06
+
+# CHECK: vlbrq %v15, 0
+0xe6 0xf0 0x00 0x00 0x40 0x06
+
+# CHECK: vlbrq %v31, 0
+0xe6 0xf0 0x00 0x00 0x48 0x06
+
+# CHECK: vlbrq %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x48 0x06
+
+# CHECK: vlbrrep %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x05
+
+# CHECK: vlbrrep %v0, 0, 15
+0xe6 0x00 0x00 0x00 0xf0 0x05
+
+# CHECK: vlbrrep %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x05
+
+# CHECK: vlbrrep %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x05
+
+# CHECK: vlbrrep %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x05
+
+# CHECK: vlbrrep %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x05
+
+# CHECK: vlbrrep %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x05
+
+# CHECK: vlbrrep %v18, 1383(%r3,%r4), 11
+0xe6 0x23 0x45 0x67 0xb8 0x05
+
+# CHECK: vlbrrepf %v0, 0
+0xe6 0x00 0x00 0x00 0x20 0x05
+
+# CHECK: vlbrrepf %v0, 4095
+0xe6 0x00 0x0f 0xff 0x20 0x05
+
+# CHECK: vlbrrepf %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x20 0x05
+
+# CHECK: vlbrrepf %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x20 0x05
+
+# CHECK: vlbrrepf %v15, 0
+0xe6 0xf0 0x00 0x00 0x20 0x05
+
+# CHECK: vlbrrepf %v31, 0
+0xe6 0xf0 0x00 0x00 0x28 0x05
+
+# CHECK: vlbrrepf %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x28 0x05
+
+# CHECK: vlbrrepg %v0, 0
+0xe6 0x00 0x00 0x00 0x30 0x05
+
+# CHECK: vlbrrepg %v0, 4095
+0xe6 0x00 0x0f 0xff 0x30 0x05
+
+# CHECK: vlbrrepg %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x30 0x05
+
+# CHECK: vlbrrepg %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x30 0x05
+
+# CHECK: vlbrrepg %v15, 0
+0xe6 0xf0 0x00 0x00 0x30 0x05
+
+# CHECK: vlbrrepg %v31, 0
+0xe6 0xf0 0x00 0x00 0x38 0x05
+
+# CHECK: vlbrrepg %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x38 0x05
+
+# CHECK: vlbrreph %v0, 0
+0xe6 0x00 0x00 0x00 0x10 0x05
+
+# CHECK: vlbrreph %v0, 4095
+0xe6 0x00 0x0f 0xff 0x10 0x05
+
+# CHECK: vlbrreph %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x10 0x05
+
+# CHECK: vlbrreph %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x10 0x05
+
+# CHECK: vlbrreph %v15, 0
+0xe6 0xf0 0x00 0x00 0x10 0x05
+
+# CHECK: vlbrreph %v31, 0
+0xe6 0xf0 0x00 0x00 0x18 0x05
+
+# CHECK: vlbrreph %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x18 0x05
+
+# CHECK: vlebrf %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x03
+
+# CHECK: vlebrf %v0, 0, 3
+0xe6 0x00 0x00 0x00 0x30 0x03
+
+# CHECK: vlebrf %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x03
+
+# CHECK: vlebrf %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x03
+
+# CHECK: vlebrf %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x03
+
+# CHECK: vlebrf %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x03
+
+# CHECK: vlebrf %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x03
+
+# CHECK: vlebrf %v18, 1383(%r3,%r4), 2
+0xe6 0x23 0x45 0x67 0x28 0x03
+
+# CHECK: vlebrg %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x02
+
+# CHECK: vlebrg %v0, 0, 1
+0xe6 0x00 0x00 0x00 0x10 0x02
+
+# CHECK: vlebrg %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x02
+
+# CHECK: vlebrg %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x02
+
+# CHECK: vlebrg %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x02
+
+# CHECK: vlebrg %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x02
+
+# CHECK: vlebrg %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x02
+
+# CHECK: vlebrg %v18, 1383(%r3,%r4), 1
+0xe6 0x23 0x45 0x67 0x18 0x02
+
+# CHECK: vlebrh %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x01
+
+# CHECK: vlebrh %v0, 0, 7
+0xe6 0x00 0x00 0x00 0x70 0x01
+
+# CHECK: vlebrh %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x01
+
+# CHECK: vlebrh %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x01
+
+# CHECK: vlebrh %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x01
+
+# CHECK: vlebrh %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x01
+
+# CHECK: vlebrh %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x01
+
+# CHECK: vlebrh %v18, 1383(%r3,%r4), 4
+0xe6 0x23 0x45 0x67 0x48 0x01
+
+# CHECK: vler %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x07
+
+# CHECK: vler %v0, 0, 15
+0xe6 0x00 0x00 0x00 0xf0 0x07
+
+# CHECK: vler %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x07
+
+# CHECK: vler %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x07
+
+# CHECK: vler %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x07
+
+# CHECK: vler %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x07
+
+# CHECK: vler %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x07
+
+# CHECK: vler %v18, 1383(%r3,%r4), 11
+0xe6 0x23 0x45 0x67 0xb8 0x07
+
+# CHECK: vlerf %v0, 0
+0xe6 0x00 0x00 0x00 0x20 0x07
+
+# CHECK: vlerf %v0, 4095
+0xe6 0x00 0x0f 0xff 0x20 0x07
+
+# CHECK: vlerf %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x20 0x07
+
+# CHECK: vlerf %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x20 0x07
+
+# CHECK: vlerf %v15, 0
+0xe6 0xf0 0x00 0x00 0x20 0x07
+
+# CHECK: vlerf %v31, 0
+0xe6 0xf0 0x00 0x00 0x28 0x07
+
+# CHECK: vlerf %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x28 0x07
+
+# CHECK: vlerg %v0, 0
+0xe6 0x00 0x00 0x00 0x30 0x07
+
+# CHECK: vlerg %v0, 4095
+0xe6 0x00 0x0f 0xff 0x30 0x07
+
+# CHECK: vlerg %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x30 0x07
+
+# CHECK: vlerg %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x30 0x07
+
+# CHECK: vlerg %v15, 0
+0xe6 0xf0 0x00 0x00 0x30 0x07
+
+# CHECK: vlerg %v31, 0
+0xe6 0xf0 0x00 0x00 0x38 0x07
+
+# CHECK: vlerg %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x38 0x07
+
+# CHECK: vlerh %v0, 0
+0xe6 0x00 0x00 0x00 0x10 0x07
+
+# CHECK: vlerh %v0, 4095
+0xe6 0x00 0x0f 0xff 0x10 0x07
+
+# CHECK: vlerh %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x10 0x07
+
+# CHECK: vlerh %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x10 0x07
+
+# CHECK: vlerh %v15, 0
+0xe6 0xf0 0x00 0x00 0x10 0x07
+
+# CHECK: vlerh %v31, 0
+0xe6 0xf0 0x00 0x00 0x18 0x07
+
+# CHECK: vlerh %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x18 0x07
+
+# CHECK: vllebrz %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x04
+
+# CHECK: vllebrz %v0, 0, 15
+0xe6 0x00 0x00 0x00 0xf0 0x04
+
+# CHECK: vllebrz %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x04
+
+# CHECK: vllebrz %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x04
+
+# CHECK: vllebrz %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x04
+
+# CHECK: vllebrz %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x04
+
+# CHECK: vllebrz %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x04
+
+# CHECK: vllebrz %v18, 1383(%r3,%r4), 11
+0xe6 0x23 0x45 0x67 0xb8 0x04
+
+# CHECK: vllebrze %v0, 0
+0xe6 0x00 0x00 0x00 0x60 0x04
+
+# CHECK: vllebrze %v0, 4095
+0xe6 0x00 0x0f 0xff 0x60 0x04
+
+# CHECK: vllebrze %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x60 0x04
+
+# CHECK: vllebrze %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x60 0x04
+
+# CHECK: vllebrze %v15, 0
+0xe6 0xf0 0x00 0x00 0x60 0x04
+
+# CHECK: vllebrze %v31, 0
+0xe6 0xf0 0x00 0x00 0x68 0x04
+
+# CHECK: vllebrze %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x68 0x04
+
+# CHECK: vllebrzf %v0, 0
+0xe6 0x00 0x00 0x00 0x20 0x04
+
+# CHECK: vllebrzf %v0, 4095
+0xe6 0x00 0x0f 0xff 0x20 0x04
+
+# CHECK: vllebrzf %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x20 0x04
+
+# CHECK: vllebrzf %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x20 0x04
+
+# CHECK: vllebrzf %v15, 0
+0xe6 0xf0 0x00 0x00 0x20 0x04
+
+# CHECK: vllebrzf %v31, 0
+0xe6 0xf0 0x00 0x00 0x28 0x04
+
+# CHECK: vllebrzf %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x28 0x04
+
+# CHECK: vllebrzg %v0, 0
+0xe6 0x00 0x00 0x00 0x30 0x04
+
+# CHECK: vllebrzg %v0, 4095
+0xe6 0x00 0x0f 0xff 0x30 0x04
+
+# CHECK: vllebrzg %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x30 0x04
+
+# CHECK: vllebrzg %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x30 0x04
+
+# CHECK: vllebrzg %v15, 0
+0xe6 0xf0 0x00 0x00 0x30 0x04
+
+# CHECK: vllebrzg %v31, 0
+0xe6 0xf0 0x00 0x00 0x38 0x04
+
+# CHECK: vllebrzg %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x38 0x04
+
+# CHECK: vllebrzh %v0, 0
+0xe6 0x00 0x00 0x00 0x10 0x04
+
+# CHECK: vllebrzh %v0, 4095
+0xe6 0x00 0x0f 0xff 0x10 0x04
+
+# CHECK: vllebrzh %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x10 0x04
+
+# CHECK: vllebrzh %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x10 0x04
+
+# CHECK: vllebrzh %v15, 0
+0xe6 0xf0 0x00 0x00 0x10 0x04
+
+# CHECK: vllebrzh %v31, 0
+0xe6 0xf0 0x00 0x00 0x18 0x04
+
+# CHECK: vllebrzh %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x18 0x04
+
+# CHECK: vsld %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x00 0x86
+
+# CHECK: vsld %v0, %v0, %v0, 255
+0xe7 0x00 0x00 0xff 0x00 0x86
+
+# CHECK: vsld %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x02 0x86
+
+# CHECK: vsld %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x04 0x86
+
+# CHECK: vsld %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x08 0x86
+
+# CHECK: vsld %v13, %v17, %v21, 121
+0xe7 0xd1 0x50 0x79 0x06 0x86
+
+# CHECK: vsrd %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x00 0x87
+
+# CHECK: vsrd %v0, %v0, %v0, 255
+0xe7 0x00 0x00 0xff 0x00 0x87
+
+# CHECK: vsrd %v0, %v0, %v31, 0
+0xe7 0x00 0xf0 0x00 0x02 0x87
+
+# CHECK: vsrd %v0, %v31, %v0, 0
+0xe7 0x0f 0x00 0x00 0x04 0x87
+
+# CHECK: vsrd %v31, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x08 0x87
+
+# CHECK: vsrd %v13, %v17, %v21, 121
+0xe7 0xd1 0x50 0x79 0x06 0x87
+
+# CHECK: vstbr %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x0e
+
+# CHECK: vstbr %v0, 0, 15
+0xe6 0x00 0x00 0x00 0xf0 0x0e
+
+# CHECK: vstbr %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x0e
+
+# CHECK: vstbr %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x0e
+
+# CHECK: vstbr %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x0e
+
+# CHECK: vstbr %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x0e
+
+# CHECK: vstbr %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x0e
+
+# CHECK: vstbr %v18, 1383(%r3,%r4), 11
+0xe6 0x23 0x45 0x67 0xb8 0x0e
+
+# CHECK: vstbrf %v0, 0
+0xe6 0x00 0x00 0x00 0x20 0x0e
+
+# CHECK: vstbrf %v0, 4095
+0xe6 0x00 0x0f 0xff 0x20 0x0e
+
+# CHECK: vstbrf %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x20 0x0e
+
+# CHECK: vstbrf %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x20 0x0e
+
+# CHECK: vstbrf %v15, 0
+0xe6 0xf0 0x00 0x00 0x20 0x0e
+
+# CHECK: vstbrf %v31, 0
+0xe6 0xf0 0x00 0x00 0x28 0x0e
+
+# CHECK: vstbrf %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x28 0x0e
+
+# CHECK: vstbrg %v0, 0
+0xe6 0x00 0x00 0x00 0x30 0x0e
+
+# CHECK: vstbrg %v0, 4095
+0xe6 0x00 0x0f 0xff 0x30 0x0e
+
+# CHECK: vstbrg %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x30 0x0e
+
+# CHECK: vstbrg %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x30 0x0e
+
+# CHECK: vstbrg %v15, 0
+0xe6 0xf0 0x00 0x00 0x30 0x0e
+
+# CHECK: vstbrg %v31, 0
+0xe6 0xf0 0x00 0x00 0x38 0x0e
+
+# CHECK: vstbrg %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x38 0x0e
+
+# CHECK: vstbrh %v0, 0
+0xe6 0x00 0x00 0x00 0x10 0x0e
+
+# CHECK: vstbrh %v0, 4095
+0xe6 0x00 0x0f 0xff 0x10 0x0e
+
+# CHECK: vstbrh %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x10 0x0e
+
+# CHECK: vstbrh %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x10 0x0e
+
+# CHECK: vstbrh %v15, 0
+0xe6 0xf0 0x00 0x00 0x10 0x0e
+
+# CHECK: vstbrh %v31, 0
+0xe6 0xf0 0x00 0x00 0x18 0x0e
+
+# CHECK: vstbrh %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x18 0x0e
+
+# CHECK: vstbrq %v0, 0
+0xe6 0x00 0x00 0x00 0x40 0x0e
+
+# CHECK: vstbrq %v0, 4095
+0xe6 0x00 0x0f 0xff 0x40 0x0e
+
+# CHECK: vstbrq %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x40 0x0e
+
+# CHECK: vstbrq %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x40 0x0e
+
+# CHECK: vstbrq %v15, 0
+0xe6 0xf0 0x00 0x00 0x40 0x0e
+
+# CHECK: vstbrq %v31, 0
+0xe6 0xf0 0x00 0x00 0x48 0x0e
+
+# CHECK: vstbrq %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x48 0x0e
+
+# CHECK: vstebrf %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x0b
+
+# CHECK: vstebrf %v0, 0, 3
+0xe6 0x00 0x00 0x00 0x30 0x0b
+
+# CHECK: vstebrf %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x0b
+
+# CHECK: vstebrf %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x0b
+
+# CHECK: vstebrf %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x0b
+
+# CHECK: vstebrf %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x0b
+
+# CHECK: vstebrf %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x0b
+
+# CHECK: vstebrf %v18, 1383(%r3,%r4), 2
+0xe6 0x23 0x45 0x67 0x28 0x0b
+
+# CHECK: vstebrg %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x0a
+
+# CHECK: vstebrg %v0, 0, 1
+0xe6 0x00 0x00 0x00 0x10 0x0a
+
+# CHECK: vstebrg %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x0a
+
+# CHECK: vstebrg %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x0a
+
+# CHECK: vstebrg %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x0a
+
+# CHECK: vstebrg %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x0a
+
+# CHECK: vstebrg %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x0a
+
+# CHECK: vstebrg %v18, 1383(%r3,%r4), 1
+0xe6 0x23 0x45 0x67 0x18 0x0a
+
+# CHECK: vstebrh %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x09
+
+# CHECK: vstebrh %v0, 0, 7
+0xe6 0x00 0x00 0x00 0x70 0x09
+
+# CHECK: vstebrh %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x09
+
+# CHECK: vstebrh %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x09
+
+# CHECK: vstebrh %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x09
+
+# CHECK: vstebrh %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x09
+
+# CHECK: vstebrh %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x09
+
+# CHECK: vstebrh %v18, 1383(%r3,%r4), 4
+0xe6 0x23 0x45 0x67 0x48 0x09
+
+# CHECK: vster %v0, 0, 0
+0xe6 0x00 0x00 0x00 0x00 0x0f
+
+# CHECK: vster %v0, 0, 15
+0xe6 0x00 0x00 0x00 0xf0 0x0f
+
+# CHECK: vster %v0, 4095, 0
+0xe6 0x00 0x0f 0xff 0x00 0x0f
+
+# CHECK: vster %v0, 0(%r15), 0
+0xe6 0x00 0xf0 0x00 0x00 0x0f
+
+# CHECK: vster %v0, 0(%r15,%r1), 0
+0xe6 0x0f 0x10 0x00 0x00 0x0f
+
+# CHECK: vster %v15, 0, 0
+0xe6 0xf0 0x00 0x00 0x00 0x0f
+
+# CHECK: vster %v31, 0, 0
+0xe6 0xf0 0x00 0x00 0x08 0x0f
+
+# CHECK: vster %v18, 1383(%r3,%r4), 11
+0xe6 0x23 0x45 0x67 0xb8 0x0f
+
+# CHECK: vsterf %v0, 0
+0xe6 0x00 0x00 0x00 0x20 0x0f
+
+# CHECK: vsterf %v0, 4095
+0xe6 0x00 0x0f 0xff 0x20 0x0f
+
+# CHECK: vsterf %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x20 0x0f
+
+# CHECK: vsterf %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x20 0x0f
+
+# CHECK: vsterf %v15, 0
+0xe6 0xf0 0x00 0x00 0x20 0x0f
+
+# CHECK: vsterf %v31, 0
+0xe6 0xf0 0x00 0x00 0x28 0x0f
+
+# CHECK: vsterf %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x28 0x0f
+
+# CHECK: vsterg %v0, 0
+0xe6 0x00 0x00 0x00 0x30 0x0f
+
+# CHECK: vsterg %v0, 4095
+0xe6 0x00 0x0f 0xff 0x30 0x0f
+
+# CHECK: vsterg %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x30 0x0f
+
+# CHECK: vsterg %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x30 0x0f
+
+# CHECK: vsterg %v15, 0
+0xe6 0xf0 0x00 0x00 0x30 0x0f
+
+# CHECK: vsterg %v31, 0
+0xe6 0xf0 0x00 0x00 0x38 0x0f
+
+# CHECK: vsterg %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x38 0x0f
+
+# CHECK: vsterh %v0, 0
+0xe6 0x00 0x00 0x00 0x10 0x0f
+
+# CHECK: vsterh %v0, 4095
+0xe6 0x00 0x0f 0xff 0x10 0x0f
+
+# CHECK: vsterh %v0, 0(%r15)
+0xe6 0x00 0xf0 0x00 0x10 0x0f
+
+# CHECK: vsterh %v0, 0(%r15,%r1)
+0xe6 0x0f 0x10 0x00 0x10 0x0f
+
+# CHECK: vsterh %v15, 0
+0xe6 0xf0 0x00 0x00 0x10 0x0f
+
+# CHECK: vsterh %v31, 0
+0xe6 0xf0 0x00 0x00 0x18 0x0f
+
+# CHECK: vsterh %v18, 1383(%r3,%r4)
+0xe6 0x23 0x45 0x67 0x18 0x0f
+
+# CHECK: vstrs %v0, %v0, %v0, %v0, 11, 0
+0xe7 0x00 0x0b 0x00 0x00 0x8b
+
+# CHECK: vstrs %v0, %v0, %v0, %v0, 11, 12
+0xe7 0x00 0x0b 0xc0 0x00 0x8b
+
+# CHECK: vstrs %v18, %v3, %v20, %v5, 11, 0
+0xe7 0x23 0x4b 0x00 0x5a 0x8b
+
+# CHECK: vstrs %v31, %v31, %v31, %v31, 11, 4
+0xe7 0xff 0xfb 0x40 0xff 0x8b
+
+# CHECK: vstrsb %v0, %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x00 0x8b
+
+# CHECK: vstrsb %v0, %v0, %v0, %v0, 0
+0xe7 0x00 0x00 0x00 0x00 0x8b
+
+# CHECK: vstrsb %v0, %v0, %v0, %v0, 12
+0xe7 0x00 0x00 0xc0 0x00 0x8b
+
+# CHECK: vstrsb %v0, %v0, %v0, %v15, 0
+0xe7 0x00 0x00 0x00 0xf0 0x8b
+
+# CHECK: vstrsb %v0, %v0, %v0, %v31, 0
+0xe7 0x00 0x00 0x00 0xf1 0x8b
+
+# CHECK: vstrsb %v0, %v0, %v15, %v0, 0
+0xe7 0x00 0xf0 0x00 0x00 0x8b
+
+# CHECK: vstrsb %v0, %v0, %v31, %v0, 0
+0xe7 0x00 0xf0 0x00 0x02 0x8b
+
+# CHECK: vstrsb %v0, %v15, %v0, %v0, 0
+0xe7 0x0f 0x00 0x00 0x00 0x8b
+
+# CHECK: vstrsb %v0, %v31, %v0, %v0, 0
+0xe7 0x0f 0x00 0x00 0x04 0x8b
+
+# CHECK: vstrsb %v15, %v0, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x00 0x8b
+
+# CHECK: vstrsb %v31, %v0, %v0, %v0, 0
+0xe7 0xf0 0x00 0x00 0x08 0x8b
+
+# CHECK: vstrsb %v18, %v3, %v20, %v5, 4
+0xe7 0x23 0x40 0x40 0x5a 0x8b
+
+# CHECK: vstrsb %v18, %v3, %v20, %v5, 12
+0xe7 0x23 0x40 0xc0 0x5a 0x8b
+
+# CHECK: vstrszb %v18, %v3, %v20, %v5
+0xe7 0x23 0x40 0x20 0x5a 0x8b
+
+# CHECK: vstrsf %v0, %v0, %v0, %v0, 0
+0xe7 0x00 0x02 0x00 0x00 0x8b
+
+# CHECK: vstrsf %v0, %v0, %v0, %v0, 0
+0xe7 0x00 0x02 0x00 0x00 0x8b
+
+# CHECK: vstrsf %v0, %v0, %v0, %v0, 12
+0xe7 0x00 0x02 0xc0 0x00 0x8b
+
+# CHECK: vstrsf %v0, %v0, %v0, %v15, 0
+0xe7 0x00 0x02 0x00 0xf0 0x8b
+
+# CHECK: vstrsf %v0, %v0, %v0, %v31, 0
+0xe7 0x00 0x02 0x00 0xf1 0x8b
+
+# CHECK: vstrsf %v0, %v0, %v15, %v0, 0
+0xe7 0x00 0xf2 0x00 0x00 0x8b
+
+# CHECK: vstrsf %v0, %v0, %v31, %v0, 0
+0xe7 0x00 0xf2 0x00 0x02 0x8b
+
+# CHECK: vstrsf %v0, %v15, %v0, %v0, 0
+0xe7 0x0f 0x02 0x00 0x00 0x8b
+
+# CHECK: vstrsf %v0, %v31, %v0, %v0, 0
+0xe7 0x0f 0x02 0x00 0x04 0x8b
+
+# CHECK: vstrsf %v15, %v0, %v0, %v0, 0
+0xe7 0xf0 0x02 0x00 0x00 0x8b
+
+# CHECK: vstrsf %v31, %v0, %v0, %v0, 0
+0xe7 0xf0 0x02 0x00 0x08 0x8b
+
+# CHECK: vstrsf %v18, %v3, %v20, %v5, 4
+0xe7 0x23 0x42 0x40 0x5a 0x8b
+
+# CHECK: vstrsf %v18, %v3, %v20, %v5, 12
+0xe7 0x23 0x42 0xc0 0x5a 0x8b
+
+# CHECK: vstrszf %v18, %v3, %v20, %v5
+0xe7 0x23 0x42 0x20 0x5a 0x8b
+
+# CHECK: vstrsh %v0, %v0, %v0, %v0, 0
+0xe7 0x00 0x01 0x00 0x00 0x8b
+
+# CHECK: vstrsh %v0, %v0, %v0, %v0, 0
+0xe7 0x00 0x01 0x00 0x00 0x8b
+
+# CHECK: vstrsh %v0, %v0, %v0, %v0, 12
+0xe7 0x00 0x01 0xc0 0x00 0x8b
+
+# CHECK: vstrsh %v0, %v0, %v0, %v15, 0
+0xe7 0x00 0x01 0x00 0xf0 0x8b
+
+# CHECK: vstrsh %v0, %v0, %v0, %v31, 0
+0xe7 0x00 0x01 0x00 0xf1 0x8b
+
+# CHECK: vstrsh %v0, %v0, %v15, %v0, 0
+0xe7 0x00 0xf1 0x00 0x00 0x8b
+
+# CHECK: vstrsh %v0, %v0, %v31, %v0, 0
+0xe7 0x00 0xf1 0x00 0x02 0x8b
+
+# CHECK: vstrsh %v0, %v15, %v0, %v0, 0
+0xe7 0x0f 0x01 0x00 0x00 0x8b
+
+# CHECK: vstrsh %v0, %v31, %v0, %v0, 0
+0xe7 0x0f 0x01 0x00 0x04 0x8b
+
+# CHECK: vstrsh %v15, %v0, %v0, %v0, 0
+0xe7 0xf0 0x01 0x00 0x00 0x8b
+
+# CHECK: vstrsh %v31, %v0, %v0, %v0, 0
+0xe7 0xf0 0x01 0x00 0x08 0x8b
+
+# CHECK: vstrsh %v18, %v3, %v20, %v5, 4
+0xe7 0x23 0x41 0x40 0x5a 0x8b
+
+# CHECK: vstrsh %v18, %v3, %v20, %v5, 12
+0xe7 0x23 0x41 0xc0 0x5a 0x8b
+
+# CHECK: vstrszh %v18, %v3, %v20, %v5
+0xe7 0x23 0x41 0x20 0x5a 0x8b
+
+# CHECK: wcefb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc3
+
+# CHECK: wcefb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc3
+
+# CHECK: wcefb %f0, %f0, 0, 15
+0xe7 0x00 0x00 0xf8 0x20 0xc3
+
+# CHECK: wcefb %f0, %f0, 4, 0
+0xe7 0x00 0x00 0x0c 0x20 0xc3
+
+# CHECK: wcefb %f0, %v31, 0, 0
+0xe7 0x0f 0x00 0x08 0x24 0xc3
+
+# CHECK: wcefb %v31, %f0, 0, 0
+0xe7 0xf0 0x00 0x08 0x28 0xc3
+
+# CHECK: wcefb %f14, %v17, 4, 10
+0xe7 0xe1 0x00 0xac 0x24 0xc3
+
+# CHECK: wcelfb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc1
+
+# CHECK: wcelfb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc1
+
+# CHECK: wcelfb %f0, %f0, 0, 15
+0xe7 0x00 0x00 0xf8 0x20 0xc1
+
+# CHECK: wcelfb %f0, %f0, 4, 0
+0xe7 0x00 0x00 0x0c 0x20 0xc1
+
+# CHECK: wcelfb %f0, %v31, 0, 0
+0xe7 0x0f 0x00 0x08 0x24 0xc1
+
+# CHECK: wcelfb %v31, %f0, 0, 0
+0xe7 0xf0 0x00 0x08 0x28 0xc1
+
+# CHECK: wcelfb %f14, %v17, 4, 10
+0xe7 0xe1 0x00 0xac 0x24 0xc1
+
+# CHECK: wcfeb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc2
+
+# CHECK: wcfeb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc2
+
+# CHECK: wcfeb %f0, %f0, 0, 15
+0xe7 0x00 0x00 0xf8 0x20 0xc2
+
+# CHECK: wcfeb %f0, %f0, 4, 0
+0xe7 0x00 0x00 0x0c 0x20 0xc2
+
+# CHECK: wcfeb %f0, %v31, 0, 0
+0xe7 0x0f 0x00 0x08 0x24 0xc2
+
+# CHECK: wcfeb %v31, %f0, 0, 0
+0xe7 0xf0 0x00 0x08 0x28 0xc2
+
+# CHECK: wcfeb %f14, %v17, 4, 10
+0xe7 0xe1 0x00 0xac 0x24 0xc2
+
+# CHECK: wclfeb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc0
+
+# CHECK: wclfeb %f0, %f0, 0, 0
+0xe7 0x00 0x00 0x08 0x20 0xc0
+
+# CHECK: wclfeb %f0, %f0, 0, 15
+0xe7 0x00 0x00 0xf8 0x20 0xc0
+
+# CHECK: wclfeb %f0, %f0, 4, 0
+0xe7 0x00 0x00 0x0c 0x20 0xc0
+
+# CHECK: wclfeb %f0, %v31, 0, 0
+0xe7 0x0f 0x00 0x08 0x24 0xc0
+
+# CHECK: wclfeb %v31, %f0, 0, 0
+0xe7 0xf0 0x00 0x08 0x28 0xc0
+
+# CHECK: wclfeb %f14, %v17, 4, 10
+0xe7 0xe1 0x00 0xac 0x24 0xc0
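The bulk of the disassembler coverage above exercises the new vector-enhancements-2 byte-reversal family (vlebr*/vstebr*, vler*/vster* and friends). As a rough illustration of what a single byte-reversed element store such as vstebrf computes, here is a minimal C sketch; the helper name is hypothetical, and this is neither the backend's code nor the architectural definition:

    #include <stdint.h>
    #include <string.h>

    /* Sketch only: store element `idx` of a 4 x 32-bit vector with its
       bytes reversed, mirroring the effect of vstebrf. Hypothetical
       helper, not LLVM or machine code. */
    static void store_elem_byte_reversed(uint8_t *dst,
                                         const uint32_t vec[4],
                                         unsigned idx) {
        uint32_t swapped = __builtin_bswap32(vec[idx]); /* reverse bytes */
        memcpy(dst, &swapped, sizeof swapped);          /* 4-byte store */
    }
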
Added: llvm/trunk/test/MC/SystemZ/insn-bad-arch13.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/SystemZ/insn-bad-arch13.s?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/MC/SystemZ/insn-bad-arch13.s (added)
+++ llvm/trunk/test/MC/SystemZ/insn-bad-arch13.s Fri Jul 12 11:13:16 2019
@@ -0,0 +1,881 @@
+# For arch13 only.
+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=arch13 < %s 2> %t
+# RUN: FileCheck < %t %s
+
+#CHECK: error: invalid register pair
+#CHECK: dfltcc %r1, %r2, %r4
+#CHECK: error: invalid register pair
+#CHECK: dfltcc %r2, %r1, %r4
+
+ dfltcc %r1, %r2, %r4
+ dfltcc %r2, %r1, %r4
+
+#CHECK: error: invalid register pair
+#CHECK: kdsa %r0, %r1
+
+ kdsa %r0, %r1
+
+#CHECK: error: invalid operand
+#CHECK: ldrv %f0, -1
+#CHECK: error: invalid operand
+#CHECK: ldrv %f0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: ldrv %f0, 0(%v1,%r2)
+
+ ldrv %f0, -1
+ ldrv %f0, 4096
+ ldrv %f0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: lerv %f0, -1
+#CHECK: error: invalid operand
+#CHECK: lerv %f0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: lerv %f0, 0(%v1,%r2)
+
+ lerv %f0, -1
+ lerv %f0, 4096
+ lerv %f0, 0(%v1,%r2)
+
+#CHECK: error: invalid use of indexed addressing
+#CHECK: mvcrl 160(%r1,%r15),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcrl -1(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcrl 4096(%r1),160(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcrl 0(%r1),-1(%r15)
+#CHECK: error: invalid operand
+#CHECK: mvcrl 0(%r1),4096(%r15)
+
+ mvcrl 160(%r1,%r15),160(%r15)
+ mvcrl -1(%r1),160(%r15)
+ mvcrl 4096(%r1),160(%r15)
+ mvcrl 0(%r1),-1(%r15)
+ mvcrl 0(%r1),4096(%r15)
+
+#CHECK: error: invalid operand
+#CHECK: popcnt %r2, %r4, -1
+#CHECK: error: invalid operand
+#CHECK: popcnt %r2, %r4, 16
+
+ popcnt %r2, %r4, -1
+ popcnt %r2, %r4, 16
+
+#CHECK: error: invalid operand
+#CHECK: selgr %r0, %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: selgr %r0, %r0, %r0, 16
+
+ selgr %r0, %r0, %r0, -1
+ selgr %r0, %r0, %r0, 16
+
+#CHECK: error: invalid operand
+#CHECK: selfhr %r0, %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: selfhr %r0, %r0, %r0, 16
+
+ selfhr %r0, %r0, %r0, -1
+ selfhr %r0, %r0, %r0, 16
+
+#CHECK: error: invalid operand
+#CHECK: selr %r0, %r0, %r0, -1
+#CHECK: error: invalid operand
+#CHECK: selr %r0, %r0, %r0, 16
+
+ selr %r0, %r0, %r0, -1
+ selr %r0, %r0, %r0, 16
+
+#CHECK: error: invalid register pair
+#CHECK: sortl %r1, %r2
+#CHECK: error: invalid register pair
+#CHECK: sortl %r2, %r1
+
+ sortl %r1, %r2
+ sortl %r2, %r1
+
+#CHECK: error: invalid operand
+#CHECK: stdrv %f0, -1
+#CHECK: error: invalid operand
+#CHECK: stdrv %f0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: stdrv %f0, 0(%v1,%r2)
+
+ stdrv %f0, -1
+ stdrv %f0, 4096
+ stdrv %f0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: sterv %f0, -1
+#CHECK: error: invalid operand
+#CHECK: sterv %f0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: sterv %f0, 0(%v1,%r2)
+
+ sterv %f0, -1
+ sterv %f0, 4096
+ sterv %f0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vcefb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcefb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vcefb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vcefb %v0, %v0, 16, 0
+
+ vcefb %v0, %v0, 0, -1
+ vcefb %v0, %v0, 0, 16
+ vcefb %v0, %v0, -1, 0
+ vcefb %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vcelfb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcelfb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vcelfb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vcelfb %v0, %v0, 16, 0
+
+ vcelfb %v0, %v0, 0, -1
+ vcelfb %v0, %v0, 0, 16
+ vcelfb %v0, %v0, -1, 0
+ vcelfb %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vcfeb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcfeb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vcfeb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vcfeb %v0, %v0, 16, 0
+
+ vcfeb %v0, %v0, 0, -1
+ vcfeb %v0, %v0, 0, 16
+ vcfeb %v0, %v0, -1, 0
+ vcfeb %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vcfpl %v0, %v0, 0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcfpl %v0, %v0, 0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vcfpl %v0, %v0, 0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vcfpl %v0, %v0, 0, 16, 0
+#CHECK: error: invalid operand
+#CHECK: vcfpl %v0, %v0, -1, 0, 0
+#CHECK: error: invalid operand
+#CHECK: vcfpl %v0, %v0, 16, 0, 0
+
+ vcfpl %v0, %v0, 0, 0, -1
+ vcfpl %v0, %v0, 0, 0, 16
+ vcfpl %v0, %v0, 0, -1, 0
+ vcfpl %v0, %v0, 0, 16, 0
+ vcfpl %v0, %v0, -1, 0, 0
+ vcfpl %v0, %v0, 16, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vcfps %v0, %v0, 0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcfps %v0, %v0, 0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vcfps %v0, %v0, 0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vcfps %v0, %v0, 0, 16, 0
+#CHECK: error: invalid operand
+#CHECK: vcfps %v0, %v0, -1, 0, 0
+#CHECK: error: invalid operand
+#CHECK: vcfps %v0, %v0, 16, 0, 0
+
+ vcfps %v0, %v0, 0, 0, -1
+ vcfps %v0, %v0, 0, 0, 16
+ vcfps %v0, %v0, 0, -1, 0
+ vcfps %v0, %v0, 0, 16, 0
+ vcfps %v0, %v0, -1, 0, 0
+ vcfps %v0, %v0, 16, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vclfeb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vclfeb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vclfeb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vclfeb %v0, %v0, 16, 0
+
+ vclfeb %v0, %v0, 0, -1
+ vclfeb %v0, %v0, 0, 16
+ vclfeb %v0, %v0, -1, 0
+ vclfeb %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: vclfp %v0, %v0, 0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vclfp %v0, %v0, 0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vclfp %v0, %v0, 0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vclfp %v0, %v0, 0, 16, 0
+#CHECK: error: invalid operand
+#CHECK: vclfp %v0, %v0, -1, 0, 0
+#CHECK: error: invalid operand
+#CHECK: vclfp %v0, %v0, 16, 0, 0
+
+ vclfp %v0, %v0, 0, 0, -1
+ vclfp %v0, %v0, 0, 0, 16
+ vclfp %v0, %v0, 0, -1, 0
+ vclfp %v0, %v0, 0, 16, 0
+ vclfp %v0, %v0, -1, 0, 0
+ vclfp %v0, %v0, 16, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vcsfp %v0, %v0, 0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcsfp %v0, %v0, 0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vcsfp %v0, %v0, 0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vcsfp %v0, %v0, 0, 16, 0
+#CHECK: error: invalid operand
+#CHECK: vcsfp %v0, %v0, -1, 0, 0
+#CHECK: error: invalid operand
+#CHECK: vcsfp %v0, %v0, 16, 0, 0
+
+ vcsfp %v0, %v0, 0, 0, -1
+ vcsfp %v0, %v0, 0, 0, 16
+ vcsfp %v0, %v0, 0, -1, 0
+ vcsfp %v0, %v0, 0, 16, 0
+ vcsfp %v0, %v0, -1, 0, 0
+ vcsfp %v0, %v0, 16, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vcvb %r0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcvb %r0, %v0, 0, 16
+
+ vcvb %r0, %v0, 0, -1
+ vcvb %r0, %v0, 0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vcvbg %r0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vcvbg %r0, %v0, 0, 16
+
+ vcvbg %r0, %v0, 0, -1
+ vcvbg %r0, %v0, 0, 16
+
+#CHECK: error: invalid operand
+#CHECK: vlbr %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbr %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vlbr %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vlbr %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbr %v0, 0(%v1,%r2), 0
+
+ vlbr %v0, 0, -1
+ vlbr %v0, 0, 16
+ vlbr %v0, -1, 0
+ vlbr %v0, 4096, 0
+ vlbr %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vlbrf %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrf %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrf %v0, 0(%v1,%r2)
+
+ vlbrf %v0, -1
+ vlbrf %v0, 4096
+ vlbrf %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlbrg %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrg %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrg %v0, 0(%v1,%r2)
+
+ vlbrg %v0, -1
+ vlbrg %v0, 4096
+ vlbrg %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlbrh %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrh %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrh %v0, 0(%v1,%r2)
+
+ vlbrh %v0, -1
+ vlbrh %v0, 4096
+ vlbrh %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlbrq %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrq %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrq %v0, 0(%v1,%r2)
+
+ vlbrq %v0, -1
+ vlbrq %v0, 4096
+ vlbrq %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlbrrep %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrrep %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vlbrrep %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vlbrrep %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrrep %v0, 0(%v1,%r2), 0
+
+ vlbrrep %v0, 0, -1
+ vlbrrep %v0, 0, 16
+ vlbrrep %v0, -1, 0
+ vlbrrep %v0, 4096, 0
+ vlbrrep %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vlbrrepf %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrrepf %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrrepf %v0, 0(%v1,%r2)
+
+ vlbrrepf %v0, -1
+ vlbrrepf %v0, 4096
+ vlbrrepf %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlbrrepg %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrrepg %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrrepg %v0, 0(%v1,%r2)
+
+ vlbrrepg %v0, -1
+ vlbrrepg %v0, 4096
+ vlbrrepg %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlbrreph %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlbrreph %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlbrreph %v0, 0(%v1,%r2)
+
+ vlbrreph %v0, -1
+ vlbrreph %v0, 4096
+ vlbrreph %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlebrf %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vlebrf %v0, 0, 4
+#CHECK: error: invalid operand
+#CHECK: vlebrf %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vlebrf %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlebrf %v0, 0(%v1,%r2), 0
+
+ vlebrf %v0, 0, -1
+ vlebrf %v0, 0, 4
+ vlebrf %v0, -1, 0
+ vlebrf %v0, 4096, 0
+ vlebrf %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vlebrg %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vlebrg %v0, 0, 2
+#CHECK: error: invalid operand
+#CHECK: vlebrg %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vlebrg %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlebrg %v0, 0(%v1,%r2), 0
+
+ vlebrg %v0, 0, -1
+ vlebrg %v0, 0, 2
+ vlebrg %v0, -1, 0
+ vlebrg %v0, 4096, 0
+ vlebrg %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vlebrh %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vlebrh %v0, 0, 8
+#CHECK: error: invalid operand
+#CHECK: vlebrh %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vlebrh %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlebrh %v0, 0(%v1,%r2), 0
+
+ vlebrh %v0, 0, -1
+ vlebrh %v0, 0, 8
+ vlebrh %v0, -1, 0
+ vlebrh %v0, 4096, 0
+ vlebrh %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vler %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vler %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vler %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vler %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vler %v0, 0(%v1,%r2), 0
+
+ vler %v0, 0, -1
+ vler %v0, 0, 16
+ vler %v0, -1, 0
+ vler %v0, 4096, 0
+ vler %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vlerf %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlerf %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlerf %v0, 0(%v1,%r2)
+
+ vlerf %v0, -1
+ vlerf %v0, 4096
+ vlerf %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlerg %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlerg %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlerg %v0, 0(%v1,%r2)
+
+ vlerg %v0, -1
+ vlerg %v0, 4096
+ vlerg %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vlerh %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vlerh %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vlerh %v0, 0(%v1,%r2)
+
+ vlerh %v0, -1
+ vlerh %v0, 4096
+ vlerh %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vllebrz %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vllebrz %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vllebrz %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vllebrz %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vllebrz %v0, 0(%v1,%r2), 0
+
+ vllebrz %v0, 0, -1
+ vllebrz %v0, 0, 16
+ vllebrz %v0, -1, 0
+ vllebrz %v0, 4096, 0
+ vllebrz %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vllebrze %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vllebrze %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vllebrze %v0, 0(%v1,%r2)
+
+ vllebrze %v0, -1
+ vllebrze %v0, 4096
+ vllebrze %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vllebrzf %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vllebrzf %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vllebrzf %v0, 0(%v1,%r2)
+
+ vllebrzf %v0, -1
+ vllebrzf %v0, 4096
+ vllebrzf %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vllebrzg %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vllebrzg %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vllebrzg %v0, 0(%v1,%r2)
+
+ vllebrzg %v0, -1
+ vllebrzg %v0, 4096
+ vllebrzg %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vllebrzh %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vllebrzh %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vllebrzh %v0, 0(%v1,%r2)
+
+ vllebrzh %v0, -1
+ vllebrzh %v0, 4096
+ vllebrzh %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vsld %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vsld %v0, %v0, %v0, 256
+
+ vsld %v0, %v0, %v0, -1
+ vsld %v0, %v0, %v0, 256
+
+#CHECK: error: invalid operand
+#CHECK: vsrd %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vsrd %v0, %v0, %v0, 256
+
+ vsrd %v0, %v0, %v0, -1
+ vsrd %v0, %v0, %v0, 256
+
+#CHECK: error: invalid operand
+#CHECK: vstbr %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vstbr %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vstbr %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vstbr %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstbr %v0, 0(%v1,%r2), 0
+
+ vstbr %v0, 0, -1
+ vstbr %v0, 0, 16
+ vstbr %v0, -1, 0
+ vstbr %v0, 4096, 0
+ vstbr %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vstbrf %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstbrf %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstbrf %v0, 0(%v1,%r2)
+
+ vstbrf %v0, -1
+ vstbrf %v0, 4096
+ vstbrf %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vstbrg %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstbrg %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstbrg %v0, 0(%v1,%r2)
+
+ vstbrg %v0, -1
+ vstbrg %v0, 4096
+ vstbrg %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vstbrh %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstbrh %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstbrh %v0, 0(%v1,%r2)
+
+ vstbrh %v0, -1
+ vstbrh %v0, 4096
+ vstbrh %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vstbrq %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstbrq %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstbrq %v0, 0(%v1,%r2)
+
+ vstbrq %v0, -1
+ vstbrq %v0, 4096
+ vstbrq %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vstebrf %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vstebrf %v0, 0, 4
+#CHECK: error: invalid operand
+#CHECK: vstebrf %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vstebrf %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstebrf %v0, 0(%v1,%r2), 0
+
+ vstebrf %v0, 0, -1
+ vstebrf %v0, 0, 4
+ vstebrf %v0, -1, 0
+ vstebrf %v0, 4096, 0
+ vstebrf %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vstebrg %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vstebrg %v0, 0, 2
+#CHECK: error: invalid operand
+#CHECK: vstebrg %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vstebrg %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstebrg %v0, 0(%v1,%r2), 0
+
+ vstebrg %v0, 0, -1
+ vstebrg %v0, 0, 2
+ vstebrg %v0, -1, 0
+ vstebrg %v0, 4096, 0
+ vstebrg %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vstebrh %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vstebrh %v0, 0, 8
+#CHECK: error: invalid operand
+#CHECK: vstebrh %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vstebrh %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vstebrh %v0, 0(%v1,%r2), 0
+
+ vstebrh %v0, 0, -1
+ vstebrh %v0, 0, 8
+ vstebrh %v0, -1, 0
+ vstebrh %v0, 4096, 0
+ vstebrh %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vster %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vster %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vster %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vster %v0, 4096, 0
+#CHECK: error: invalid use of vector addressing
+#CHECK: vster %v0, 0(%v1,%r2), 0
+
+ vster %v0, 0, -1
+ vster %v0, 0, 16
+ vster %v0, -1, 0
+ vster %v0, 4096, 0
+ vster %v0, 0(%v1,%r2), 0
+
+#CHECK: error: invalid operand
+#CHECK: vsterf %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vsterf %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vsterf %v0, 0(%v1,%r2)
+
+ vsterf %v0, -1
+ vsterf %v0, 4096
+ vsterf %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vsterg %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vsterg %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vsterg %v0, 0(%v1,%r2)
+
+ vsterg %v0, -1
+ vsterg %v0, 4096
+ vsterg %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vsterh %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vsterh %v0, 4096
+#CHECK: error: invalid use of vector addressing
+#CHECK: vsterh %v0, 0(%v1,%r2)
+
+ vsterh %v0, -1
+ vsterh %v0, 4096
+ vsterh %v0, 0(%v1,%r2)
+
+#CHECK: error: invalid operand
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: vstrs %v0, %v0, %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: vstrs %v0, %v0, %v0, %v0, 16, 0
+#CHECK: error: too few operands
+#CHECK: vstrs %v0, %v0, %v0, %v0
+#CHECK: error: invalid operand
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0, 0
+
+ vstrs %v0, %v0, %v0, %v0, 0, -1
+ vstrs %v0, %v0, %v0, %v0, 0, 16
+ vstrs %v0, %v0, %v0, %v0, -1, 0
+ vstrs %v0, %v0, %v0, %v0, 16, 0
+ vstrs %v0, %v0, %v0, %v0
+ vstrs %v0, %v0, %v0, %v0, 0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vstrsb %v0, %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 16
+#CHECK: error: too few operands
+#CHECK: vstrsb %v0, %v0, %v0
+#CHECK: error: invalid operand
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 0, 0
+
+ vstrsb %v0, %v0, %v0, %v0, -1
+ vstrsb %v0, %v0, %v0, %v0, 16
+ vstrsb %v0, %v0, %v0
+ vstrsb %v0, %v0, %v0, %v0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vstrsf %v0, %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 16
+#CHECK: error: too few operands
+#CHECK: vstrsf %v0, %v0, %v0
+#CHECK: error: invalid operand
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 0, 0
+
+ vstrsf %v0, %v0, %v0, %v0, -1
+ vstrsf %v0, %v0, %v0, %v0, 16
+ vstrsf %v0, %v0, %v0
+ vstrsf %v0, %v0, %v0, %v0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vstrsh %v0, %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 16
+#CHECK: error: too few operands
+#CHECK: vstrsh %v0, %v0, %v0
+#CHECK: error: invalid operand
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 0, 0
+
+ vstrsh %v0, %v0, %v0, %v0, -1
+ vstrsh %v0, %v0, %v0, %v0, 16
+ vstrsh %v0, %v0, %v0
+ vstrsh %v0, %v0, %v0, %v0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vstrszb %v0, %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstrszb %v0, %v0, %v0, %v0, 16
+#CHECK: error: too few operands
+#CHECK: vstrszb %v0, %v0, %v0
+#CHECK: error: invalid operand
+#CHECK: vstrszb %v0, %v0, %v0, %v0, 0, 0
+
+ vstrszb %v0, %v0, %v0, %v0, -1
+ vstrszb %v0, %v0, %v0, %v0, 16
+ vstrszb %v0, %v0, %v0
+ vstrszb %v0, %v0, %v0, %v0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vstrszf %v0, %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstrszf %v0, %v0, %v0, %v0, 16
+#CHECK: error: too few operands
+#CHECK: vstrszf %v0, %v0, %v0
+#CHECK: error: invalid operand
+#CHECK: vstrszf %v0, %v0, %v0, %v0, 0, 0
+
+ vstrszf %v0, %v0, %v0, %v0, -1
+ vstrszf %v0, %v0, %v0, %v0, 16
+ vstrszf %v0, %v0, %v0
+ vstrszf %v0, %v0, %v0, %v0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: vstrszh %v0, %v0, %v0, %v0, -1
+#CHECK: error: invalid operand
+#CHECK: vstrszh %v0, %v0, %v0, %v0, 16
+#CHECK: error: too few operands
+#CHECK: vstrszh %v0, %v0, %v0
+#CHECK: error: invalid operand
+#CHECK: vstrszh %v0, %v0, %v0, %v0, 0, 0
+
+ vstrszh %v0, %v0, %v0, %v0, -1
+ vstrszh %v0, %v0, %v0, %v0, 16
+ vstrszh %v0, %v0, %v0
+ vstrszh %v0, %v0, %v0, %v0, 0, 0
+
+#CHECK: error: invalid operand
+#CHECK: wcefb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: wcefb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: wcefb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: wcefb %v0, %v0, 16, 0
+
+ wcefb %v0, %v0, 0, -1
+ wcefb %v0, %v0, 0, 16
+ wcefb %v0, %v0, -1, 0
+ wcefb %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: wcelfb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: wcelfb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: wcelfb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: wcelfb %v0, %v0, 16, 0
+
+ wcelfb %v0, %v0, 0, -1
+ wcelfb %v0, %v0, 0, 16
+ wcelfb %v0, %v0, -1, 0
+ wcelfb %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: wcfeb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: wcfeb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: wcfeb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: wcfeb %v0, %v0, 16, 0
+
+ wcfeb %v0, %v0, 0, -1
+ wcfeb %v0, %v0, 0, 16
+ wcfeb %v0, %v0, -1, 0
+ wcfeb %v0, %v0, 16, 0
+
+#CHECK: error: invalid operand
+#CHECK: wclfeb %v0, %v0, 0, -1
+#CHECK: error: invalid operand
+#CHECK: wclfeb %v0, %v0, 0, 16
+#CHECK: error: invalid operand
+#CHECK: wclfeb %v0, %v0, -1, 0
+#CHECK: error: invalid operand
+#CHECK: wclfeb %v0, %v0, 16, 0
+
+ wclfeb %v0, %v0, 0, -1
+ wclfeb %v0, %v0, 0, 16
+ wclfeb %v0, %v0, -1, 0
+ wclfeb %v0, %v0, 16, 0
+
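The negative tests above repeat one pattern: 12-bit unsigned displacements reject -1 and 4096, 4-bit masks reject -1 and 16, element indices reject values at or above the element count (4 for vlebrf, 2 for vlebrg, 8 for vlebrh), and vector-register index addressing is rejected for these forms. A minimal C sketch of those range rules, with hypothetical helper names rather than the assembler's actual validation code:

    #include <stdbool.h>

    /* Sketch of the operand ranges the diagnostics above enforce.
       Hypothetical helpers, not the MC layer's implementation. */
    static bool valid_u12_disp(long d) { return d >= 0 && d <= 4095; }
    static bool valid_u4_mask(long m)  { return m >= 0 && m <= 15; }
    static bool valid_elem_idx(long i, long nelems) {
        /* e.g. nelems == 4 for vlebrf, 2 for vlebrg, 8 for vlebrh */
        return i >= 0 && i < nelems;
    }
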
Modified: llvm/trunk/test/MC/SystemZ/insn-bad-z14.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/SystemZ/insn-bad-z14.s?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/test/MC/SystemZ/insn-bad-z14.s (original)
+++ llvm/trunk/test/MC/SystemZ/insn-bad-z14.s Fri Jul 12 11:13:16 2019
@@ -34,6 +34,16 @@
agh %r0, -524289
agh %r0, 524288
+#CHECK: error: instruction requires: deflate-conversion
+#CHECK: dfltcc %r2, %r4, %r6
+
+ dfltcc %r2, %r4, %r6
+
+#CHECK: error: instruction requires: message-security-assist-extension9
+#CHECK: kdsa %r0, %r2
+
+ kdsa %r0, %r2
+
#CHECK: error: invalid register pair
#CHECK: kma %r1, %r2, %r4
#CHECK: error: invalid register pair
@@ -109,6 +119,66 @@
msgc %r0, -524289
msgc %r0, 524288
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: mvcrl 0, 0
+
+ mvcrl 0, 0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: ncgrk %r0, %r0, %r0
+
+ ncgrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: ncrk %r0, %r0, %r0
+
+ ncrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: nngrk %r0, %r0, %r0
+
+ nngrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: nnrk %r0, %r0, %r0
+
+ nnrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: nogrk %r0, %r0, %r0
+
+ nogrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: nork %r0, %r0, %r0
+
+ nork %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: nxgrk %r0, %r0, %r0
+
+ nxgrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: nxrk %r0, %r0, %r0
+
+ nxrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: ocgrk %r0, %r0, %r0
+
+ ocgrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: ocrk %r0, %r0, %r0
+
+ ocrk %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: popcnt %r2, %r4, 1
+
+ popcnt %r2, %r4, 1
+
#CHECK: error: invalid register pair
#CHECK: prno %r1, %r2
#CHECK: error: invalid register pair
@@ -117,6 +187,30 @@
prno %r1, %r2
prno %r2, %r1
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: selgr %r0, %r0, %r0, 0
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: selgre %r0, %r0, %r0
+
+ selgr %r0, %r0, %r0, 0
+ selgre %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: selfhr %r0, %r0, %r0, 0
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: selfhre %r0, %r0, %r0
+
+ selfhr %r0, %r0, %r0, 0
+ selfhre %r0, %r0, %r0
+
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: selr %r0, %r0, %r0, 0
+#CHECK: error: instruction requires: miscellaneous-extensions-3
+#CHECK: selre %r0, %r0, %r0
+
+ selr %r0, %r0, %r0, 0
+ selre %r0, %r0, %r0
+
#CHECK: error: invalid operand
#CHECK: sgh %r0, -524289
#CHECK: error: invalid operand
@@ -125,6 +219,11 @@
sgh %r0, -524289
sgh %r0, 524288
+#CHECK: error: instruction requires: enhanced-sort
+#CHECK: sortl %r2, %r4
+
+ sortl %r2, %r4
+
#CHECK: error: invalid operand
#CHECK: stgsc %r0, -524289
#CHECK: error: invalid operand
@@ -147,6 +246,41 @@
vap %v0, %v0, %v0, -1, 0
vap %v0, %v0, %v0, 256, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vcefb %v0, %v0, 0, 0
+
+ vcefb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vcelfb %v0, %v0, 0, 0
+
+ vcelfb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vcfeb %v0, %v0, 0, 0
+
+ vcfeb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vcfpl %v0, %v0, 0, 0, 0
+
+ vcfpl %v0, %v0, 0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vcfps %v0, %v0, 0, 0, 0
+
+ vcfps %v0, %v0, 0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vclfeb %v0, %v0, 0, 0
+
+ vclfeb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vclfp %v0, %v0, 0, 0, 0
+
+ vclfp %v0, %v0, 0, 0, 0
+
#CHECK: error: invalid operand
#CHECK: vcp %v0, %v0, -1
#CHECK: error: invalid operand
@@ -155,21 +289,32 @@
vcp %v0, %v0, -1
vcp %v0, %v0, 16
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vcsfp %v0, %v0, 0, 0, 0
+
+ vcsfp %v0, %v0, 0, 0, 0
+
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vcvb %r0, %v0, 16
+#CHECK: error: instruction requires: vector-packed-decimal-enhancement
+#CHECK: vcvb %r0, %v0, 0, 1
vcvb %r0, %v0, -1
vcvb %r0, %v0, 16
+ vcvb %r0, %v0, 0, 1
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, -1
#CHECK: error: invalid operand
#CHECK: vcvbg %r0, %v0, 16
+#CHECK: error: instruction requires: vector-packed-decimal-enhancement
+#CHECK: vcvbg %r0, %v0, 0, 1
vcvbg %r0, %v0, -1
vcvbg %r0, %v0, 16
+ vcvbg %r0, %v0, 0, 1
#CHECK: error: invalid operand
#CHECK: vcvd %r0, %v0, 0, -1
@@ -408,6 +553,79 @@
vllezlf %v0, 4096
vllezlf %v0, 0(%v1,%r2)
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbr %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrf %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrg %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrh %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrq %v0, 0
+
+ vlbr %v0, 0, 0
+ vlbrf %v0, 0
+ vlbrg %v0, 0
+ vlbrh %v0, 0
+ vlbrq %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrrep %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrrepf %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrrepg %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlbrreph %v0, 0
+
+ vlbrrep %v0, 0, 0
+ vlbrrepf %v0, 0
+ vlbrrepg %v0, 0
+ vlbrreph %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlebrf %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlebrg %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlebrh %v0, 0, 0
+
+ vlebrf %v0, 0, 0
+ vlebrg %v0, 0, 0
+ vlebrh %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vler %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlerf %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlerg %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vlerh %v0, 0
+
+ vler %v0, 0, 0
+ vlerf %v0, 0
+ vlerg %v0, 0
+ vlerh %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vllebrz %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vllebrze %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vllebrzf %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vllebrzg %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vllebrzh %v0, 0
+
+ vllebrz %v0, 0, 0
+ vllebrze %v0, 0
+ vllebrzf %v0, 0
+ vllebrzg %v0, 0
+ vllebrzh %v0, 0
+
#CHECK: error: invalid operand
#CHECK: vlrl %v0, 0, -1
#CHECK: error: invalid operand
@@ -551,6 +769,11 @@
vsdp %v0, %v0, %v0, -1, 0
vsdp %v0, %v0, %v0, 256, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vsld %v0, %v0, %v0, 0
+
+ vsld %v0, %v0, %v0, 0
+
#CHECK: error: invalid operand
#CHECK: vsp %v0, %v0, %v0, 0, -1
#CHECK: error: invalid operand
@@ -565,6 +788,11 @@
vsp %v0, %v0, %v0, -1, 0
vsp %v0, %v0, %v0, 256, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vsrd %v0, %v0, %v0, 0
+
+ vsrd %v0, %v0, %v0, 0
+
#CHECK: error: invalid operand
#CHECK: vsrp %v0, %v0, 0, 0, -1
#CHECK: error: invalid operand
@@ -585,6 +813,48 @@
vsrp %v0, %v0, -1, 0, 0
vsrp %v0, %v0, 256, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstbr %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstbrf %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstbrg %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstbrh %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstbrq %v0, 0
+
+ vstbr %v0, 0, 0
+ vstbrf %v0, 0
+ vstbrg %v0, 0
+ vstbrh %v0, 0
+ vstbrq %v0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstebrf %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstebrg %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstebrh %v0, 0, 0
+
+ vstebrf %v0, 0, 0
+ vstebrg %v0, 0, 0
+ vstebrh %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vster %v0, 0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vsterf %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vsterg %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vsterh %v0, 0
+
+ vster %v0, 0, 0
+ vsterf %v0, 0
+ vsterg %v0, 0
+ vsterh %v0, 0
+
#CHECK: error: invalid operand
#CHECK: vstrl %v0, 0, -1
#CHECK: error: invalid operand
@@ -613,6 +883,29 @@
vstrlr %v0, %r0, 4096
vstrlr %v0, %r0, 0(%r0)
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrsb %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrsf %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrsh %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrszb %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrszf %v0, %v0, %v0, %v0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: vstrszh %v0, %v0, %v0, %v0
+
+ vstrs %v0, %v0, %v0, %v0, 0
+ vstrsb %v0, %v0, %v0, %v0
+ vstrsf %v0, %v0, %v0, %v0
+ vstrsh %v0, %v0, %v0, %v0
+ vstrszb %v0, %v0, %v0, %v0
+ vstrszf %v0, %v0, %v0, %v0
+ vstrszh %v0, %v0, %v0, %v0
+
#CHECK: error: invalid operand
#CHECK: vupkz %v0, 0, -1
#CHECK: error: invalid operand
@@ -630,6 +923,26 @@
vupkz %v0, 4096, 0
vupkz %v0, 0(%r0), 0
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wcefb %v0, %v0, 0, 0
+
+ wcefb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wcelfb %v0, %v0, 0, 0
+
+ wcelfb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wcfeb %v0, %v0, 0, 0
+
+ wcfeb %v0, %v0, 0, 0
+
+#CHECK: error: instruction requires: vector-enhancements-2
+#CHECK: wclfeb %v0, %v0, 0, 0
+
+ wclfeb %v0, %v0, 0, 0
+
#CHECK: error: invalid operand
#CHECK: wfisb %v0, %v0, 0, -1
#CHECK: error: invalid operand
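The z14 additions above check feature gating rather than encodings: each new mnemonic is tied to the facility that provides it (miscellaneous-extensions-3, vector-enhancements-2, enhanced-sort, deflate-conversion, message-security-assist-extension9), and assembling under -mcpu=z14 reports which facility is missing. A rough C sketch of that kind of lookup, using hypothetical structures rather than LLVM's TableGen-generated tables:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch of feature gating: every mnemonic carries the feature bits
       it requires; a CPU accepts it only if all bits are present.
       Hypothetical tables, not TableGen output. */
    enum {
        FEAT_MISC_EXT3 = 1u << 0,  /* miscellaneous-extensions-3 */
        FEAT_VEC_ENH2  = 1u << 1,  /* vector-enhancements-2 */
        FEAT_ENH_SORT  = 1u << 2,  /* enhanced-sort */
        FEAT_DEFLATE   = 1u << 3,  /* deflate-conversion */
    };

    struct insn { const char *mnemonic; uint32_t required; };

    static bool insn_available(const struct insn *i, uint32_t cpu) {
        return (i->required & ~cpu) == 0;  /* all required bits set? */
    }
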
Added: llvm/trunk/test/MC/SystemZ/insn-good-arch13.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/SystemZ/insn-good-arch13.s?rev=365932&view=auto
==============================================================================
--- llvm/trunk/test/MC/SystemZ/insn-good-arch13.s (added)
+++ llvm/trunk/test/MC/SystemZ/insn-good-arch13.s Fri Jul 12 11:13:16 2019
@@ -0,0 +1,1344 @@
+# For arch13 and above.
+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=arch13 -show-encoding %s \
+# RUN: | FileCheck %s
+
+#CHECK: dfltcc %r2, %r2, %r2 # encoding: [0xb9,0x39,0x20,0x22]
+#CHECK: dfltcc %r2, %r8, %r15 # encoding: [0xb9,0x39,0xf0,0x28]
+#CHECK: dfltcc %r14, %r8, %r2 # encoding: [0xb9,0x39,0x20,0xe8]
+#CHECK: dfltcc %r6, %r8, %r10 # encoding: [0xb9,0x39,0xa0,0x68]
+
+ dfltcc %r2, %r2, %r2
+ dfltcc %r2, %r8, %r15
+ dfltcc %r14, %r8, %r2
+ dfltcc %r6, %r8, %r10
+
+#CHECK: kdsa %r0, %r2 # encoding: [0xb9,0x3a,0x00,0x02]
+#CHECK: kdsa %r0, %r14 # encoding: [0xb9,0x3a,0x00,0x0e]
+#CHECK: kdsa %r15, %r2 # encoding: [0xb9,0x3a,0x00,0xf2]
+#CHECK: kdsa %r7, %r10 # encoding: [0xb9,0x3a,0x00,0x7a]
+
+ kdsa %r0, %r2
+ kdsa %r0, %r14
+ kdsa %r15, %r2
+ kdsa %r7, %r10
+
+#CHECK: vllebrzg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x04]
+#CHECK: vllebrzg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x04]
+#CHECK: vllebrzg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x04]
+
+ ldrv %f0, 0
+ ldrv %f0, 4095
+ ldrv %f0, 0(%r15)
+ ldrv %f0, 0(%r15,%r1)
+ ldrv %f15, 0
+ ldrv %v31, 0
+ ldrv %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrze %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x60,0x04]
+#CHECK: vllebrze %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x68,0x04]
+#CHECK: vllebrze %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x68,0x04]
+
+ lerv %f0, 0
+ lerv %f0, 4095
+ lerv %f0, 0(%r15)
+ lerv %f0, 0(%r15,%r1)
+ lerv %f15, 0
+ lerv %v31, 0
+ lerv %v18, 0x567(%r3,%r4)
+
+#CHECK: mvcrl 0, 0 # encoding: [0xe5,0x0a,0x00,0x00,0x00,0x00]
+#CHECK: mvcrl 0(%r1), 0(%r2) # encoding: [0xe5,0x0a,0x10,0x00,0x20,0x00]
+#CHECK: mvcrl 160(%r1), 320(%r15) # encoding: [0xe5,0x0a,0x10,0xa0,0xf1,0x40]
+#CHECK: mvcrl 0(%r1), 4095 # encoding: [0xe5,0x0a,0x10,0x00,0x0f,0xff]
+#CHECK: mvcrl 0(%r1), 4095(%r2) # encoding: [0xe5,0x0a,0x10,0x00,0x2f,0xff]
+#CHECK: mvcrl 0(%r1), 4095(%r15) # encoding: [0xe5,0x0a,0x10,0x00,0xff,0xff]
+#CHECK: mvcrl 0(%r1), 0 # encoding: [0xe5,0x0a,0x10,0x00,0x00,0x00]
+#CHECK: mvcrl 0(%r15), 0 # encoding: [0xe5,0x0a,0xf0,0x00,0x00,0x00]
+#CHECK: mvcrl 4095(%r1), 0 # encoding: [0xe5,0x0a,0x1f,0xff,0x00,0x00]
+#CHECK: mvcrl 4095(%r15), 0 # encoding: [0xe5,0x0a,0xff,0xff,0x00,0x00]
+
+ mvcrl 0, 0
+ mvcrl 0(%r1), 0(%r2)
+ mvcrl 160(%r1), 320(%r15)
+ mvcrl 0(%r1), 4095
+ mvcrl 0(%r1), 4095(%r2)
+ mvcrl 0(%r1), 4095(%r15)
+ mvcrl 0(%r1), 0
+ mvcrl 0(%r15), 0
+ mvcrl 4095(%r1), 0
+ mvcrl 4095(%r15), 0
+
+#CHECK: ncgrk %r0, %r0, %r0 # encoding: [0xb9,0xe5,0x00,0x00]
+#CHECK: ncgrk %r0, %r0, %r15 # encoding: [0xb9,0xe5,0xf0,0x00]
+#CHECK: ncgrk %r0, %r15, %r0 # encoding: [0xb9,0xe5,0x00,0x0f]
+#CHECK: ncgrk %r15, %r0, %r0 # encoding: [0xb9,0xe5,0x00,0xf0]
+#CHECK: ncgrk %r7, %r8, %r9 # encoding: [0xb9,0xe5,0x90,0x78]
+
+ ncgrk %r0,%r0,%r0
+ ncgrk %r0,%r0,%r15
+ ncgrk %r0,%r15,%r0
+ ncgrk %r15,%r0,%r0
+ ncgrk %r7,%r8,%r9
+
+#CHECK: ncrk %r0, %r0, %r0 # encoding: [0xb9,0xf5,0x00,0x00]
+#CHECK: ncrk %r0, %r0, %r15 # encoding: [0xb9,0xf5,0xf0,0x00]
+#CHECK: ncrk %r0, %r15, %r0 # encoding: [0xb9,0xf5,0x00,0x0f]
+#CHECK: ncrk %r15, %r0, %r0 # encoding: [0xb9,0xf5,0x00,0xf0]
+#CHECK: ncrk %r7, %r8, %r9 # encoding: [0xb9,0xf5,0x90,0x78]
+
+ ncrk %r0,%r0,%r0
+ ncrk %r0,%r0,%r15
+ ncrk %r0,%r15,%r0
+ ncrk %r15,%r0,%r0
+ ncrk %r7,%r8,%r9
+
+#CHECK: nngrk %r0, %r0, %r0 # encoding: [0xb9,0x64,0x00,0x00]
+#CHECK: nngrk %r0, %r0, %r15 # encoding: [0xb9,0x64,0xf0,0x00]
+#CHECK: nngrk %r0, %r15, %r0 # encoding: [0xb9,0x64,0x00,0x0f]
+#CHECK: nngrk %r15, %r0, %r0 # encoding: [0xb9,0x64,0x00,0xf0]
+#CHECK: nngrk %r7, %r8, %r9 # encoding: [0xb9,0x64,0x90,0x78]
+
+ nngrk %r0,%r0,%r0
+ nngrk %r0,%r0,%r15
+ nngrk %r0,%r15,%r0
+ nngrk %r15,%r0,%r0
+ nngrk %r7,%r8,%r9
+
+#CHECK: nnrk %r0, %r0, %r0 # encoding: [0xb9,0x74,0x00,0x00]
+#CHECK: nnrk %r0, %r0, %r15 # encoding: [0xb9,0x74,0xf0,0x00]
+#CHECK: nnrk %r0, %r15, %r0 # encoding: [0xb9,0x74,0x00,0x0f]
+#CHECK: nnrk %r15, %r0, %r0 # encoding: [0xb9,0x74,0x00,0xf0]
+#CHECK: nnrk %r7, %r8, %r9 # encoding: [0xb9,0x74,0x90,0x78]
+
+ nnrk %r0,%r0,%r0
+ nnrk %r0,%r0,%r15
+ nnrk %r0,%r15,%r0
+ nnrk %r15,%r0,%r0
+ nnrk %r7,%r8,%r9
+
+#CHECK: nogrk %r0, %r0, %r0 # encoding: [0xb9,0x66,0x00,0x00]
+#CHECK: nogrk %r0, %r0, %r15 # encoding: [0xb9,0x66,0xf0,0x00]
+#CHECK: nogrk %r0, %r15, %r0 # encoding: [0xb9,0x66,0x00,0x0f]
+#CHECK: nogrk %r15, %r0, %r0 # encoding: [0xb9,0x66,0x00,0xf0]
+#CHECK: nogrk %r7, %r8, %r9 # encoding: [0xb9,0x66,0x90,0x78]
+
+ nogrk %r0,%r0,%r0
+ nogrk %r0,%r0,%r15
+ nogrk %r0,%r15,%r0
+ nogrk %r15,%r0,%r0
+ nogrk %r7,%r8,%r9
+
+#CHECK: nork %r0, %r0, %r0 # encoding: [0xb9,0x76,0x00,0x00]
+#CHECK: nork %r0, %r0, %r15 # encoding: [0xb9,0x76,0xf0,0x00]
+#CHECK: nork %r0, %r15, %r0 # encoding: [0xb9,0x76,0x00,0x0f]
+#CHECK: nork %r15, %r0, %r0 # encoding: [0xb9,0x76,0x00,0xf0]
+#CHECK: nork %r7, %r8, %r9 # encoding: [0xb9,0x76,0x90,0x78]
+
+ nork %r0,%r0,%r0
+ nork %r0,%r0,%r15
+ nork %r0,%r15,%r0
+ nork %r15,%r0,%r0
+ nork %r7,%r8,%r9
+
+#CHECK: nxgrk %r0, %r0, %r0 # encoding: [0xb9,0x67,0x00,0x00]
+#CHECK: nxgrk %r0, %r0, %r15 # encoding: [0xb9,0x67,0xf0,0x00]
+#CHECK: nxgrk %r0, %r15, %r0 # encoding: [0xb9,0x67,0x00,0x0f]
+#CHECK: nxgrk %r15, %r0, %r0 # encoding: [0xb9,0x67,0x00,0xf0]
+#CHECK: nxgrk %r7, %r8, %r9 # encoding: [0xb9,0x67,0x90,0x78]
+
+ nxgrk %r0,%r0,%r0
+ nxgrk %r0,%r0,%r15
+ nxgrk %r0,%r15,%r0
+ nxgrk %r15,%r0,%r0
+ nxgrk %r7,%r8,%r9
+
+#CHECK: nxrk %r0, %r0, %r0 # encoding: [0xb9,0x77,0x00,0x00]
+#CHECK: nxrk %r0, %r0, %r15 # encoding: [0xb9,0x77,0xf0,0x00]
+#CHECK: nxrk %r0, %r15, %r0 # encoding: [0xb9,0x77,0x00,0x0f]
+#CHECK: nxrk %r15, %r0, %r0 # encoding: [0xb9,0x77,0x00,0xf0]
+#CHECK: nxrk %r7, %r8, %r9 # encoding: [0xb9,0x77,0x90,0x78]
+
+ nxrk %r0,%r0,%r0
+ nxrk %r0,%r0,%r15
+ nxrk %r0,%r15,%r0
+ nxrk %r15,%r0,%r0
+ nxrk %r7,%r8,%r9
+
+#CHECK: ocgrk %r0, %r0, %r0 # encoding: [0xb9,0x65,0x00,0x00]
+#CHECK: ocgrk %r0, %r0, %r15 # encoding: [0xb9,0x65,0xf0,0x00]
+#CHECK: ocgrk %r0, %r15, %r0 # encoding: [0xb9,0x65,0x00,0x0f]
+#CHECK: ocgrk %r15, %r0, %r0 # encoding: [0xb9,0x65,0x00,0xf0]
+#CHECK: ocgrk %r7, %r8, %r9 # encoding: [0xb9,0x65,0x90,0x78]
+
+ ocgrk %r0,%r0,%r0
+ ocgrk %r0,%r0,%r15
+ ocgrk %r0,%r15,%r0
+ ocgrk %r15,%r0,%r0
+ ocgrk %r7,%r8,%r9
+
+#CHECK: ocrk %r0, %r0, %r0 # encoding: [0xb9,0x75,0x00,0x00]
+#CHECK: ocrk %r0, %r0, %r15 # encoding: [0xb9,0x75,0xf0,0x00]
+#CHECK: ocrk %r0, %r15, %r0 # encoding: [0xb9,0x75,0x00,0x0f]
+#CHECK: ocrk %r15, %r0, %r0 # encoding: [0xb9,0x75,0x00,0xf0]
+#CHECK: ocrk %r7, %r8, %r9 # encoding: [0xb9,0x75,0x90,0x78]
+
+ ocrk %r0,%r0,%r0
+ ocrk %r0,%r0,%r15
+ ocrk %r0,%r15,%r0
+ ocrk %r15,%r0,%r0
+ ocrk %r7,%r8,%r9
+
+#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00]
+#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f]
+#CHECK: popcnt %r14, %r0 # encoding: [0xb9,0xe1,0x00,0xe0]
+#CHECK: popcnt %r6, %r8 # encoding: [0xb9,0xe1,0x00,0x68]
+#CHECK: popcnt %r4, %r13, 1 # encoding: [0xb9,0xe1,0x10,0x4d]
+#CHECK: popcnt %r4, %r13, 15 # encoding: [0xb9,0xe1,0xf0,0x4d]
+
+ popcnt %r0, %r0
+ popcnt %r0, %r15
+ popcnt %r14, %r0
+ popcnt %r6, %r8
+ popcnt %r4, %r13, 1
+ popcnt %r4, %r13, 15
+
+#CHECK: selgr %r0, %r0, %r0, 0 # encoding: [0xb9,0xe3,0x00,0x00]
+#CHECK: selgr %r0, %r0, %r0, 15 # encoding: [0xb9,0xe3,0x0f,0x00]
+#CHECK: selgr %r0, %r0, %r15, 0 # encoding: [0xb9,0xe3,0xf0,0x00]
+#CHECK: selgr %r0, %r15, %r0, 0 # encoding: [0xb9,0xe3,0x00,0x0f]
+#CHECK: selgr %r15, %r0, %r0, 0 # encoding: [0xb9,0xe3,0x00,0xf0]
+#CHECK: selgr %r7, %r8, %r9, 10 # encoding: [0xb9,0xe3,0x9a,0x78]
+
+ selgr %r0, %r0, %r0, 0
+ selgr %r0, %r0, %r0, 15
+ selgr %r0, %r0, %r15, 0
+ selgr %r0, %r15, %r0, 0
+ selgr %r15, %r0, %r0, 0
+ selgr %r7, %r8, %r9, 10
+
+#CHECK: selgro %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x31,0x12]
+#CHECK: selgrh %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x32,0x12]
+#CHECK: selgrp %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x32,0x12]
+#CHECK: selgrnle %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x33,0x12]
+#CHECK: selgrl %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x34,0x12]
+#CHECK: selgrm %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x34,0x12]
+#CHECK: selgrnhe %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x35,0x12]
+#CHECK: selgrlh %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x36,0x12]
+#CHECK: selgrne %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x37,0x12]
+#CHECK: selgrnz %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x37,0x12]
+#CHECK: selgre %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x38,0x12]
+#CHECK: selgrz %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x38,0x12]
+#CHECK: selgrnlh %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x39,0x12]
+#CHECK: selgrhe %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3a,0x12]
+#CHECK: selgrnl %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3b,0x12]
+#CHECK: selgrnm %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3b,0x12]
+#CHECK: selgrle %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3c,0x12]
+#CHECK: selgrnh %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3d,0x12]
+#CHECK: selgrnp %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3d,0x12]
+#CHECK: selgrno %r1, %r2, %r3 # encoding: [0xb9,0xe3,0x3e,0x12]
+
+ selgro %r1, %r2, %r3
+ selgrh %r1, %r2, %r3
+ selgrp %r1, %r2, %r3
+ selgrnle %r1, %r2, %r3
+ selgrl %r1, %r2, %r3
+ selgrm %r1, %r2, %r3
+ selgrnhe %r1, %r2, %r3
+ selgrlh %r1, %r2, %r3
+ selgrne %r1, %r2, %r3
+ selgrnz %r1, %r2, %r3
+ selgre %r1, %r2, %r3
+ selgrz %r1, %r2, %r3
+ selgrnlh %r1, %r2, %r3
+ selgrhe %r1, %r2, %r3
+ selgrnl %r1, %r2, %r3
+ selgrnm %r1, %r2, %r3
+ selgrle %r1, %r2, %r3
+ selgrnh %r1, %r2, %r3
+ selgrnp %r1, %r2, %r3
+ selgrno %r1, %r2, %r3
+
+#CHECK: selfhr %r0, %r0, %r0, 0 # encoding: [0xb9,0xc0,0x00,0x00]
+#CHECK: selfhr %r0, %r0, %r0, 15 # encoding: [0xb9,0xc0,0x0f,0x00]
+#CHECK: selfhr %r0, %r0, %r15, 0 # encoding: [0xb9,0xc0,0xf0,0x00]
+#CHECK: selfhr %r0, %r15, %r0, 0 # encoding: [0xb9,0xc0,0x00,0x0f]
+#CHECK: selfhr %r15, %r0, %r0, 0 # encoding: [0xb9,0xc0,0x00,0xf0]
+#CHECK: selfhr %r7, %r8, %r9, 10 # encoding: [0xb9,0xc0,0x9a,0x78]
+
+ selfhr %r0, %r0, %r0, 0
+ selfhr %r0, %r0, %r0, 15
+ selfhr %r0, %r0, %r15, 0
+ selfhr %r0, %r15, %r0, 0
+ selfhr %r15, %r0, %r0, 0
+ selfhr %r7, %r8, %r9, 10
+
+#CHECK: selfhro %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x31,0x12]
+#CHECK: selfhrh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x32,0x12]
+#CHECK: selfhrp %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x32,0x12]
+#CHECK: selfhrnle %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x33,0x12]
+#CHECK: selfhrl %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x34,0x12]
+#CHECK: selfhrm %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x34,0x12]
+#CHECK: selfhrnhe %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x35,0x12]
+#CHECK: selfhrlh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x36,0x12]
+#CHECK: selfhrne %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x37,0x12]
+#CHECK: selfhrnz %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x37,0x12]
+#CHECK: selfhre %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x38,0x12]
+#CHECK: selfhrz %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x38,0x12]
+#CHECK: selfhrnlh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x39,0x12]
+#CHECK: selfhrhe %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3a,0x12]
+#CHECK: selfhrnl %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3b,0x12]
+#CHECK: selfhrnm %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3b,0x12]
+#CHECK: selfhrle %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3c,0x12]
+#CHECK: selfhrnh %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3d,0x12]
+#CHECK: selfhrnp %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3d,0x12]
+#CHECK: selfhrno %r1, %r2, %r3 # encoding: [0xb9,0xc0,0x3e,0x12]
+
+ selfhro %r1, %r2, %r3
+ selfhrh %r1, %r2, %r3
+ selfhrp %r1, %r2, %r3
+ selfhrnle %r1, %r2, %r3
+ selfhrl %r1, %r2, %r3
+ selfhrm %r1, %r2, %r3
+ selfhrnhe %r1, %r2, %r3
+ selfhrlh %r1, %r2, %r3
+ selfhrne %r1, %r2, %r3
+ selfhrnz %r1, %r2, %r3
+ selfhre %r1, %r2, %r3
+ selfhrz %r1, %r2, %r3
+ selfhrnlh %r1, %r2, %r3
+ selfhrhe %r1, %r2, %r3
+ selfhrnl %r1, %r2, %r3
+ selfhrnm %r1, %r2, %r3
+ selfhrle %r1, %r2, %r3
+ selfhrnh %r1, %r2, %r3
+ selfhrnp %r1, %r2, %r3
+ selfhrno %r1, %r2, %r3
+
+#CHECK: selr %r0, %r0, %r0, 0 # encoding: [0xb9,0xf0,0x00,0x00]
+#CHECK: selr %r0, %r0, %r0, 15 # encoding: [0xb9,0xf0,0x0f,0x00]
+#CHECK: selr %r0, %r0, %r15, 0 # encoding: [0xb9,0xf0,0xf0,0x00]
+#CHECK: selr %r0, %r15, %r0, 0 # encoding: [0xb9,0xf0,0x00,0x0f]
+#CHECK: selr %r15, %r0, %r0, 0 # encoding: [0xb9,0xf0,0x00,0xf0]
+#CHECK: selr %r7, %r8, %r9, 10 # encoding: [0xb9,0xf0,0x9a,0x78]
+
+ selr %r0, %r0, %r0, 0
+ selr %r0, %r0, %r0, 15
+ selr %r0, %r0, %r15, 0
+ selr %r0, %r15, %r0, 0
+ selr %r15, %r0, %r0, 0
+ selr %r7, %r8, %r9, 10
+
+#CHECK: selro %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x31,0x12]
+#CHECK: selrh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x32,0x12]
+#CHECK: selrp %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x32,0x12]
+#CHECK: selrnle %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x33,0x12]
+#CHECK: selrl %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x34,0x12]
+#CHECK: selrm %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x34,0x12]
+#CHECK: selrnhe %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x35,0x12]
+#CHECK: selrlh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x36,0x12]
+#CHECK: selrne %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x37,0x12]
+#CHECK: selrnz %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x37,0x12]
+#CHECK: selre %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x38,0x12]
+#CHECK: selrz %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x38,0x12]
+#CHECK: selrnlh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x39,0x12]
+#CHECK: selrhe %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3a,0x12]
+#CHECK: selrnl %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3b,0x12]
+#CHECK: selrnm %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3b,0x12]
+#CHECK: selrle %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3c,0x12]
+#CHECK: selrnh %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3d,0x12]
+#CHECK: selrnp %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3d,0x12]
+#CHECK: selrno %r1, %r2, %r3 # encoding: [0xb9,0xf0,0x3e,0x12]
+
+ selro %r1, %r2, %r3
+ selrh %r1, %r2, %r3
+ selrp %r1, %r2, %r3
+ selrnle %r1, %r2, %r3
+ selrl %r1, %r2, %r3
+ selrm %r1, %r2, %r3
+ selrnhe %r1, %r2, %r3
+ selrlh %r1, %r2, %r3
+ selrne %r1, %r2, %r3
+ selrnz %r1, %r2, %r3
+ selre %r1, %r2, %r3
+ selrz %r1, %r2, %r3
+ selrnlh %r1, %r2, %r3
+ selrhe %r1, %r2, %r3
+ selrnl %r1, %r2, %r3
+ selrnm %r1, %r2, %r3
+ selrle %r1, %r2, %r3
+ selrnh %r1, %r2, %r3
+ selrnp %r1, %r2, %r3
+ selrno %r1, %r2, %r3
+
+#CHECK: sortl %r2, %r2 # encoding: [0xb9,0x38,0x00,0x22]
+#CHECK: sortl %r2, %r14 # encoding: [0xb9,0x38,0x00,0x2e]
+#CHECK: sortl %r14, %r2 # encoding: [0xb9,0x38,0x00,0xe2]
+#CHECK: sortl %r6, %r10 # encoding: [0xb9,0x38,0x00,0x6a]
+
+ sortl %r2, %r2
+ sortl %r2, %r14
+ sortl %r14, %r2
+ sortl %r6, %r10
+
+#CHECK: vstebrg %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: vstebrg %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0a]
+#CHECK: vstebrg %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: vstebrg %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0a]
+#CHECK: vstebrg %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0a]
+#CHECK: vstebrg %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0a]
+#CHECK: vstebrg %v18, 1383(%r3,%r4), 0 # encoding: [0xe6,0x23,0x45,0x67,0x08,0x0a]
+
+ stdrv %f0, 0
+ stdrv %f0, 4095
+ stdrv %f0, 0(%r15)
+ stdrv %f0, 0(%r15,%r1)
+ stdrv %f15, 0
+ stdrv %v31, 0
+ stdrv %v18, 0x567(%r3,%r4)
+
+#CHECK: vstebrf %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0b]
+#CHECK: vstebrf %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0b]
+#CHECK: vstebrf %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0b]
+#CHECK: vstebrf %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0b]
+#CHECK: vstebrf %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0b]
+#CHECK: vstebrf %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0b]
+#CHECK: vstebrf %v18, 1383(%r3,%r4), 0 # encoding: [0xe6,0x23,0x45,0x67,0x08,0x0b]
+
+ sterv %f0, 0
+ sterv %f0, 4095
+ sterv %f0, 0(%r15)
+ sterv %f0, 0(%r15,%r1)
+ sterv %f15, 0
+ sterv %v31, 0
+ sterv %v18, 0x567(%r3,%r4)
+
+#CHECK: vcefb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc3]
+#CHECK: vcefb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc3]
+#CHECK: vcefb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc3]
+#CHECK: vcefb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc3]
+#CHECK: vcefb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc3]
+#CHECK: vcefb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc3]
+#CHECK: vcefb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc3]
+
+ vcefb %v0, %v0, 0, 0
+ vcefb %v0, %v0, 0, 15
+ vcefb %v0, %v0, 4, 0
+ vcefb %v0, %v0, 12, 0
+ vcefb %v0, %v31, 0, 0
+ vcefb %v31, %v0, 0, 0
+ vcefb %v14, %v17, 4, 10
+
+#CHECK: vcelfb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc1]
+#CHECK: vcelfb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc1]
+#CHECK: vcelfb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc1]
+#CHECK: vcelfb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc1]
+#CHECK: vcelfb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc1]
+#CHECK: vcelfb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc1]
+#CHECK: vcelfb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc1]
+
+ vcelfb %v0, %v0, 0, 0
+ vcelfb %v0, %v0, 0, 15
+ vcelfb %v0, %v0, 4, 0
+ vcelfb %v0, %v0, 12, 0
+ vcelfb %v0, %v31, 0, 0
+ vcelfb %v31, %v0, 0, 0
+ vcelfb %v14, %v17, 4, 10
+
+#CHECK: vcfeb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc2]
+#CHECK: vcfeb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc2]
+#CHECK: vcfeb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc2]
+#CHECK: vcfeb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc2]
+#CHECK: vcfeb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc2]
+#CHECK: vcfeb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc2]
+#CHECK: vcfeb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc2]
+
+ vcfeb %v0, %v0, 0, 0
+ vcfeb %v0, %v0, 0, 15
+ vcfeb %v0, %v0, 4, 0
+ vcfeb %v0, %v0, 12, 0
+ vcfeb %v0, %v31, 0, 0
+ vcfeb %v31, %v0, 0, 0
+ vcfeb %v14, %v17, 4, 10
+
+#CHECK: vcfpl %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc1]
+#CHECK: vcfpl %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc1]
+#CHECK: vcfpl %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc1]
+#CHECK: vcfpl %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc1]
+#CHECK: vcfpl %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc1]
+#CHECK: vcfpl %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc1]
+#CHECK: vcfpl %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc1]
+#CHECK: vcfpl %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc1]
+
+ vcfpl %v0, %v0, 0, 0, 0
+ vcfpl %v0, %v0, 15, 0, 0
+ vcfpl %v0, %v0, 0, 0, 15
+ vcfpl %v0, %v0, 0, 4, 0
+ vcfpl %v0, %v0, 0, 12, 0
+ vcfpl %v0, %v31, 0, 0, 0
+ vcfpl %v31, %v0, 0, 0, 0
+ vcfpl %v14, %v17, 11, 4, 10
+
+#CHECK: vcfps %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc3]
+#CHECK: vcfps %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc3]
+#CHECK: vcfps %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc3]
+#CHECK: vcfps %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc3]
+#CHECK: vcfps %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc3]
+#CHECK: vcfps %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc3]
+#CHECK: vcfps %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc3]
+#CHECK: vcfps %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc3]
+
+ vcfps %v0, %v0, 0, 0, 0
+ vcfps %v0, %v0, 15, 0, 0
+ vcfps %v0, %v0, 0, 0, 15
+ vcfps %v0, %v0, 0, 4, 0
+ vcfps %v0, %v0, 0, 12, 0
+ vcfps %v0, %v31, 0, 0, 0
+ vcfps %v31, %v0, 0, 0, 0
+ vcfps %v14, %v17, 11, 4, 10
+
+#CHECK: vclfeb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc0]
+#CHECK: vclfeb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x20,0xc0]
+#CHECK: vclfeb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x20,0xc0]
+#CHECK: vclfeb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc0]
+#CHECK: vclfeb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc0]
+#CHECK: vclfeb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc0]
+#CHECK: vclfeb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x24,0xc0]
+
+ vclfeb %v0, %v0, 0, 0
+ vclfeb %v0, %v0, 0, 15
+ vclfeb %v0, %v0, 4, 0
+ vclfeb %v0, %v0, 12, 0
+ vclfeb %v0, %v31, 0, 0
+ vclfeb %v31, %v0, 0, 0
+ vclfeb %v14, %v17, 4, 10
+
+#CHECK: vclfp %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc0]
+#CHECK: vclfp %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc0]
+#CHECK: vclfp %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc0]
+#CHECK: vclfp %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc0]
+#CHECK: vclfp %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc0]
+#CHECK: vclfp %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc0]
+#CHECK: vclfp %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc0]
+#CHECK: vclfp %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc0]
+
+ vclfp %v0, %v0, 0, 0, 0
+ vclfp %v0, %v0, 15, 0, 0
+ vclfp %v0, %v0, 0, 0, 15
+ vclfp %v0, %v0, 0, 4, 0
+ vclfp %v0, %v0, 0, 12, 0
+ vclfp %v0, %v31, 0, 0, 0
+ vclfp %v31, %v0, 0, 0, 0
+ vclfp %v14, %v17, 11, 4, 10
+
+#CHECK: vcsfp %v0, %v0, 0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xc2]
+#CHECK: vcsfp %v0, %v0, 15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0xc2]
+#CHECK: vcsfp %v0, %v0, 0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x00,0xc2]
+#CHECK: vcsfp %v0, %v0, 0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x00,0xc2]
+#CHECK: vcsfp %v0, %v0, 0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x00,0xc2]
+#CHECK: vcsfp %v0, %v31, 0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xc2]
+#CHECK: vcsfp %v31, %v0, 0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xc2]
+#CHECK: vcsfp %v14, %v17, 11, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0xb4,0xc2]
+
+ vcsfp %v0, %v0, 0, 0, 0
+ vcsfp %v0, %v0, 15, 0, 0
+ vcsfp %v0, %v0, 0, 0, 15
+ vcsfp %v0, %v0, 0, 4, 0
+ vcsfp %v0, %v0, 0, 12, 0
+ vcsfp %v0, %v31, 0, 0, 0
+ vcsfp %v31, %v0, 0, 0, 0
+ vcsfp %v14, %v17, 11, 4, 10
+
+#CHECK: vcvb %r0, %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x0f,0x00,0x50]
+#CHECK: vcvb %r3, %v18, 4, 6 # encoding: [0xe6,0x32,0x00,0x46,0x04,0x50]
+
+ vcvb %r0, %v0, 0, 15
+ vcvb %r3, %v18, 4, 6
+
+#CHECK: vcvbg %r0, %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x0f,0x00,0x52]
+#CHECK: vcvbg %r3, %v18, 4, 6 # encoding: [0xe6,0x32,0x00,0x46,0x04,0x52]
+
+ vcvbg %r0, %v0, 0, 15
+ vcvbg %r3, %v18, 4, 6
+
+#CHECK: vlbr %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x06]
+#CHECK: vlbr %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x06]
+#CHECK: vlbr %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x06]
+#CHECK: vlbr %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x06]
+#CHECK: vlbr %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x06]
+#CHECK: vlbr %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x06]
+#CHECK: vlbr %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x06]
+#CHECK: vlbr %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x06]
+
+ vlbr %v0, 0, 0
+ vlbr %v0, 0, 15
+ vlbr %v0, 4095, 0
+ vlbr %v0, 0(%r15), 0
+ vlbr %v0, 0(%r15,%r1), 0
+ vlbr %v15, 0, 0
+ vlbr %v31, 0, 0
+ vlbr %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vlbrf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x06]
+#CHECK: vlbrf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x06]
+#CHECK: vlbrf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x06]
+#CHECK: vlbrf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x06]
+#CHECK: vlbrf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x06]
+#CHECK: vlbrf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x06]
+#CHECK: vlbrf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x06]
+
+ vlbrf %v0, 0
+ vlbrf %v0, 4095
+ vlbrf %v0, 0(%r15)
+ vlbrf %v0, 0(%r15,%r1)
+ vlbrf %v15, 0
+ vlbrf %v31, 0
+ vlbrf %v18, 0x567(%r3,%r4)
+
+#CHECK: vlbrg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x06]
+#CHECK: vlbrg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x06]
+#CHECK: vlbrg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x06]
+#CHECK: vlbrg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x06]
+#CHECK: vlbrg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x06]
+#CHECK: vlbrg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x06]
+#CHECK: vlbrg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x06]
+
+ vlbrg %v0, 0
+ vlbrg %v0, 4095
+ vlbrg %v0, 0(%r15)
+ vlbrg %v0, 0(%r15,%r1)
+ vlbrg %v15, 0
+ vlbrg %v31, 0
+ vlbrg %v18, 0x567(%r3,%r4)
+
+#CHECK: vlbrh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x06]
+#CHECK: vlbrh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x06]
+#CHECK: vlbrh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x06]
+#CHECK: vlbrh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x06]
+#CHECK: vlbrh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x06]
+#CHECK: vlbrh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x06]
+#CHECK: vlbrh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x06]
+
+ vlbrh %v0, 0
+ vlbrh %v0, 4095
+ vlbrh %v0, 0(%r15)
+ vlbrh %v0, 0(%r15,%r1)
+ vlbrh %v15, 0
+ vlbrh %v31, 0
+ vlbrh %v18, 0x567(%r3,%r4)
+
+#CHECK: vlbrq %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x40,0x06]
+#CHECK: vlbrq %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x40,0x06]
+#CHECK: vlbrq %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x40,0x06]
+#CHECK: vlbrq %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x40,0x06]
+#CHECK: vlbrq %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x40,0x06]
+#CHECK: vlbrq %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x48,0x06]
+#CHECK: vlbrq %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x48,0x06]
+
+ vlbrq %v0, 0
+ vlbrq %v0, 4095
+ vlbrq %v0, 0(%r15)
+ vlbrq %v0, 0(%r15,%r1)
+ vlbrq %v15, 0
+ vlbrq %v31, 0
+ vlbrq %v18, 0x567(%r3,%r4)
+
+#CHECK: vlbrrep %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x05]
+#CHECK: vlbrrep %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x05]
+#CHECK: vlbrrep %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x05]
+#CHECK: vlbrrep %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x05]
+#CHECK: vlbrrep %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x05]
+#CHECK: vlbrrep %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x05]
+#CHECK: vlbrrep %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x05]
+#CHECK: vlbrrep %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x05]
+
+ vlbrrep %v0, 0, 0
+ vlbrrep %v0, 0, 15
+ vlbrrep %v0, 4095, 0
+ vlbrrep %v0, 0(%r15), 0
+ vlbrrep %v0, 0(%r15,%r1), 0
+ vlbrrep %v15, 0, 0
+ vlbrrep %v31, 0, 0
+ vlbrrep %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vlbrrepf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x05]
+#CHECK: vlbrrepf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x05]
+#CHECK: vlbrrepf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x05]
+#CHECK: vlbrrepf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x05]
+#CHECK: vlbrrepf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x05]
+#CHECK: vlbrrepf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x05]
+#CHECK: vlbrrepf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x05]
+
+ vlbrrepf %v0, 0
+ vlbrrepf %v0, 4095
+ vlbrrepf %v0, 0(%r15)
+ vlbrrepf %v0, 0(%r15,%r1)
+ vlbrrepf %v15, 0
+ vlbrrepf %v31, 0
+ vlbrrepf %v18, 0x567(%r3,%r4)
+
+#CHECK: vlbrrepg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x05]
+#CHECK: vlbrrepg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x05]
+#CHECK: vlbrrepg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x05]
+#CHECK: vlbrrepg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x05]
+#CHECK: vlbrrepg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x05]
+#CHECK: vlbrrepg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x05]
+#CHECK: vlbrrepg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x05]
+
+ vlbrrepg %v0, 0
+ vlbrrepg %v0, 4095
+ vlbrrepg %v0, 0(%r15)
+ vlbrrepg %v0, 0(%r15,%r1)
+ vlbrrepg %v15, 0
+ vlbrrepg %v31, 0
+ vlbrrepg %v18, 0x567(%r3,%r4)
+
+#CHECK: vlbrreph %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x05]
+#CHECK: vlbrreph %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x05]
+#CHECK: vlbrreph %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x05]
+#CHECK: vlbrreph %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x05]
+#CHECK: vlbrreph %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x05]
+#CHECK: vlbrreph %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x05]
+#CHECK: vlbrreph %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x05]
+
+ vlbrreph %v0, 0
+ vlbrreph %v0, 4095
+ vlbrreph %v0, 0(%r15)
+ vlbrreph %v0, 0(%r15,%r1)
+ vlbrreph %v15, 0
+ vlbrreph %v31, 0
+ vlbrreph %v18, 0x567(%r3,%r4)
+
+#CHECK: vlebrf %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x03]
+#CHECK: vlebrf %v0, 0, 3 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x03]
+#CHECK: vlebrf %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x03]
+#CHECK: vlebrf %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x03]
+#CHECK: vlebrf %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x03]
+#CHECK: vlebrf %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x03]
+#CHECK: vlebrf %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x03]
+#CHECK: vlebrf %v18, 1383(%r3,%r4), 2 # encoding: [0xe6,0x23,0x45,0x67,0x28,0x03]
+
+ vlebrf %v0, 0, 0
+ vlebrf %v0, 0, 3
+ vlebrf %v0, 4095, 0
+ vlebrf %v0, 0(%r15), 0
+ vlebrf %v0, 0(%r15,%r1), 0
+ vlebrf %v15, 0, 0
+ vlebrf %v31, 0, 0
+ vlebrf %v18, 1383(%r3,%r4), 2
+
+#CHECK: vlebrg %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x02]
+#CHECK: vlebrg %v0, 0, 1 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x02]
+#CHECK: vlebrg %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x02]
+#CHECK: vlebrg %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x02]
+#CHECK: vlebrg %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x02]
+#CHECK: vlebrg %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x02]
+#CHECK: vlebrg %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x02]
+#CHECK: vlebrg %v18, 1383(%r3,%r4), 1 # encoding: [0xe6,0x23,0x45,0x67,0x18,0x02]
+
+ vlebrg %v0, 0, 0
+ vlebrg %v0, 0, 1
+ vlebrg %v0, 4095, 0
+ vlebrg %v0, 0(%r15), 0
+ vlebrg %v0, 0(%r15,%r1), 0
+ vlebrg %v15, 0, 0
+ vlebrg %v31, 0, 0
+ vlebrg %v18, 1383(%r3,%r4), 1
+
+#CHECK: vlebrh %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x01]
+#CHECK: vlebrh %v0, 0, 7 # encoding: [0xe6,0x00,0x00,0x00,0x70,0x01]
+#CHECK: vlebrh %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x01]
+#CHECK: vlebrh %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x01]
+#CHECK: vlebrh %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x01]
+#CHECK: vlebrh %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x01]
+#CHECK: vlebrh %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x01]
+#CHECK: vlebrh %v18, 1383(%r3,%r4), 4 # encoding: [0xe6,0x23,0x45,0x67,0x48,0x01]
+
+ vlebrh %v0, 0, 0
+ vlebrh %v0, 0, 7
+ vlebrh %v0, 4095, 0
+ vlebrh %v0, 0(%r15), 0
+ vlebrh %v0, 0(%r15,%r1), 0
+ vlebrh %v15, 0, 0
+ vlebrh %v31, 0, 0
+ vlebrh %v18, 1383(%r3,%r4), 4
+
+#CHECK: vler %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x07]
+#CHECK: vler %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x07]
+#CHECK: vler %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x07]
+#CHECK: vler %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x07]
+#CHECK: vler %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x07]
+#CHECK: vler %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x07]
+#CHECK: vler %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x07]
+#CHECK: vler %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x07]
+
+ vler %v0, 0, 0
+ vler %v0, 0, 15
+ vler %v0, 4095, 0
+ vler %v0, 0(%r15), 0
+ vler %v0, 0(%r15,%r1), 0
+ vler %v15, 0, 0
+ vler %v31, 0, 0
+ vler %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vlerf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x07]
+#CHECK: vlerf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x07]
+#CHECK: vlerf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x07]
+#CHECK: vlerf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x07]
+#CHECK: vlerf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x07]
+#CHECK: vlerf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x07]
+#CHECK: vlerf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x07]
+
+ vlerf %v0, 0
+ vlerf %v0, 4095
+ vlerf %v0, 0(%r15)
+ vlerf %v0, 0(%r15,%r1)
+ vlerf %v15, 0
+ vlerf %v31, 0
+ vlerf %v18, 0x567(%r3,%r4)
+
+#CHECK: vlerg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x07]
+#CHECK: vlerg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x07]
+#CHECK: vlerg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x07]
+#CHECK: vlerg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x07]
+#CHECK: vlerg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x07]
+#CHECK: vlerg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x07]
+#CHECK: vlerg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x07]
+
+ vlerg %v0, 0
+ vlerg %v0, 4095
+ vlerg %v0, 0(%r15)
+ vlerg %v0, 0(%r15,%r1)
+ vlerg %v15, 0
+ vlerg %v31, 0
+ vlerg %v18, 0x567(%r3,%r4)
+
+#CHECK: vlerh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x07]
+#CHECK: vlerh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x07]
+#CHECK: vlerh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x07]
+#CHECK: vlerh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x07]
+#CHECK: vlerh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x07]
+#CHECK: vlerh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x07]
+#CHECK: vlerh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x07]
+
+ vlerh %v0, 0
+ vlerh %v0, 4095
+ vlerh %v0, 0(%r15)
+ vlerh %v0, 0(%r15,%r1)
+ vlerh %v15, 0
+ vlerh %v31, 0
+ vlerh %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrz %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x04]
+#CHECK: vllebrz %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x04]
+#CHECK: vllebrz %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x04]
+#CHECK: vllebrz %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x04]
+#CHECK: vllebrz %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x04]
+#CHECK: vllebrz %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x04]
+#CHECK: vllebrz %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x04]
+#CHECK: vllebrz %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x04]
+
+ vllebrz %v0, 0, 0
+ vllebrz %v0, 0, 15
+ vllebrz %v0, 4095, 0
+ vllebrz %v0, 0(%r15), 0
+ vllebrz %v0, 0(%r15,%r1), 0
+ vllebrz %v15, 0, 0
+ vllebrz %v31, 0, 0
+ vllebrz %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vllebrze %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x60,0x04]
+#CHECK: vllebrze %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x60,0x04]
+#CHECK: vllebrze %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x60,0x04]
+#CHECK: vllebrze %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x68,0x04]
+#CHECK: vllebrze %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x68,0x04]
+
+ vllebrze %v0, 0
+ vllebrze %v0, 4095
+ vllebrze %v0, 0(%r15)
+ vllebrze %v0, 0(%r15,%r1)
+ vllebrze %v15, 0
+ vllebrze %v31, 0
+ vllebrze %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrzf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x04]
+#CHECK: vllebrzf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x04]
+#CHECK: vllebrzf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x04]
+#CHECK: vllebrzf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x04]
+#CHECK: vllebrzf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x04]
+#CHECK: vllebrzf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x04]
+#CHECK: vllebrzf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x04]
+
+ vllebrzf %v0, 0
+ vllebrzf %v0, 4095
+ vllebrzf %v0, 0(%r15)
+ vllebrzf %v0, 0(%r15,%r1)
+ vllebrzf %v15, 0
+ vllebrzf %v31, 0
+ vllebrzf %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrzg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x04]
+#CHECK: vllebrzg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x04]
+#CHECK: vllebrzg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x04]
+#CHECK: vllebrzg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x04]
+#CHECK: vllebrzg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x04]
+
+ vllebrzg %v0, 0
+ vllebrzg %v0, 4095
+ vllebrzg %v0, 0(%r15)
+ vllebrzg %v0, 0(%r15,%r1)
+ vllebrzg %v15, 0
+ vllebrzg %v31, 0
+ vllebrzg %v18, 0x567(%r3,%r4)
+
+#CHECK: vllebrzh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x04]
+#CHECK: vllebrzh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x04]
+#CHECK: vllebrzh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x04]
+#CHECK: vllebrzh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x04]
+#CHECK: vllebrzh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x04]
+#CHECK: vllebrzh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x04]
+#CHECK: vllebrzh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x04]
+
+ vllebrzh %v0, 0
+ vllebrzh %v0, 4095
+ vllebrzh %v0, 0(%r15)
+ vllebrzh %v0, 0(%r15,%r1)
+ vllebrzh %v15, 0
+ vllebrzh %v31, 0
+ vllebrzh %v18, 0x567(%r3,%r4)
+
+#CHECK: vsld %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x86]
+#CHECK: vsld %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x86]
+#CHECK: vsld %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x86]
+#CHECK: vsld %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x86]
+#CHECK: vsld %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x86]
+#CHECK: vsld %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x86]
+
+ vsld %v0, %v0, %v0, 0
+ vsld %v0, %v0, %v0, 255
+ vsld %v0, %v0, %v31, 0
+ vsld %v0, %v31, %v0, 0
+ vsld %v31, %v0, %v0, 0
+ vsld %v13, %v17, %v21, 0x79
+
+#CHECK: vsrd %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x87]
+#CHECK: vsrd %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x87]
+#CHECK: vsrd %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x87]
+#CHECK: vsrd %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x87]
+#CHECK: vsrd %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x87]
+#CHECK: vsrd %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x87]
+
+ vsrd %v0, %v0, %v0, 0
+ vsrd %v0, %v0, %v0, 255
+ vsrd %v0, %v0, %v31, 0
+ vsrd %v0, %v31, %v0, 0
+ vsrd %v31, %v0, %v0, 0
+ vsrd %v13, %v17, %v21, 0x79
+
+#CHECK: vstbr %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0e]
+#CHECK: vstbr %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x0e]
+#CHECK: vstbr %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0e]
+#CHECK: vstbr %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0e]
+#CHECK: vstbr %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0e]
+#CHECK: vstbr %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0e]
+#CHECK: vstbr %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0e]
+#CHECK: vstbr %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x0e]
+
+ vstbr %v0, 0, 0
+ vstbr %v0, 0, 15
+ vstbr %v0, 4095, 0
+ vstbr %v0, 0(%r15), 0
+ vstbr %v0, 0(%r15,%r1), 0
+ vstbr %v15, 0, 0
+ vstbr %v31, 0, 0
+ vstbr %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vstbrf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x0e]
+#CHECK: vstbrf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x0e]
+#CHECK: vstbrf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x0e]
+#CHECK: vstbrf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x0e]
+#CHECK: vstbrf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x0e]
+#CHECK: vstbrf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x0e]
+#CHECK: vstbrf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x0e]
+
+ vstbrf %v0, 0
+ vstbrf %v0, 4095
+ vstbrf %v0, 0(%r15)
+ vstbrf %v0, 0(%r15,%r1)
+ vstbrf %v15, 0
+ vstbrf %v31, 0
+ vstbrf %v18, 0x567(%r3,%r4)
+
+#CHECK: vstbrg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x0e]
+#CHECK: vstbrg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x0e]
+#CHECK: vstbrg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x0e]
+#CHECK: vstbrg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x0e]
+#CHECK: vstbrg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x0e]
+#CHECK: vstbrg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x0e]
+#CHECK: vstbrg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x0e]
+
+ vstbrg %v0, 0
+ vstbrg %v0, 4095
+ vstbrg %v0, 0(%r15)
+ vstbrg %v0, 0(%r15,%r1)
+ vstbrg %v15, 0
+ vstbrg %v31, 0
+ vstbrg %v18, 0x567(%r3,%r4)
+
+#CHECK: vstbrh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x0e]
+#CHECK: vstbrh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x0e]
+#CHECK: vstbrh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x0e]
+#CHECK: vstbrh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x0e]
+#CHECK: vstbrh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x0e]
+#CHECK: vstbrh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x0e]
+#CHECK: vstbrh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x0e]
+
+ vstbrh %v0, 0
+ vstbrh %v0, 4095
+ vstbrh %v0, 0(%r15)
+ vstbrh %v0, 0(%r15,%r1)
+ vstbrh %v15, 0
+ vstbrh %v31, 0
+ vstbrh %v18, 0x567(%r3,%r4)
+
+#CHECK: vstbrq %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x40,0x0e]
+#CHECK: vstbrq %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x40,0x0e]
+#CHECK: vstbrq %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x40,0x0e]
+#CHECK: vstbrq %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x40,0x0e]
+#CHECK: vstbrq %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x40,0x0e]
+#CHECK: vstbrq %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x48,0x0e]
+#CHECK: vstbrq %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x48,0x0e]
+
+ vstbrq %v0, 0
+ vstbrq %v0, 4095
+ vstbrq %v0, 0(%r15)
+ vstbrq %v0, 0(%r15,%r1)
+ vstbrq %v15, 0
+ vstbrq %v31, 0
+ vstbrq %v18, 0x567(%r3,%r4)
+
+#CHECK: vstebrf %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0b]
+#CHECK: vstebrf %v0, 0, 3 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x0b]
+#CHECK: vstebrf %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0b]
+#CHECK: vstebrf %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0b]
+#CHECK: vstebrf %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0b]
+#CHECK: vstebrf %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0b]
+#CHECK: vstebrf %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0b]
+#CHECK: vstebrf %v18, 1383(%r3,%r4), 2 # encoding: [0xe6,0x23,0x45,0x67,0x28,0x0b]
+
+ vstebrf %v0, 0, 0
+ vstebrf %v0, 0, 3
+ vstebrf %v0, 4095, 0
+ vstebrf %v0, 0(%r15), 0
+ vstebrf %v0, 0(%r15,%r1), 0
+ vstebrf %v15, 0, 0
+ vstebrf %v31, 0, 0
+ vstebrf %v18, 1383(%r3,%r4), 2
+
+#CHECK: vstebrg %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0a]
+#CHECK: vstebrg %v0, 0, 1 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x0a]
+#CHECK: vstebrg %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0a]
+#CHECK: vstebrg %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0a]
+#CHECK: vstebrg %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0a]
+#CHECK: vstebrg %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0a]
+#CHECK: vstebrg %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0a]
+#CHECK: vstebrg %v18, 1383(%r3,%r4), 1 # encoding: [0xe6,0x23,0x45,0x67,0x18,0x0a]
+
+ vstebrg %v0, 0, 0
+ vstebrg %v0, 0, 1
+ vstebrg %v0, 4095, 0
+ vstebrg %v0, 0(%r15), 0
+ vstebrg %v0, 0(%r15,%r1), 0
+ vstebrg %v15, 0, 0
+ vstebrg %v31, 0, 0
+ vstebrg %v18, 1383(%r3,%r4), 1
+
+#CHECK: vstebrh %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x09]
+#CHECK: vstebrh %v0, 0, 7 # encoding: [0xe6,0x00,0x00,0x00,0x70,0x09]
+#CHECK: vstebrh %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x09]
+#CHECK: vstebrh %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x09]
+#CHECK: vstebrh %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x09]
+#CHECK: vstebrh %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x09]
+#CHECK: vstebrh %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x09]
+#CHECK: vstebrh %v18, 1383(%r3,%r4), 4 # encoding: [0xe6,0x23,0x45,0x67,0x48,0x09]
+
+ vstebrh %v0, 0, 0
+ vstebrh %v0, 0, 7
+ vstebrh %v0, 4095, 0
+ vstebrh %v0, 0(%r15), 0
+ vstebrh %v0, 0(%r15,%r1), 0
+ vstebrh %v15, 0, 0
+ vstebrh %v31, 0, 0
+ vstebrh %v18, 1383(%r3,%r4), 4
+
+#CHECK: vster %v0, 0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x00,0x0f]
+#CHECK: vster %v0, 0, 15 # encoding: [0xe6,0x00,0x00,0x00,0xf0,0x0f]
+#CHECK: vster %v0, 4095, 0 # encoding: [0xe6,0x00,0x0f,0xff,0x00,0x0f]
+#CHECK: vster %v0, 0(%r15), 0 # encoding: [0xe6,0x00,0xf0,0x00,0x00,0x0f]
+#CHECK: vster %v0, 0(%r15,%r1), 0 # encoding: [0xe6,0x0f,0x10,0x00,0x00,0x0f]
+#CHECK: vster %v15, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x00,0x0f]
+#CHECK: vster %v31, 0, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x08,0x0f]
+#CHECK: vster %v18, 1383(%r3,%r4), 11 # encoding: [0xe6,0x23,0x45,0x67,0xb8,0x0f]
+
+ vster %v0, 0, 0
+ vster %v0, 0, 15
+ vster %v0, 4095, 0
+ vster %v0, 0(%r15), 0
+ vster %v0, 0(%r15,%r1), 0
+ vster %v15, 0, 0
+ vster %v31, 0, 0
+ vster %v18, 0x567(%r3,%r4), 11
+
+#CHECK: vsterf %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x20,0x0f]
+#CHECK: vsterf %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x20,0x0f]
+#CHECK: vsterf %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x20,0x0f]
+#CHECK: vsterf %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x20,0x0f]
+#CHECK: vsterf %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x20,0x0f]
+#CHECK: vsterf %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x28,0x0f]
+#CHECK: vsterf %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x28,0x0f]
+
+ vsterf %v0, 0
+ vsterf %v0, 4095
+ vsterf %v0, 0(%r15)
+ vsterf %v0, 0(%r15,%r1)
+ vsterf %v15, 0
+ vsterf %v31, 0
+ vsterf %v18, 0x567(%r3,%r4)
+
+#CHECK: vsterg %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x30,0x0f]
+#CHECK: vsterg %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x30,0x0f]
+#CHECK: vsterg %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x30,0x0f]
+#CHECK: vsterg %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x30,0x0f]
+#CHECK: vsterg %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x30,0x0f]
+#CHECK: vsterg %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x38,0x0f]
+#CHECK: vsterg %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x38,0x0f]
+
+ vsterg %v0, 0
+ vsterg %v0, 4095
+ vsterg %v0, 0(%r15)
+ vsterg %v0, 0(%r15,%r1)
+ vsterg %v15, 0
+ vsterg %v31, 0
+ vsterg %v18, 0x567(%r3,%r4)
+
+#CHECK: vsterh %v0, 0 # encoding: [0xe6,0x00,0x00,0x00,0x10,0x0f]
+#CHECK: vsterh %v0, 4095 # encoding: [0xe6,0x00,0x0f,0xff,0x10,0x0f]
+#CHECK: vsterh %v0, 0(%r15) # encoding: [0xe6,0x00,0xf0,0x00,0x10,0x0f]
+#CHECK: vsterh %v0, 0(%r15,%r1) # encoding: [0xe6,0x0f,0x10,0x00,0x10,0x0f]
+#CHECK: vsterh %v15, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x10,0x0f]
+#CHECK: vsterh %v31, 0 # encoding: [0xe6,0xf0,0x00,0x00,0x18,0x0f]
+#CHECK: vsterh %v18, 1383(%r3,%r4) # encoding: [0xe6,0x23,0x45,0x67,0x18,0x0f]
+
+ vsterh %v0, 0
+ vsterh %v0, 4095
+ vsterh %v0, 0(%r15)
+ vsterh %v0, 0(%r15,%r1)
+ vsterh %v15, 0
+ vsterh %v31, 0
+ vsterh %v18, 0x567(%r3,%r4)
+
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x0f,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 15, 0 # encoding: [0xe7,0x00,0x0f,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v0, 0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v15, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x8b]
+#CHECK: vstrs %v0, %v0, %v0, %v31, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8b]
+#CHECK: vstrs %v0, %v0, %v15, %v0, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v0, %v31, %v0, 0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8b]
+#CHECK: vstrs %v0, %v15, %v0, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v0, %v31, %v0, %v0, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8b]
+#CHECK: vstrs %v15, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x8b]
+#CHECK: vstrs %v31, %v0, %v0, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8b]
+#CHECK: vstrs %v18, %v3, %v20, %v5, 11, 4 # encoding: [0xe7,0x23,0x4b,0x40,0x5a,0x8b]
+#CHECK: vstrs %v18, %v3, %v20, %v5, 0, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8b]
+
+ vstrs %v0, %v0, %v0, %v0, 0
+ vstrs %v0, %v0, %v0, %v0, 15
+ vstrs %v0, %v0, %v0, %v0, 0, 0
+ vstrs %v0, %v0, %v0, %v0, 15, 0
+ vstrs %v0, %v0, %v0, %v0, 0, 12
+ vstrs %v0, %v0, %v0, %v15, 0
+ vstrs %v0, %v0, %v0, %v31, 0
+ vstrs %v0, %v0, %v15, %v0, 0
+ vstrs %v0, %v0, %v31, %v0, 0
+ vstrs %v0, %v15, %v0, %v0, 0
+ vstrs %v0, %v31, %v0, %v0, 0
+ vstrs %v15, %v0, %v0, %v0, 0
+ vstrs %v31, %v0, %v0, %v0, 0
+ vstrs %v18, %v3, %v20, %v5, 11, 4
+ vstrs %v18, %v3, %v20, %v5, 0, 15
+
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x8b]
+#CHECK: vstrsb %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8b]
+#CHECK: vstrsb %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8b]
+#CHECK: vstrsb %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8b]
+#CHECK: vstrsb %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x8b]
+#CHECK: vstrsb %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8b]
+#CHECK: vstrsb %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x40,0x5a,0x8b]
+#CHECK: vstrsb %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8b]
+#CHECK: vstrszb %v18, %v3, %v20, %v5 # encoding: [0xe7,0x23,0x40,0x20,0x5a,0x8b]
+
+ vstrsb %v0, %v0, %v0, %v0
+ vstrsb %v0, %v0, %v0, %v0, 0
+ vstrsb %v0, %v0, %v0, %v0, 12
+ vstrsb %v0, %v0, %v0, %v15
+ vstrsb %v0, %v0, %v0, %v31
+ vstrsb %v0, %v0, %v15, %v0
+ vstrsb %v0, %v0, %v31, %v0
+ vstrsb %v0, %v15, %v0, %v0
+ vstrsb %v0, %v31, %v0, %v0
+ vstrsb %v15, %v0, %v0, %v0
+ vstrsb %v31, %v0, %v0, %v0
+ vstrsb %v18, %v3, %v20, %v5, 4
+ vstrsb %v18, %v3, %v20, %v5, 15
+ vstrszb %v18, %v3, %v20, %v5
+
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x02,0xc0,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf0,0x8b]
+#CHECK: vstrsf %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x8b]
+#CHECK: vstrsf %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf2,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x8b]
+#CHECK: vstrsf %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x8b]
+#CHECK: vstrsf %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x00,0x8b]
+#CHECK: vstrsf %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x8b]
+#CHECK: vstrsf %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x40,0x5a,0x8b]
+#CHECK: vstrsf %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8b]
+#CHECK: vstrszf %v18, %v3, %v20, %v5 # encoding: [0xe7,0x23,0x42,0x20,0x5a,0x8b]
+
+ vstrsf %v0, %v0, %v0, %v0
+ vstrsf %v0, %v0, %v0, %v0, 0
+ vstrsf %v0, %v0, %v0, %v0, 12
+ vstrsf %v0, %v0, %v0, %v15
+ vstrsf %v0, %v0, %v0, %v31
+ vstrsf %v0, %v0, %v15, %v0
+ vstrsf %v0, %v0, %v31, %v0
+ vstrsf %v0, %v15, %v0, %v0
+ vstrsf %v0, %v31, %v0, %v0
+ vstrsf %v15, %v0, %v0, %v0
+ vstrsf %v31, %v0, %v0, %v0
+ vstrsf %v18, %v3, %v20, %v5, 4
+ vstrsf %v18, %v3, %v20, %v5, 15
+ vstrszf %v18, %v3, %v20, %v5
+
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x01,0xc0,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf0,0x8b]
+#CHECK: vstrsh %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0x8b]
+#CHECK: vstrsh %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf1,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0x8b]
+#CHECK: vstrsh %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0x8b]
+#CHECK: vstrsh %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x00,0x8b]
+#CHECK: vstrsh %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0x8b]
+#CHECK: vstrsh %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x40,0x5a,0x8b]
+#CHECK: vstrsh %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8b]
+#CHECK: vstrszh %v18, %v3, %v20, %v5 # encoding: [0xe7,0x23,0x41,0x20,0x5a,0x8b]
+
+ vstrsh %v0, %v0, %v0, %v0
+ vstrsh %v0, %v0, %v0, %v0, 0
+ vstrsh %v0, %v0, %v0, %v0, 12
+ vstrsh %v0, %v0, %v0, %v15
+ vstrsh %v0, %v0, %v0, %v31
+ vstrsh %v0, %v0, %v15, %v0
+ vstrsh %v0, %v0, %v31, %v0
+ vstrsh %v0, %v15, %v0, %v0
+ vstrsh %v0, %v31, %v0, %v0
+ vstrsh %v15, %v0, %v0, %v0
+ vstrsh %v31, %v0, %v0, %v0
+ vstrsh %v18, %v3, %v20, %v5, 4
+ vstrsh %v18, %v3, %v20, %v5, 15
+ vstrszh %v18, %v3, %v20, %v5
+
+#CHECK: wcefb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc3]
+#CHECK: wcefb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc3]
+#CHECK: wcefb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc3]
+#CHECK: wcefb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc3]
+#CHECK: wcefb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc3]
+
+ wcefb %v0, %v0, 0, 0
+ wcefb %f0, %f0, 0, 0
+ wcefb %v0, %v0, 0, 15
+ wcefb %v0, %v0, 4, 0
+ wcefb %v0, %v0, 12, 0
+ wcefb %v0, %v31, 0, 0
+ wcefb %v31, %v0, 0, 0
+ wcefb %v14, %v17, 4, 10
+
+#CHECK: wcelfb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc1]
+#CHECK: wcelfb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc1]
+#CHECK: wcelfb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc1]
+#CHECK: wcelfb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc1]
+#CHECK: wcelfb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc1]
+
+ wcelfb %v0, %v0, 0, 0
+ wcelfb %f0, %f0, 0, 0
+ wcelfb %v0, %v0, 0, 15
+ wcelfb %v0, %v0, 4, 0
+ wcelfb %v0, %v0, 12, 0
+ wcelfb %v0, %v31, 0, 0
+ wcelfb %v31, %v0, 0, 0
+ wcelfb %v14, %v17, 4, 10
+
+#CHECK: wcfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc2]
+#CHECK: wcfeb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc2]
+#CHECK: wcfeb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc2]
+#CHECK: wcfeb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc2]
+#CHECK: wcfeb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc2]
+
+ wcfeb %v0, %v0, 0, 0
+ wcfeb %f0, %f0, 0, 0
+ wcfeb %v0, %v0, 0, 15
+ wcfeb %v0, %v0, 4, 0
+ wcfeb %v0, %v0, 12, 0
+ wcfeb %v0, %v31, 0, 0
+ wcfeb %v31, %v0, 0, 0
+ wcfeb %v14, %v17, 4, 10
+
+#CHECK: wclfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc0]
+#CHECK: wclfeb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x20,0xc0]
+#CHECK: wclfeb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc0]
+#CHECK: wclfeb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc0]
+#CHECK: wclfeb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x24,0xc0]
+
+ wclfeb %v0, %v0, 0, 0
+ wclfeb %f0, %f0, 0, 0
+ wclfeb %v0, %v0, 0, 15
+ wclfeb %v0, %v0, 4, 0
+ wclfeb %v0, %v0, 12, 0
+ wclfeb %v0, %v31, 0, 0
+ wclfeb %v31, %v0, 0, 0
+ wclfeb %v14, %v17, 4, 10
+
Modified: llvm/trunk/test/Verifier/SystemZ/intrinsic-immarg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Verifier/SystemZ/intrinsic-immarg.ll?rev=365932&r1=365931&r2=365932&view=diff
==============================================================================
--- llvm/trunk/test/Verifier/SystemZ/intrinsic-immarg.ll (original)
+++ llvm/trunk/test/Verifier/SystemZ/intrinsic-immarg.ll Fri Jul 12 11:13:16 2019
@@ -381,3 +381,21 @@ define <16 x i8> @test_vsldb(<16 x i8> %
ret <16 x i8> %res
}
+declare <16 x i8> @llvm.s390.vsld(<16 x i8>, <16 x i8>, i32)
+define <16 x i8> @test_vsld(<16 x i8> %a, <16 x i8> %b, i32 %c) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i32 %c
+ ; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
+ %res = call <16 x i8> @llvm.s390.vsld(<16 x i8> %a, <16 x i8> %b, i32 %c)
+ ret <16 x i8> %res
+}
+
+declare <16 x i8> @llvm.s390.vsrd(<16 x i8>, <16 x i8>, i32)
+define <16 x i8> @test_vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c) {
+ ; CHECK: immarg operand has non-immediate parameter
+ ; CHECK-NEXT: i32 %c
+ ; CHECK-NEXT: %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
+ %res = call <16 x i8> @llvm.s390.vsrd(<16 x i8> %a, <16 x i8> %b, i32 %c)
+ ret <16 x i8> %res
+}
+