[clang] c61eb44 - [SystemZ] Implement vector rotate in terms of funnel shift
Author: Ulrich Weigand
Date: 2023-12-04T16:52:00+01:00
New Revision: c61eb440059d6e9c18e6f8404e06bf125aa942c9
URL: https://github.com/llvm/llvm-project/commit/c61eb440059d6e9c18e6f8404e06bf125aa942c9
DIFF: https://github.com/llvm/llvm-project/commit/c61eb440059d6e9c18e6f8404e06bf125aa942c9.diff
LOG: [SystemZ] Implement vector rotate in terms of funnel shift
Clang currently implements a set of vector rotate builtins
(__builtin_s390_verll*) in terms of platform-specific LLVM
intrinsics. To simplify the IR (and allow common code
optimizations where applicable), this patch removes those LLVM
intrinsics and implements the builtins in terms of the
platform-independent funnel shift intrinsics instead.
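The identity being exploited: a rotate left is a funnel shift of a value
with itself, rotl(x, n) == fshl(x, x, n), with the amount taken modulo the
element width. A minimal scalar sketch of the equivalence (illustrative
only, not the lowering code itself; the vector forms behave the same way
element-wise):

/* Scalar model of the rewrite: rotl(x, n) == fshl(x, x, n).
   LLVM defines funnel-shift amounts modulo the bit width. */
#include <stdint.h>
#include <assert.h>

static uint32_t fshl32(uint32_t hi, uint32_t lo, uint32_t amt) {
  amt %= 32;                 /* amount wraps modulo the width */
  if (amt == 0)
    return hi;
  return (hi << amt) | (lo >> (32 - amt));
}

static uint32_t rotl32(uint32_t x, uint32_t amt) {
  return fshl32(x, x, amt);  /* rotate == funnel shift of x with itself */
}

int main(void) {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 40) == rotl32(0x12345678u, 8)); /* mod 32 */
  return 0;
}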
Also, fix the prototypes of the __builtin_s390_verll*
builtins (the rotate amount is now unsigned char rather than
int) for full compatibility with GCC.
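A hedged usage sketch (assumes clang targeting s390x-linux-gnu with
-march=z13 -mzvector so that <vecintrin.h> is available; the function
name is illustrative and not part of the patch):

#include <vecintrin.h>

/* vec_rli still takes an unsigned long amount; per the header change
   in this patch it is truncated to unsigned char before invoking the
   builtin, which now lowers to @llvm.fshl.v4i32 with the scalar amount
   splatted to a vector. */
__vector unsigned int rotate_elems(__vector unsigned int v, unsigned long n) {
  return vec_rli(v, n);
}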
Added:
llvm/test/CodeGen/SystemZ/vec-rot-01.ll
llvm/test/CodeGen/SystemZ/vec-rot-02.ll
Modified:
clang/include/clang/Basic/BuiltinsSystemZ.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/vecintrin.h
clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
llvm/include/llvm/IR/IntrinsicsSystemZ.td
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/lib/Target/SystemZ/SystemZISelLowering.h
llvm/lib/Target/SystemZ/SystemZInstrVector.td
llvm/lib/Target/SystemZ/SystemZOperators.td
llvm/test/CodeGen/SystemZ/vec-intrinsics-01.ll
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsSystemZ.def b/clang/include/clang/Basic/BuiltinsSystemZ.def
index 079e411364885..b84cf5b9cec9f 100644
--- a/clang/include/clang/Basic/BuiltinsSystemZ.def
+++ b/clang/include/clang/Basic/BuiltinsSystemZ.def
@@ -105,10 +105,10 @@ TARGET_BUILTIN(__builtin_s390_verimb, "V16UcV16UcV16UcV16UcIi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_verimh, "V8UsV8UsV8UsV8UsIi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_verimf, "V4UiV4UiV4UiV4UiIi", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_verimg, "V2ULLiV2ULLiV2ULLiV2ULLiIi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllb, "V16UcV16UcUi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllh, "V8UsV8UsUi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllf, "V4UiV4UiUi", "nc", "vector")
-TARGET_BUILTIN(__builtin_s390_verllg, "V2ULLiV2ULLiUi", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllb, "V16UcV16UcUc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllh, "V8UsV8UsUc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllf, "V4UiV4UiUc", "nc", "vector")
+TARGET_BUILTIN(__builtin_s390_verllg, "V2ULLiV2ULLiUc", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_verllvb, "V16UcV16UcV16Uc", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_verllvh, "V8UsV8UsV8Us", "nc", "vector")
TARGET_BUILTIN(__builtin_s390_verllvf, "V4UiV4UiV4Ui", "nc", "vector")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061..a0f4172002613 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -18337,6 +18337,32 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
return Builder.CreateCall(F, {X, Undef});
}
+ case SystemZ::BI__builtin_s390_verllb:
+ case SystemZ::BI__builtin_s390_verllh:
+ case SystemZ::BI__builtin_s390_verllf:
+ case SystemZ::BI__builtin_s390_verllg: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
+ // Splat scalar rotate amount to vector type.
+ unsigned NumElts = cast<llvm::FixedVectorType>(ResultType)->getNumElements();
+ Amt = Builder.CreateIntCast(Amt, ResultType->getScalarType(), false);
+ Amt = Builder.CreateVectorSplat(NumElts, Amt);
+ Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
+ return Builder.CreateCall(F, { Src, Src, Amt });
+ }
+
+ case SystemZ::BI__builtin_s390_verllvb:
+ case SystemZ::BI__builtin_s390_verllvh:
+ case SystemZ::BI__builtin_s390_verllvf:
+ case SystemZ::BI__builtin_s390_verllvg: {
+ llvm::Type *ResultType = ConvertType(E->getType());
+ llvm::Value *Src = EmitScalarExpr(E->getArg(0));
+ llvm::Value *Amt = EmitScalarExpr(E->getArg(1));
+ Function *F = CGM.getIntrinsic(Intrinsic::fshl, ResultType);
+ return Builder.CreateCall(F, { Src, Src, Amt });
+ }
+
case SystemZ::BI__builtin_s390_vfsqsb:
case SystemZ::BI__builtin_s390_vfsqdb: {
llvm::Type *ResultType = ConvertType(E->getType());
diff --git a/clang/lib/Headers/vecintrin.h b/clang/lib/Headers/vecintrin.h
index ec1dbfd015f6e..0c535225c78e5 100644
--- a/clang/lib/Headers/vecintrin.h
+++ b/clang/lib/Headers/vecintrin.h
@@ -6565,45 +6565,45 @@ vec_rl(__vector unsigned long long __a, __vector unsigned long long __b) {
static inline __ATTRS_o_ai __vector signed char
vec_rli(__vector signed char __a, unsigned long __b) {
return (__vector signed char)__builtin_s390_verllb(
- (__vector unsigned char)__a, (int)__b);
+ (__vector unsigned char)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned char
vec_rli(__vector unsigned char __a, unsigned long __b) {
- return __builtin_s390_verllb(__a, (int)__b);
+ return __builtin_s390_verllb(__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector signed short
vec_rli(__vector signed short __a, unsigned long __b) {
return (__vector signed short)__builtin_s390_verllh(
- (__vector unsigned short)__a, (int)__b);
+ (__vector unsigned short)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned short
vec_rli(__vector unsigned short __a, unsigned long __b) {
- return __builtin_s390_verllh(__a, (int)__b);
+ return __builtin_s390_verllh(__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector signed int
vec_rli(__vector signed int __a, unsigned long __b) {
return (__vector signed int)__builtin_s390_verllf(
- (__vector unsigned int)__a, (int)__b);
+ (__vector unsigned int)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned int
vec_rli(__vector unsigned int __a, unsigned long __b) {
- return __builtin_s390_verllf(__a, (int)__b);
+ return __builtin_s390_verllf(__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector signed long long
vec_rli(__vector signed long long __a, unsigned long __b) {
return (__vector signed long long)__builtin_s390_verllg(
- (__vector unsigned long long)__a, (int)__b);
+ (__vector unsigned long long)__a, (unsigned char)__b);
}
static inline __ATTRS_o_ai __vector unsigned long long
vec_rli(__vector unsigned long long __a, unsigned long __b) {
- return __builtin_s390_verllg(__a, (int)__b);
+ return __builtin_s390_verllg(__a, (unsigned char)__b);
}
/*-- vec_rl_mask ------------------------------------------------------------*/
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
index f01813ee76034..d17daaf35ca4b 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-vector.c
@@ -23,6 +23,7 @@ volatile vec_ulong vul;
volatile vec_double vd;
volatile unsigned int len;
+volatile unsigned char amt;
const void * volatile cptr;
void * volatile ptr;
int cc;
@@ -184,23 +185,23 @@ void test_integer(void) {
vul = __builtin_s390_verimg(vul, vul, vul, 255);
// CHECK: call <2 x i64> @llvm.s390.verimg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i32 255)
- vuc = __builtin_s390_verllb(vuc, len);
- // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
- vus = __builtin_s390_verllh(vus, len);
- // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
- vui = __builtin_s390_verllf(vui, len);
- // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
- vul = __builtin_s390_verllg(vul, len);
- // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+ vuc = __builtin_s390_verllb(vuc, amt);
+ // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ vus = __builtin_s390_verllh(vus, amt);
+ // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ vui = __builtin_s390_verllf(vui, amt);
+ // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ vul = __builtin_s390_verllg(vul, amt);
+ // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
vuc = __builtin_s390_verllvb(vuc, vuc);
- // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
vus = __builtin_s390_verllvh(vus, vus);
- // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
vui = __builtin_s390_verllvf(vui, vui);
- // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
vul = __builtin_s390_verllvg(vul, vul);
- // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
vus = __builtin_s390_vgfmb(vuc, vuc);
// CHECK: call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
diff --git a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
index 44f8cbe2cc017..0dc2fa7c66dd2 100644
--- a/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
+++ b/clang/test/CodeGen/SystemZ/builtins-systemz-zvector.c
@@ -2564,53 +2564,53 @@ void test_integer(void) {
// (emulated)
vsc = vec_rl(vsc, vuc);
- // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: verllvb
vuc = vec_rl(vuc, vuc);
- // CHECK: call <16 x i8> @llvm.s390.verllvb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: verllvb
vss = vec_rl(vss, vus);
- // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK-ASM: verllvh
vus = vec_rl(vus, vus);
- // CHECK: call <8 x i16> @llvm.s390.verllvh(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK-ASM: verllvh
vsi = vec_rl(vsi, vui);
- // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
// CHECK-ASM: verllvf
vui = vec_rl(vui, vui);
- // CHECK: call <4 x i32> @llvm.s390.verllvf(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
// CHECK-ASM: verllvf
vsl = vec_rl(vsl, vul);
- // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: verllvg
vul = vec_rl(vul, vul);
- // CHECK: call <2 x i64> @llvm.s390.verllvg(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: verllvg
vsc = vec_rli(vsc, ul);
- // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: verllb
vuc = vec_rli(vuc, ul);
- // CHECK: call <16 x i8> @llvm.s390.verllb(<16 x i8> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
// CHECK-ASM: verllb
vss = vec_rli(vss, ul);
- // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK-ASM: verllh
vus = vec_rli(vus, ul);
- // CHECK: call <8 x i16> @llvm.s390.verllh(<8 x i16> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
// CHECK-ASM: verllh
vsi = vec_rli(vsi, ul);
- // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
// CHECK-ASM: verllf
vui = vec_rli(vui, ul);
- // CHECK: call <4 x i32> @llvm.s390.verllf(<4 x i32> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
// CHECK-ASM: verllf
vsl = vec_rli(vsl, ul);
- // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: verllg
vul = vec_rli(vul, ul);
- // CHECK: call <2 x i64> @llvm.s390.verllg(<2 x i64> %{{.*}}, i32 %{{.*}})
+ // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
// CHECK-ASM: verllg
vsc = vec_rl_mask(vsc, vuc, 0);
diff --git a/llvm/include/llvm/IR/IntrinsicsSystemZ.td b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
index 9d21f3eb5352e..9f79bdfa9d2d2 100644
--- a/llvm/include/llvm/IR/IntrinsicsSystemZ.td
+++ b/llvm/include/llvm/IR/IntrinsicsSystemZ.td
@@ -30,10 +30,6 @@ class SystemZBinaryConv<string name, LLVMType result, LLVMType arg>
class SystemZBinary<string name, LLVMType type>
: SystemZBinaryConv<name, type, type>;
-class SystemZBinaryInt<string name, LLVMType type>
- : ClangBuiltin<"__builtin_s390_" # name>,
- Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>;
-
class SystemZBinaryConvCC<LLVMType result, LLVMType arg>
: Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>;
@@ -131,13 +127,6 @@ multiclass SystemZBinaryBHFG<string name> : SystemZBinaryBHF<name> {
def g : SystemZBinary<name#"g", llvm_v2i64_ty>;
}
-multiclass SystemZBinaryIntBHFG<string name> {
- def b : SystemZBinaryInt<name#"b", llvm_v16i8_ty>;
- def h : SystemZBinaryInt<name#"h", llvm_v8i16_ty>;
- def f : SystemZBinaryInt<name#"f", llvm_v4i32_ty>;
- def g : SystemZBinaryInt<name#"g", llvm_v2i64_ty>;
-}
-
multiclass SystemZBinaryCCBHF {
def bs : SystemZBinaryCC<llvm_v16i8_ty>;
def hs : SystemZBinaryCC<llvm_v8i16_ty>;
@@ -303,8 +292,6 @@ let TargetPrefix = "s390" in {
defm int_s390_vmo : SystemZBinaryExtBHF<"vmo">;
defm int_s390_vmlo : SystemZBinaryExtBHF<"vmlo">;
- defm int_s390_verllv : SystemZBinaryBHFG<"verllv">;
- defm int_s390_verll : SystemZBinaryIntBHFG<"verll">;
defm int_s390_verim : SystemZQuaternaryIntBHFG<"verim">;
def int_s390_vsl : SystemZBinary<"vsl", llvm_v16i8_ty>;
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 4e57986206dc6..d0eb0255f7d92 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -385,16 +385,12 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
- // Detect shifts by a scalar amount and convert them into
+ // Detect shifts/rotates by a scalar amount and convert them into
// V*_BY_SCALAR.
setOperationAction(ISD::SHL, VT, Custom);
setOperationAction(ISD::SRA, VT, Custom);
setOperationAction(ISD::SRL, VT, Custom);
-
- // At present ROTL isn't matched by DAGCombiner. ROTR should be
- // converted into ROTL.
- setOperationAction(ISD::ROTL, VT, Expand);
- setOperationAction(ISD::ROTR, VT, Expand);
+ setOperationAction(ISD::ROTL, VT, Custom);
// Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
// and inverting the result as necessary.
@@ -5979,6 +5975,8 @@ SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
case ISD::SRA:
return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
+ case ISD::ROTL:
+ return lowerShift(Op, DAG, SystemZISD::VROTL_BY_SCALAR);
case ISD::IS_FPCLASS:
return lowerIS_FPCLASS(Op, DAG);
case ISD::GET_ROUNDING:
@@ -6143,6 +6141,7 @@ const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const {
OPCODE(VSHL_BY_SCALAR);
OPCODE(VSRL_BY_SCALAR);
OPCODE(VSRA_BY_SCALAR);
+ OPCODE(VROTL_BY_SCALAR);
OPCODE(VSUM);
OPCODE(VICMPE);
OPCODE(VICMPH);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index fd951b935702a..40fe433f816fa 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -215,11 +215,12 @@ enum NodeType : unsigned {
UNPACK_LOW,
UNPACKL_LOW,
- // Shift each element of vector operand 0 by the number of bits specified
- // by scalar operand 1.
+ // Shift/rotate each element of vector operand 0 by the number of bits
+ // specified by scalar operand 1.
VSHL_BY_SCALAR,
VSRL_BY_SCALAR,
VSRA_BY_SCALAR,
+ VROTL_BY_SCALAR,
// For each element of the output type, sum across all sub-elements of
// operand 0 belonging to the corresponding element, and add in the
diff --git a/llvm/lib/Target/SystemZ/SystemZInstrVector.td b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
index 82863d7838a95..37d6945dc7a05 100644
--- a/llvm/lib/Target/SystemZ/SystemZInstrVector.td
+++ b/llvm/lib/Target/SystemZ/SystemZInstrVector.td
@@ -732,21 +732,17 @@ let Predicates = [FeatureVector] in {
// Element rotate left logical (with vector shift amount).
def VERLLV : BinaryVRRcGeneric<"verllv", 0xE773>;
- def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
- v128b, v128b, 0>;
- def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
- v128h, v128h, 1>;
- def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
- v128f, v128f, 2>;
- def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
- v128g, v128g, 3>;
+ def VERLLVB : BinaryVRRc<"verllvb", 0xE773, rotl, v128b, v128b, 0>;
+ def VERLLVH : BinaryVRRc<"verllvh", 0xE773, rotl, v128h, v128h, 1>;
+ def VERLLVF : BinaryVRRc<"verllvf", 0xE773, rotl, v128f, v128f, 2>;
+ def VERLLVG : BinaryVRRc<"verllvg", 0xE773, rotl, v128g, v128g, 3>;
// Element rotate left logical (with scalar shift amount).
def VERLL : BinaryVRSaGeneric<"verll", 0xE733>;
- def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
- def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
- def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
- def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
+ def VERLLB : BinaryVRSa<"verllb", 0xE733, z_vrotl_by_scalar, v128b, v128b, 0>;
+ def VERLLH : BinaryVRSa<"verllh", 0xE733, z_vrotl_by_scalar, v128h, v128h, 1>;
+ def VERLLF : BinaryVRSa<"verllf", 0xE733, z_vrotl_by_scalar, v128f, v128f, 2>;
+ def VERLLG : BinaryVRSa<"verllg", 0xE733, z_vrotl_by_scalar, v128g, v128g, 3>;
// Element rotate and insert under mask.
def VERIM : QuaternaryVRIdGeneric<"verim", 0xE772>;
diff --git a/llvm/lib/Target/SystemZ/SystemZOperators.td b/llvm/lib/Target/SystemZ/SystemZOperators.td
index 6713cac2a7807..4f0f23fe3ef8e 100644
--- a/llvm/lib/Target/SystemZ/SystemZOperators.td
+++ b/llvm/lib/Target/SystemZ/SystemZOperators.td
@@ -324,6 +324,8 @@ def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR",
SDT_ZVecBinaryInt>;
def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR",
SDT_ZVecBinaryInt>;
+def z_vrotl_by_scalar : SDNode<"SystemZISD::VROTL_BY_SCALAR",
+ SDT_ZVecBinaryInt>;
def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
diff --git a/llvm/test/CodeGen/SystemZ/vec-intrinsics-01.ll b/llvm/test/CodeGen/SystemZ/vec-intrinsics-01.ll
index 5338ccc9b4292..e69dc9d009a54 100644
--- a/llvm/test/CodeGen/SystemZ/vec-intrinsics-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-intrinsics-01.ll
@@ -94,14 +94,6 @@ declare <2 x i64> @llvm.s390.vmof(<4 x i32>, <4 x i32>)
declare <8 x i16> @llvm.s390.vmlob(<16 x i8>, <16 x i8>)
declare <4 x i32> @llvm.s390.vmloh(<8 x i16>, <8 x i16>)
declare <2 x i64> @llvm.s390.vmlof(<4 x i32>, <4 x i32>)
-declare <16 x i8> @llvm.s390.verllvb(<16 x i8>, <16 x i8>)
-declare <8 x i16> @llvm.s390.verllvh(<8 x i16>, <8 x i16>)
-declare <4 x i32> @llvm.s390.verllvf(<4 x i32>, <4 x i32>)
-declare <2 x i64> @llvm.s390.verllvg(<2 x i64>, <2 x i64>)
-declare <16 x i8> @llvm.s390.verllb(<16 x i8>, i32)
-declare <8 x i16> @llvm.s390.verllh(<8 x i16>, i32)
-declare <4 x i32> @llvm.s390.verllf(<4 x i32>, i32)
-declare <2 x i64> @llvm.s390.verllg(<2 x i64>, i32)
declare <16 x i8> @llvm.s390.verimb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
declare <8 x i16> @llvm.s390.verimh(<8 x i16>, <8 x i16>, <8 x i16>, i32)
declare <4 x i32> @llvm.s390.verimf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
@@ -1487,117 +1479,6 @@ define <2 x i64> @test_vmlof(<4 x i32> %a, <4 x i32> %b) {
ret <2 x i64> %res
}
-; VERLLVB.
-define <16 x i8> @test_verllvb(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: test_verllvb:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllvb %v24, %v24, %v26
-; CHECK-NEXT: br %r14
- %res = call <16 x i8> @llvm.s390.verllvb(<16 x i8> %a, <16 x i8> %b)
- ret <16 x i8> %res
-}
-
-; VERLLVH.
-define <8 x i16> @test_verllvh(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: test_verllvh:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllvh %v24, %v24, %v26
-; CHECK-NEXT: br %r14
- %res = call <8 x i16> @llvm.s390.verllvh(<8 x i16> %a, <8 x i16> %b)
- ret <8 x i16> %res
-}
-
-; VERLLVF.
-define <4 x i32> @test_verllvf(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: test_verllvf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllvf %v24, %v24, %v26
-; CHECK-NEXT: br %r14
- %res = call <4 x i32> @llvm.s390.verllvf(<4 x i32> %a, <4 x i32> %b)
- ret <4 x i32> %res
-}
-
-; VERLLVG.
-define <2 x i64> @test_verllvg(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: test_verllvg:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllvg %v24, %v24, %v26
-; CHECK-NEXT: br %r14
- %res = call <2 x i64> @llvm.s390.verllvg(<2 x i64> %a, <2 x i64> %b)
- ret <2 x i64> %res
-}
-
-; VERLLB.
-define <16 x i8> @test_verllb(<16 x i8> %a, i32 %b) {
-; CHECK-LABEL: test_verllb:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllb %v24, %v24, 0(%r2)
-; CHECK-NEXT: br %r14
- %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 %b)
- ret <16 x i8> %res
-}
-
-; VERLLH.
-define <8 x i16> @test_verllh(<8 x i16> %a, i32 %b) {
-; CHECK-LABEL: test_verllh:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllh %v24, %v24, 0(%r2)
-; CHECK-NEXT: br %r14
- %res = call <8 x i16> @llvm.s390.verllh(<8 x i16> %a, i32 %b)
- ret <8 x i16> %res
-}
-
-; VERLLF.
-define <4 x i32> @test_verllf(<4 x i32> %a, i32 %b) {
-; CHECK-LABEL: test_verllf:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllf %v24, %v24, 0(%r2)
-; CHECK-NEXT: br %r14
- %res = call <4 x i32> @llvm.s390.verllf(<4 x i32> %a, i32 %b)
- ret <4 x i32> %res
-}
-
-; VERLLG.
-define <2 x i64> @test_verllg(<2 x i64> %a, i32 %b) {
-; CHECK-LABEL: test_verllg:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllg %v24, %v24, 0(%r2)
-; CHECK-NEXT: br %r14
- %res = call <2 x i64> @llvm.s390.verllg(<2 x i64> %a, i32 %b)
- ret <2 x i64> %res
-}
-
-; VERLLB with the smallest count.
-define <16 x i8> @test_verllb_1(<16 x i8> %a) {
-; CHECK-LABEL: test_verllb_1:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllb %v24, %v24, 1
-; CHECK-NEXT: br %r14
- %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 1)
- ret <16 x i8> %res
-}
-
-; VERLLB with the largest count.
-define <16 x i8> @test_verllb_4095(<16 x i8> %a) {
-; CHECK-LABEL: test_verllb_4095:
-; CHECK: # %bb.0:
-; CHECK-NEXT: verllb %v24, %v24, 4095
-; CHECK-NEXT: br %r14
- %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4095)
- ret <16 x i8> %res
-}
-
-; VERLLB with the largest count + 1.
-define <16 x i8> @test_verllb_4096(<16 x i8> %a) {
-; CHECK-LABEL: test_verllb_4096:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lhi %r1, 4096
-; CHECK-NEXT: verllb %v24, %v24, 0(%r1)
-; CHECK-NEXT: br %r14
- %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4096)
- ret <16 x i8> %res
-}
-
; VERIMB.
define <16 x i8> @test_verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
; CHECK-LABEL: test_verimb:
@@ -1888,7 +1769,7 @@ define void @test_vtm_all_store(<16 x i8> %a, <16 x i8> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vtm %v24, %v26
; CHECK-NEXT: bler %r14
-; CHECK-NEXT: .LBB151_1: # %store
+; CHECK-NEXT: .LBB140_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b)
@@ -1937,7 +1818,7 @@ define <16 x i8> @test_vceqbs_any_store(<16 x i8> %a, <16 x i8> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vceqbs %v24, %v24, %v26
; CHECK-NEXT: bor %r14
-; CHECK-NEXT: .LBB154_1: # %store
+; CHECK-NEXT: .LBB143_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
@@ -1988,7 +1869,7 @@ define <8 x i16> @test_vceqhs_notall_store(<8 x i16> %a, <8 x i16> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vceqhs %v24, %v24, %v26
; CHECK-NEXT: ber %r14
-; CHECK-NEXT: .LBB157_1: # %store
+; CHECK-NEXT: .LBB146_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
@@ -2040,7 +1921,7 @@ define <4 x i32> @test_vceqfs_none_store(<4 x i32> %a, <4 x i32> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vceqfs %v24, %v24, %v26
; CHECK-NEXT: bler %r14
-; CHECK-NEXT: .LBB160_1: # %store
+; CHECK-NEXT: .LBB149_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
@@ -2092,7 +1973,7 @@ define <2 x i64> @test_vceqgs_all_store(<2 x i64> %a, <2 x i64> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vceqgs %v24, %v24, %v26
; CHECK-NEXT: bnher %r14
-; CHECK-NEXT: .LBB163_1: # %store
+; CHECK-NEXT: .LBB152_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
@@ -2143,7 +2024,7 @@ define <16 x i8> @test_vchbs_any_store(<16 x i8> %a, <16 x i8> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vchbs %v24, %v24, %v26
; CHECK-NEXT: bor %r14
-; CHECK-NEXT: .LBB166_1: # %store
+; CHECK-NEXT: .LBB155_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
@@ -2194,7 +2075,7 @@ define <8 x i16> @test_vchhs_notall_store(<8 x i16> %a, <8 x i16> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vchhs %v24, %v24, %v26
; CHECK-NEXT: ber %r14
-; CHECK-NEXT: .LBB169_1: # %store
+; CHECK-NEXT: .LBB158_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
@@ -2246,7 +2127,7 @@ define <4 x i32> @test_vchfs_none_store(<4 x i32> %a, <4 x i32> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vchfs %v24, %v24, %v26
; CHECK-NEXT: bler %r14
-; CHECK-NEXT: .LBB172_1: # %store
+; CHECK-NEXT: .LBB161_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
@@ -2297,7 +2178,7 @@ define <2 x i64> @test_vchgs_all_store(<2 x i64> %a, <2 x i64> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vchgs %v24, %v24, %v26
; CHECK-NEXT: bnher %r14
-; CHECK-NEXT: .LBB175_1: # %store
+; CHECK-NEXT: .LBB164_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
@@ -2348,7 +2229,7 @@ define <16 x i8> @test_vchlbs_any_store(<16 x i8> %a, <16 x i8> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vchlbs %v24, %v24, %v26
; CHECK-NEXT: bor %r14
-; CHECK-NEXT: .LBB178_1: # %store
+; CHECK-NEXT: .LBB167_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
@@ -2399,7 +2280,7 @@ define <8 x i16> @test_vchlhs_notall_store(<8 x i16> %a, <8 x i16> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vchlhs %v24, %v24, %v26
; CHECK-NEXT: ber %r14
-; CHECK-NEXT: .LBB181_1: # %store
+; CHECK-NEXT: .LBB170_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
@@ -2451,7 +2332,7 @@ define <4 x i32> @test_vchlfs_none_store(<4 x i32> %a, <4 x i32> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vchlfs %v24, %v24, %v26
; CHECK-NEXT: bler %r14
-; CHECK-NEXT: .LBB184_1: # %store
+; CHECK-NEXT: .LBB173_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
@@ -2503,7 +2384,7 @@ define <2 x i64> @test_vchlgs_all_store(<2 x i64> %a, <2 x i64> %b, ptr %ptr) {
; CHECK: # %bb.0:
; CHECK-NEXT: vchlgs %v24, %v24, %v26
; CHECK-NEXT: bnher %r14
-; CHECK-NEXT: .LBB187_1: # %store
+; CHECK-NEXT: .LBB176_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
%call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
@@ -3450,7 +3331,7 @@ define <2 x i64> @test_vfcedbs_any_store(<2 x double> %a, <2 x double> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vfcedbs %v24, %v24, %v26
; CHECK-NEXT: bor %r14
-; CHECK-NEXT: .LBB260_1: # %store
+; CHECK-NEXT: .LBB249_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
@@ -3505,7 +3386,7 @@ define <2 x i64> @test_vfchdbs_notall_store(<2 x double> %a, <2 x double> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vfchdbs %v24, %v24, %v26
; CHECK-NEXT: ber %r14
-; CHECK-NEXT: .LBB263_1: # %store
+; CHECK-NEXT: .LBB252_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
@@ -3560,7 +3441,7 @@ define <2 x i64> @test_vfchedbs_none_store(<2 x double> %a, <2 x double> %b,
; CHECK: # %bb.0:
; CHECK-NEXT: vfchedbs %v24, %v24, %v26
; CHECK-NEXT: bler %r14
-; CHECK-NEXT: .LBB266_1: # %store
+; CHECK-NEXT: .LBB255_1: # %store
; CHECK-NEXT: mvhi 0(%r2), 0
; CHECK-NEXT: br %r14
ptr %ptr) {
diff --git a/llvm/test/CodeGen/SystemZ/vec-rot-01.ll b/llvm/test/CodeGen/SystemZ/vec-rot-01.ll
new file mode 100644
index 0000000000000..fae20350f3caf
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-rot-01.ll
@@ -0,0 +1,123 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Test vector rotate left instructions with vector rotate amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+
+; Test a v16i8 rotate left.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val, <16 x i8> %amt) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %inv = sub <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8,
+ i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %amt
+ %parta = shl <16 x i8> %val, %amt
+ %partb = lshr <16 x i8> %val, %inv
+
+ %rotl = or <16 x i8> %parta, %partb
+
+ ret <16 x i8> %rotl
+}
+
+; Test a v16i8 rotate left (matched from fshl).
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val, <16 x i8> %amt) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvb %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %rotl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %val, <16 x i8> %val, <16 x i8> %amt)
+
+ ret <16 x i8> %rotl
+}
+
+; Test a v8i16 rotate left.
+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val, <8 x i16> %amt) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvh %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %inv = sub <8 x i16> <i16 16, i16 16, i16 16, i16 16,
+ i16 16, i16 16, i16 16, i16 16>, %amt
+ %parta = shl <8 x i16> %val, %amt
+ %partb = lshr <8 x i16> %val, %inv
+
+ %rotl = or <8 x i16> %parta, %partb
+
+ ret <8 x i16> %rotl
+}
+
+; Test a v8i16 rotate left (matched from fshl).
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val, <8 x i16> %amt) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvh %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %rotl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %val, <8 x i16> %val, <8 x i16> %amt)
+
+ ret <8 x i16> %rotl
+}
+
+; Test a v4i32 rotate left.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val, <4 x i32> %amt) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvf %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %inv = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %amt
+ %parta = shl <4 x i32> %val, %amt
+ %partb = lshr <4 x i32> %val, %inv
+
+ %rotl = or <4 x i32> %parta, %partb
+
+ ret <4 x i32> %rotl
+}
+
+; Test a v4i32 rotate left (matched from fshl).
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val, <4 x i32> %amt) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvf %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %rotl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %val, <4 x i32> %val, <4 x i32> %amt)
+
+ ret <4 x i32> %rotl
+}
+
+; Test a v2i64 rotate left.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val, <2 x i64> %amt) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvg %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %inv = sub <2 x i64> <i64 64, i64 64>, %amt
+ %parta = shl <2 x i64> %val, %amt
+ %partb = lshr <2 x i64> %val, %inv
+
+ %rotl = or <2 x i64> %parta, %partb
+
+ ret <2 x i64> %rotl
+}
+
+; Test a v2i64 rotate left (matched from fshl).
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val, <2 x i64> %amt) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllvg %v24, %v26, %v28
+; CHECK-NEXT: br %r14
+
+ %rotl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %val, <2 x i64> %val, <2 x i64> %amt)
+
+ ret <2 x i64> %rotl
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-rot-02.ll b/llvm/test/CodeGen/SystemZ/vec-rot-02.ll
new file mode 100644
index 0000000000000..1331c6290af17
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-rot-02.ll
@@ -0,0 +1,177 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; Test vector rotate left instructions with scalar rotate amount.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+declare <16 x i8> @llvm.fshl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>)
+declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)
+declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>)
+
+; Test a v16i8 rotate left.
+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val, i32 %scalar) {
+; CHECK-LABEL: f1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllb %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %scalar_tmp = trunc i32 %scalar to i8
+ %tmp = insertelement <16 x i8> undef, i8 %scalar_tmp, i32 0
+ %amt = shufflevector <16 x i8> %tmp, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+
+ %inv = sub <16 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8,
+ i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>, %amt
+ %parta = shl <16 x i8> %val, %amt
+ %partb = lshr <16 x i8> %val, %inv
+
+ %rotl = or <16 x i8> %parta, %partb
+
+ ret <16 x i8> %rotl
+}
+
+; Test a v16i8 rotate left (matched from fshl).
+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val, i32 %scalar) {
+; CHECK-LABEL: f2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllb %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %scalar_tmp = trunc i32 %scalar to i8
+ %tmp = insertelement <16 x i8> undef, i8 %scalar_tmp, i32 0
+ %amt = shufflevector <16 x i8> %tmp, <16 x i8> undef,
+ <16 x i32> zeroinitializer
+
+ %rotl = tail call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %val, <16 x i8> %val, <16 x i8> %amt)
+
+ ret <16 x i8> %rotl
+}
+
+; Test a v8i16 rotate left.
+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val, i32 %scalar) {
+; CHECK-LABEL: f3:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllh %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %scalar_tmp = trunc i32 %scalar to i16
+ %tmp = insertelement <8 x i16> undef, i16 %scalar_tmp, i32 0
+ %amt = shufflevector <8 x i16> %tmp, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+
+ %inv = sub <8 x i16> <i16 16, i16 16, i16 16, i16 16,
+ i16 16, i16 16, i16 16, i16 16>, %amt
+ %parta = shl <8 x i16> %val, %amt
+ %partb = lshr <8 x i16> %val, %inv
+
+ %rotl = or <8 x i16> %parta, %partb
+
+ ret <8 x i16> %rotl
+}
+
+; Test a v8i16 rotate left (matched from fshl).
+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val, i32 %scalar) {
+; CHECK-LABEL: f4:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllh %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %scalar_tmp = trunc i32 %scalar to i16
+ %tmp = insertelement <8 x i16> undef, i16 %scalar_tmp, i32 0
+ %amt = shufflevector <8 x i16> %tmp, <8 x i16> undef,
+ <8 x i32> zeroinitializer
+
+ %rotl = tail call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %val, <8 x i16> %val, <8 x i16> %amt)
+
+ ret <8 x i16> %rotl
+}
+
+; Test a v4i32 rotate left.
+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val, i32 %scalar) {
+; CHECK-LABEL: f5:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllf %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %tmp = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %amt = shufflevector <4 x i32> %tmp, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+
+ %inv = sub <4 x i32> <i32 32, i32 32, i32 32, i32 32>, %amt
+ %parta = shl <4 x i32> %val, %amt
+ %partb = lshr <4 x i32> %val, %inv
+
+ %rotl = or <4 x i32> %parta, %partb
+
+ ret <4 x i32> %rotl
+}
+
+; Test a v4i32 rotate left (matched from fshl).
+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val, i32 %scalar) {
+; CHECK-LABEL: f6:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllf %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %tmp = insertelement <4 x i32> undef, i32 %scalar, i32 0
+ %amt = shufflevector <4 x i32> %tmp, <4 x i32> undef,
+ <4 x i32> zeroinitializer
+
+ %rotl = tail call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %val, <4 x i32> %val, <4 x i32> %amt)
+
+ ret <4 x i32> %rotl
+}
+
+; Test a v2i64 rotate left.
+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val, i32 %scalar) {
+; CHECK-LABEL: f7:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllg %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %scalar_tmp = zext i32 %scalar to i64
+ %tmp = insertelement <2 x i64> undef, i64 %scalar_tmp, i32 0
+ %amt = shufflevector <2 x i64> %tmp, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+
+ %inv = sub <2 x i64> <i64 64, i64 64>, %amt
+ %parta = shl <2 x i64> %val, %amt
+ %partb = lshr <2 x i64> %val, %inv
+
+ %rotl = or <2 x i64> %parta, %partb
+
+ ret <2 x i64> %rotl
+}
+
+; Test a v2i64 rotate left (matched from fshl).
+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val, i32 %scalar) {
+; CHECK-LABEL: f8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllg %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %scalar_tmp = zext i32 %scalar to i64
+ %tmp = insertelement <2 x i64> undef, i64 %scalar_tmp, i32 0
+ %amt = shufflevector <2 x i64> %tmp, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+
+ %rotl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %val, <2 x i64> %val, <2 x i64> %amt)
+
+ ret <2 x i64> %rotl
+}
+
+; Test a v2i64 rotate left (matched from fshl).
+define <2 x i64> @f9(<2 x i64> %dummy, <2 x i64> %val, i64 %scalar) {
+; CHECK-LABEL: f9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: verllg %v24, %v26, 0(%r2)
+; CHECK-NEXT: br %r14
+
+ %tmp = insertelement <2 x i64> undef, i64 %scalar, i32 0
+ %amt = shufflevector <2 x i64> %tmp, <2 x i64> undef,
+ <2 x i32> zeroinitializer
+
+ %rotl = tail call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %val, <2 x i64> %val, <2 x i64> %amt)
+
+ ret <2 x i64> %rotl
+}