[llvm] 88a973c - [AArch64][SVE] Add intrinsics for binary narrowing operations
Andrzej Warzynski via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 20 02:20:38 PST 2019
Author: Andrzej Warzynski
Date: 2019-12-20T10:20:30Z
New Revision: 88a973cf688eb97326ddea731010a7ffc277c257
URL: https://github.com/llvm/llvm-project/commit/88a973cf688eb97326ddea731010a7ffc277c257
DIFF: https://github.com/llvm/llvm-project/commit/88a973cf688eb97326ddea731010a7ffc277c257.diff
LOG: [AArch64][SVE] Add intrinsics for binary narrowing operations
Summary:
The following intrinsics for binary narrowing shift right operations are
added:
* @llvm.aarch64.sve.shrnb
* @llvm.aarch64.sve.rshrnb
* @llvm.aarch64.sve.uqshrnb
* @llvm.aarch64.sve.sqshrnb
* @llvm.aarch64.sve.sqshrunb
* @llvm.aarch64.sve.uqrshrnb
* @llvm.aarch64.sve.sqrshrnb
* @llvm.aarch64.sve.sqrshrunb
* @llvm.aarch64.sve.shrnt
* @llvm.aarch64.sve.rshrnt
* @llvm.aarch64.sve.uqshrnt
* @llvm.aarch64.sve.sqshrnt
* @llvm.aarch64.sve.sqshrunt
* @llvm.aarch64.sve.uqrshrnt
* @llvm.aarch64.sve.sqrshrnt
* @llvm.aarch64.sve.sqrshrunt
Reviewers: sdesmalen, rengolin, efriedma
Reviewed By: efriedma
Subscribers: tschuett, kristof.beyls, hiraditya, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D71552
Added:
llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index ffafe83d25f8..fa1a3c918bec 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1021,6 +1021,17 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
[LLVMSubdivide2VectorType<0>, llvm_anyvector_ty, LLVMMatchType<0>],
[IntrNoMem]>;
+ class SVE2_1VectorArg_Imm_Narrowing_Intrinsic
+ : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ [llvm_anyvector_ty, llvm_i32_ty],
+ [IntrNoMem, ImmArg<1>]>;
+
+ class SVE2_2VectorArg_Imm_Narrowing_Intrinsic
+ : Intrinsic<[LLVMSubdivide2VectorType<0>],
+ [LLVMSubdivide2VectorType<0>, llvm_anyvector_ty,
+ llvm_i32_ty],
+ [IntrNoMem, ImmArg<2>]>;
+
// NOTE: There is no relationship between these intrinsics beyond an attempt
// to reuse currently identical class definitions.
class AdvSIMD_SVE_LOGB_Intrinsic : AdvSIMD_SVE_CNT_Intrinsic;
@@ -1559,4 +1570,32 @@ def int_aarch64_sve_subhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_rsubhnb : SVE2_2VectorArg_Narrowing_Intrinsic;
def int_aarch64_sve_rsubhnt : SVE2_Merged2VectorArg_Narrowing_Intrinsic;
+
+// Narrowing shift right
+def int_aarch64_sve_shrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_shrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_rshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_rshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input/output
+def int_aarch64_sve_sqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - unsigned input/output
+def int_aarch64_sve_uqshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_uqrshrnb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_uqrshrnt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+// Saturating shift right - signed input, unsigned output
+def int_aarch64_sve_sqshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
+
+def int_aarch64_sve_sqrshrunb : SVE2_1VectorArg_Imm_Narrowing_Intrinsic;
+def int_aarch64_sve_sqrshrunt : SVE2_2VectorArg_Imm_Narrowing_Intrinsic;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index 222365fd7872..4ac52a48b3a1 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -624,6 +624,30 @@ def vecshiftR64Narrow : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm1_32Operand;
}
+// Same as vecshiftR#N, but use TargetConstant (TimmLeaf) instead of Constant
+// (ImmLeaf)
+def tvecshiftR8 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 9);
+}]> {
+ let EncoderMethod = "getVecShiftR8OpValue";
+ let DecoderMethod = "DecodeVecShiftR8Imm";
+ let ParserMatchClass = Imm1_8Operand;
+}
+def tvecshiftR16 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 17);
+}]> {
+ let EncoderMethod = "getVecShiftR16OpValue";
+ let DecoderMethod = "DecodeVecShiftR16Imm";
+ let ParserMatchClass = Imm1_16Operand;
+}
+def tvecshiftR32 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) > 0) && (((uint32_t)Imm) < 33);
+}]> {
+ let EncoderMethod = "getVecShiftR32OpValue";
+ let DecoderMethod = "DecodeVecShiftR32Imm";
+ let ParserMatchClass = Imm1_32Operand;
+}
+
def Imm0_1Operand : AsmImmRange<0, 1>;
def Imm0_7Operand : AsmImmRange<0, 7>;
def Imm0_15Operand : AsmImmRange<0, 15>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 92bc59a3b72c..42d8c36277b0 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1426,24 +1426,24 @@ let Predicates = [HasSVE2] in {
defm SBCLT_ZZZ : sve2_int_addsub_long_carry<0b11, "sbclt">;
// SVE2 bitwise shift right narrow (bottom)
- defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb">;
- defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb">;
- defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb">;
- defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb">;
- defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb">;
- defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb">;
- defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb">;
- defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb">;
+ defm SQSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b000, "sqshrunb", int_aarch64_sve_sqshrunb>;
+ defm SQRSHRUNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b001, "sqrshrunb", int_aarch64_sve_sqrshrunb>;
+ defm SHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b010, "shrnb", int_aarch64_sve_shrnb>;
+ defm RSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b011, "rshrnb", int_aarch64_sve_rshrnb>;
+ defm SQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b100, "sqshrnb", int_aarch64_sve_sqshrnb>;
+ defm SQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b101, "sqrshrnb", int_aarch64_sve_sqrshrnb>;
+ defm UQSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b110, "uqshrnb", int_aarch64_sve_uqshrnb>;
+ defm UQRSHRNB_ZZI : sve2_int_bin_shift_imm_right_narrow_bottom<0b111, "uqrshrnb", int_aarch64_sve_uqrshrnb>;
// SVE2 bitwise shift right narrow (top)
- defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt">;
- defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt">;
- defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt">;
- defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt">;
- defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt">;
- defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt">;
- defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt">;
- defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt">;
+ defm SQSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b000, "sqshrunt", int_aarch64_sve_sqshrunt>;
+ defm SQRSHRUNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b001, "sqrshrunt", int_aarch64_sve_sqrshrunt>;
+ defm SHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b010, "shrnt", int_aarch64_sve_shrnt>;
+ defm RSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b011, "rshrnt", int_aarch64_sve_rshrnt>;
+ defm SQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b100, "sqshrnt", int_aarch64_sve_sqshrnt>;
+ defm SQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b101, "sqrshrnt", int_aarch64_sve_sqrshrnt>;
+ defm UQSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b110, "uqshrnt", int_aarch64_sve_uqshrnt>;
+ defm UQRSHRNT_ZZI : sve2_int_bin_shift_imm_right_narrow_top<0b111, "uqrshrnt", int_aarch64_sve_uqrshrnt>;
// SVE2 integer add/subtract narrow high part (bottom)
defm ADDHNB_ZZZ : sve2_int_addsub_narrow_high_bottom<0b00, "addhnb", int_aarch64_sve_addhnb>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 63214215add7..31823118bffe 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -334,6 +334,11 @@ class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
(inst $Op1, $Op2, $Op3, $Op4)>;
+class SVE_2_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, Operand ImmTy, Instruction inst>
+: Pat<(vtd (op vt1:$Op1, (vt2 ImmTy:$Op2))),
+ (inst $Op1, ImmTy:$Op2)>;
+
class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Operand ImmTy,
Instruction inst>
@@ -2965,17 +2970,21 @@ class sve2_int_bin_shift_imm_narrow_bottom<bits<3> tsz8_64, bits<3> opc,
let Inst{4-0} = Zd;
}
-multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_bottom<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_bottom<{0,0,1}, opc, asm, ZPR8, ZPR16,
- vecshiftR8>;
+ tvecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_bottom<{0,1,?}, opc, asm, ZPR16, ZPR32,
- vecshiftR16> {
+ tvecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_bottom<{1,?,?}, opc, asm, ZPR32, ZPR64,
vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
+ def : SVE_2_Op_Imm_Pat<nxv16i8, op, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Imm_Pat<nxv8i16, op, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Imm_Pat<nxv4i32, op, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
@@ -3001,17 +3010,21 @@ class sve2_int_bin_shift_imm_narrow_top<bits<3> tsz8_64, bits<3> opc,
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right_narrow_top<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm_narrow_top<{0,0,1}, opc, asm, ZPR8, ZPR16,
- vecshiftR8>;
+ tvecshiftR8>;
def _H : sve2_int_bin_shift_imm_narrow_top<{0,1,?}, opc, asm, ZPR16, ZPR32,
- vecshiftR16> {
+ tvecshiftR16> {
let Inst{19} = imm{3};
}
def _S : sve2_int_bin_shift_imm_narrow_top<{1,?,?}, opc, asm, ZPR32, ZPR64,
vecshiftR32> {
let Inst{20-19} = imm{4-3};
}
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv8i16, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv4i32, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv2i64, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
}
class sve2_int_addsub_narrow_high_bottom<bits<2> sz, bits<2> opc, string asm,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
new file mode 100644
index 000000000000..27f7d71c5782
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-binary-narrowing-shr.ll
@@ -0,0 +1,512 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SHRNB
+;
+
+define <vscale x 16 x i8> @shrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: shrnb_h:
+; CHECK: shrnb z0.b, z0.h, #8
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16> %a,
+ i32 8)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: shrnb_s:
+; CHECK: shrnb z0.h, z0.s, #16
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32> %a,
+ i32 16)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: shrnb_d:
+; CHECK: shrnb z0.s, z0.d, #32
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64> %a,
+ i32 32)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; UQSHRNB
+;
+
+define <vscale x 16 x i8> @uqshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqshrnb_h:
+; CHECK: uqshrnb z0.b, z0.h, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16> %a,
+ i32 1)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqshrnb_s:
+; CHECK: uqshrnb z0.h, z0.s, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32> %a,
+ i32 1)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqshrnb_d:
+; CHECK: uqshrnb z0.s, z0.d, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64> %a,
+ i32 1)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRNB
+;
+
+define <vscale x 16 x i8> @sqshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshrnb_h:
+; CHECK: sqshrnb z0.b, z0.h, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16> %a,
+ i32 1)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshrnb_s:
+; CHECK: sqshrnb z0.h, z0.s, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32> %a,
+ i32 1)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshrnb_d:
+; CHECK: sqshrnb z0.s, z0.d, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64> %a,
+ i32 1)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRUNB
+;
+
+define <vscale x 16 x i8> @sqshrunb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshrunb_h:
+; CHECK: sqshrunb z0.b, z0.h, #7
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16> %a,
+ i32 7)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrunb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshrunb_s:
+; CHECK: sqshrunb z0.h, z0.s, #15
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32> %a,
+ i32 15)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrunb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshrunb_d:
+; CHECK: sqshrunb z0.s, z0.d, #31
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64> %a,
+ i32 31)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; UQRSHRNB
+;
+
+define <vscale x 16 x i8> @uqrshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: uqrshrnb_h:
+; CHECK: uqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
+ i32 2)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: uqrshrnb_s:
+; CHECK: uqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
+ i32 2)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: uqrshrnb_d:
+; CHECK: uqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
+ i32 2)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRNB
+;
+
+define <vscale x 16 x i8> @sqrshrnb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqrshrnb_h:
+; CHECK: sqrshrnb z0.b, z0.h, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16> %a,
+ i32 2)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrnb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqrshrnb_s:
+; CHECK: sqrshrnb z0.h, z0.s, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32> %a,
+ i32 2)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrnb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqrshrnb_d:
+; CHECK: sqrshrnb z0.s, z0.d, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64> %a,
+ i32 2)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRUNB
+;
+
+define <vscale x 16 x i8> @sqrshrunb_h(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqrshrunb_h:
+; CHECK: sqrshrunb z0.b, z0.h, #6
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16> %a,
+ i32 6)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrunb_s(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqrshrunb_s:
+; CHECK: sqrshrunb z0.h, z0.s, #14
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32> %a,
+ i32 14)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrunb_d(<vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqrshrunb_d:
+; CHECK: sqrshrunb z0.s, z0.d, #30
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64> %a,
+ i32 30)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SHRNT
+;
+
+define <vscale x 16 x i8> @shrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: shrnt_h:
+; CHECK: shrnt z0.b, z1.h, #3
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8> %a,
+ <vscale x 8 x i16> %b,
+ i32 3)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @shrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: shrnt_s:
+; CHECK: shrnt z0.h, z1.s, #3
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16> %a,
+ <vscale x 4 x i32> %b,
+ i32 3)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @shrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: shrnt_d:
+; CHECK: shrnt z0.s, z1.d, #3
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b,
+ i32 3)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; UQSHRNT
+;
+
+define <vscale x 16 x i8> @uqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshrnt_h:
+; CHECK: uqshrnt z0.b, z1.h, #5
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8> %a,
+ <vscale x 8 x i16> %b,
+ i32 5)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshrnt_s:
+; CHECK: uqshrnt z0.h, z1.s, #13
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16> %a,
+ <vscale x 4 x i32> %b,
+ i32 13)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshrnt_d:
+; CHECK: uqshrnt z0.s, z1.d, #29
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b,
+ i32 29)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRNT
+;
+
+define <vscale x 16 x i8> @sqshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshrnt_h:
+; CHECK: sqshrnt z0.b, z1.h, #5
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8> %a,
+ <vscale x 8 x i16> %b,
+ i32 5)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshrnt_s:
+; CHECK: sqshrnt z0.h, z1.s, #13
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16> %a,
+ <vscale x 4 x i32> %b,
+ i32 13)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshrnt_d:
+; CHECK: sqshrnt z0.s, z1.d, #29
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b,
+ i32 29)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQSHRUNT
+;
+
+define <vscale x 16 x i8> @sqshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshrunt_h:
+; CHECK: sqshrunt z0.b, z1.h, #4
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8> %a,
+ <vscale x 8 x i16> %b,
+ i32 4)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshrunt_s:
+; CHECK: sqshrunt z0.h, z1.s, #4
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16> %a,
+ <vscale x 4 x i32> %b,
+ i32 4)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshrunt_d:
+; CHECK: sqshrunt z0.s, z1.d, #4
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b,
+ i32 4)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; UQRSHRNT
+;
+
+define <vscale x 16 x i8> @uqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshrnt_h:
+; CHECK: uqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
+ <vscale x 8 x i16> %b,
+ i32 8)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshrnt_s:
+; CHECK: uqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
+ <vscale x 4 x i32> %b,
+ i32 12)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshrnt_d:
+; CHECK: uqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b,
+ i32 28)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRNT
+;
+
+define <vscale x 16 x i8> @sqrshrnt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshrnt_h:
+; CHECK: sqrshrnt z0.b, z1.h, #8
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8> %a,
+ <vscale x 8 x i16> %b,
+ i32 8)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrnt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshrnt_s:
+; CHECK: sqrshrnt z0.h, z1.s, #12
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16> %a,
+ <vscale x 4 x i32> %b,
+ i32 12)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrnt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshrnt_d:
+; CHECK: sqrshrnt z0.s, z1.d, #28
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b,
+ i32 28)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; SQRSHRUNT
+;
+
+define <vscale x 16 x i8> @sqrshrunt_h(<vscale x 16 x i8> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshrunt_h:
+; CHECK: sqrshrunt z0.b, z1.h, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8> %a,
+ <vscale x 8 x i16> %b,
+ i32 1)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshrunt_s(<vscale x 8 x i16> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshrunt_s:
+; CHECK: sqrshrunt z0.h, z1.s, #5
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16> %a,
+ <vscale x 4 x i32> %b,
+ i32 5)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshrunt_d(<vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshrunt_d:
+; CHECK: sqrshrunt z0.s, z1.d, #5
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b,
+ i32 5)
+ ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunb.nxv8i16(<vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunb.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunb.nxv2i64(<vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrnt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrnt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrnt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshrunt.nxv8i16(<vscale x 16 x i8>, <vscale x 8 x i16>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshrunt.nxv4i32(<vscale x 8 x i16>, <vscale x 4 x i32>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshrunt.nxv2i64(<vscale x 4 x i32>, <vscale x 2 x i64>, i32)
More information about the llvm-commits
mailing list