[llvm] 69558c8 - [AArch64][SVE] Add remaining SVE2 intrinsics for uniform DSP operations
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 02:53:00 PST 2020
Author: Kerry McLaughlin
Date: 2020-01-31T10:51:57Z
New Revision: 69558c84871d746e4a6c4d19a0b228cab629ef3a
URL: https://github.com/llvm/llvm-project/commit/69558c84871d746e4a6c4d19a0b228cab629ef3a
DIFF: https://github.com/llvm/llvm-project/commit/69558c84871d746e4a6c4d19a0b228cab629ef3a.diff
LOG: [AArch64][SVE] Add remaining SVE2 intrinsics for uniform DSP operations
Summary:
Implements the following intrinsics:
- @llvm.aarch64.sve.[s|u]qadd
- @llvm.aarch64.sve.[s|u]qsub
- @llvm.aarch64.sve.suqadd
- @llvm.aarch64.sve.usqadd
- @llvm.aarch64.sve.[s|u]qsubr
- @llvm.aarch64.sve.[s|u]rshl
- @llvm.aarch64.sve.[s|u]qshl
- @llvm.aarch64.sve.[s|u]qrshl
- @llvm.aarch64.sve.[s|u]rshr
- @llvm.aarch64.sve.sqshlu
- @llvm.aarch64.sve.sri
- @llvm.aarch64.sve.sli
- @llvm.aarch64.sve.[s|u]sra
- @llvm.aarch64.sve.[s|u]rsra
- @llvm.aarch64.sve.[s|u]aba
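As with the existing SVE intrinsics, each of these corresponds one-to-one to an SVE2 instruction. A minimal IR-level sketch of the predicated form (illustrative only; it mirrors the tests added below, and the function name @example is a placeholder):

  declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)

  ; Predicated saturating add; lowers to: sqadd z0.b, p0/m, z0.b, z1.b
  define <vscale x 16 x i8> @example(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
    %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
    ret <vscale x 16 x i8> %out
  }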
Reviewers: efriedma, sdesmalen, dancgr, cameron.mcinally, c-rhodes, rengolin
Reviewed By: sdesmalen
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cfe-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73551
Added:
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64InstrFormats.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 91a9b2454e61..0e51e2990fe4 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1675,10 +1675,13 @@ def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VectorBase_
// SVE2 - Uniform DSP operations
//
+def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic;
def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_sqabs : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_sqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_sqdmulh : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_sqdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
def int_aarch64_sve_sqneg : AdvSIMD_Merged1VectorArg_Intrinsic;
@@ -1688,13 +1691,35 @@ def int_aarch64_sve_sqrdmlsh : AdvSIMD_3VectorArg_Intrinsic;
def int_aarch64_sve_sqrdmlsh_lane : AdvSIMD_3VectorArgIndexed_Intrinsic;
def int_aarch64_sve_sqrdmulh : AdvSIMD_2VectorArg_Intrinsic;
def int_aarch64_sve_sqrdmulh_lane : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_sqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqshlu : AdvSIMD_SVE_ShiftByImm_Intrinsic;
+def int_aarch64_sve_sqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_srhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sri : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_srshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_srshr : AdvSIMD_SVE_ShiftByImm_Intrinsic;
+def int_aarch64_sve_srsra : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_ssra : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_suqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uaba : AdvSIMD_3VectorArg_Intrinsic;
def int_aarch64_sve_uhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqsub : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uqsubr : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_urecpe : AdvSIMD_Merged1VectorArg_Intrinsic;
def int_aarch64_sve_urhadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_urshl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_urshr : AdvSIMD_SVE_ShiftByImm_Intrinsic;
def int_aarch64_sve_ursqrte : AdvSIMD_Merged1VectorArg_Intrinsic;
+def int_aarch64_sve_ursra : AdvSIMD_2VectorArgIndexed_Intrinsic;
+def int_aarch64_sve_usqadd : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_usra : AdvSIMD_2VectorArgIndexed_Intrinsic;
//
// SVE2 - Non-widening pairwise arithmetic
diff --git a/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
index f2f7e1768596..05c657b2bed5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrFormats.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrFormats.td
@@ -698,6 +698,37 @@ def vecshiftL64 : Operand<i32>, ImmLeaf<i32, [{
let ParserMatchClass = Imm0_63Operand;
}
+// Same as vecshiftL#N, but use TargetConstant (TImmLeaf) instead of Constant
+// (ImmLeaf)
+def tvecshiftL8 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 8);
+}]> {
+ let EncoderMethod = "getVecShiftL8OpValue";
+ let DecoderMethod = "DecodeVecShiftL8Imm";
+ let ParserMatchClass = Imm0_7Operand;
+}
+def tvecshiftL16 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 16);
+}]> {
+ let EncoderMethod = "getVecShiftL16OpValue";
+ let DecoderMethod = "DecodeVecShiftL16Imm";
+ let ParserMatchClass = Imm0_15Operand;
+}
+def tvecshiftL32 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 32);
+}]> {
+ let EncoderMethod = "getVecShiftL32OpValue";
+ let DecoderMethod = "DecodeVecShiftL32Imm";
+ let ParserMatchClass = Imm0_31Operand;
+}
+def tvecshiftL64 : Operand<i32>, TImmLeaf<i32, [{
+ return (((uint32_t)Imm) < 64);
+}]> {
+ let EncoderMethod = "getVecShiftL64OpValue";
+ let DecoderMethod = "DecodeVecShiftL64Imm";
+ let ParserMatchClass = Imm0_63Operand;
+}
+
// Crazy immediate formats used by 32-bit and 64-bit logical immediate
// instructions for splatting repeating bit patterns across the immediate.
def logical_imm32_XFORM : SDNodeXForm<imm, [{
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index c2e607a11536..13b063585a44 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1530,35 +1530,35 @@ let Predicates = [HasSVE2] in {
defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg", int_aarch64_sve_sqneg>;
// SVE2 saturating add/subtract
- defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", null_frag>;
- defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", null_frag>;
- defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", null_frag>;
- defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", null_frag>;
- defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", null_frag>;
- defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", null_frag>;
- defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", null_frag>;
- defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", null_frag>;
+ defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", int_aarch64_sve_sqadd>;
+ defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", int_aarch64_sve_uqadd>;
+ defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", int_aarch64_sve_sqsub>;
+ defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", int_aarch64_sve_uqsub>;
+ defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", int_aarch64_sve_suqadd>;
+ defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", int_aarch64_sve_usqadd>;
+ defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", int_aarch64_sve_sqsubr>;
+ defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>;
// SVE2 saturating/rounding bitwise shift left (predicated)
- defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", null_frag>;
- defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", null_frag>;
+ defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl>;
+ defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl>;
defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>;
defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>;
- defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", null_frag>;
- defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", null_frag>;
- defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", null_frag>;
- defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", null_frag>;
+ defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl>;
+ defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl>;
+ defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl>;
+ defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl>;
defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>;
defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>;
defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>;
defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;
// SVE2 predicated shifts
- defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
- defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
- defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr">;
- defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr">;
- defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu">;
+ defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
+ defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl">;
+ defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", int_aarch64_sve_srshr>;
+ defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", int_aarch64_sve_urshr>;
+ defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", int_aarch64_sve_sqshlu>;
// SVE2 integer add/subtract long
defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb">;
@@ -1595,22 +1595,22 @@ let Predicates = [HasSVE2] in {
defm PMULLT_ZZZ : sve2_pmul_long<0b1, "pmullt">;
// SVE2 bitwise shift and insert
- defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri">;
- defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli">;
+ defm SRI_ZZI : sve2_int_bin_shift_imm_right<0b0, "sri", int_aarch64_sve_sri>;
+ defm SLI_ZZI : sve2_int_bin_shift_imm_left< 0b1, "sli", int_aarch64_sve_sli>;
// SVE2 bitwise shift right and accumulate
- defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra">;
- defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra">;
- defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra">;
- defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra">;
+ defm SSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b00, "ssra", int_aarch64_sve_ssra>;
+ defm USRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b01, "usra", int_aarch64_sve_usra>;
+ defm SRSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b10, "srsra", int_aarch64_sve_srsra>;
+ defm URSRA_ZZI : sve2_int_bin_accum_shift_imm_right<0b11, "ursra", int_aarch64_sve_ursra>;
// SVE2 complex integer add
defm CADD_ZZI : sve2_int_cadd<0b0, "cadd">;
defm SQCADD_ZZI : sve2_int_cadd<0b1, "sqcadd">;
// SVE2 integer absolute difference and accumulate
- defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba">;
- defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba">;
+ defm SABA_ZZZ : sve2_int_absdiff_accum<0b0, "saba", int_aarch64_sve_saba>;
+ defm UABA_ZZZ : sve2_int_absdiff_accum<0b1, "uaba", int_aarch64_sve_uaba>;
// SVE2 integer absolute difference and accumulate long
defm SABALB_ZZZ : sve2_int_absdiff_accum_long<0b00, "sabalb">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 76ef6042a88a..a3043917196c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2988,7 +2988,8 @@ class sve2_int_bin_shift_imm<bits<4> tsz8_64, bit opc, string asm,
let Constraints = "$Zd = $_Zd";
}
-multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
+multiclass sve2_int_bin_shift_imm_left<bit opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{19} = imm{3};
@@ -3000,9 +3001,15 @@ multiclass sve2_int_bin_shift_imm_left<bit opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
+multiclass sve2_int_bin_shift_imm_right<bit opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
@@ -3014,6 +3021,11 @@ multiclass sve2_int_bin_shift_imm_right<bit opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
@@ -3039,7 +3051,8 @@ class sve2_int_bin_accum_shift_imm<bits<4> tsz8_64, bits<2> opc, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
+multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm,
+ SDPatternOperator op> {
def _B : sve2_int_bin_accum_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
def _H : sve2_int_bin_accum_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{19} = imm{3};
@@ -3051,6 +3064,11 @@ multiclass sve2_int_bin_accum_shift_imm_right<bits<2> opc, string asm> {
let Inst{22} = imm{5};
let Inst{20-19} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i8, nxv16i8, i32, tvecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i16, nxv8i16, i32, tvecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i32, nxv4i32, i32, tvecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i64, nxv2i64, i32, tvecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_cadd<bits<2> sz, bit opc, string asm, ZPRRegOp zprty>
@@ -3101,11 +3119,16 @@ class sve2_int_absdiff_accum<bits<2> sz, bits<4> opc, string asm,
let ElementSize = ElementSizeNone;
}
-multiclass sve2_int_absdiff_accum<bit opc, string asm> {
+multiclass sve2_int_absdiff_accum<bit opc, string asm, SDPatternOperator op> {
def _B : sve2_int_absdiff_accum<0b00, { 0b111, opc }, asm, ZPR8, ZPR8>;
def _H : sve2_int_absdiff_accum<0b01, { 0b111, opc }, asm, ZPR16, ZPR16>;
def _S : sve2_int_absdiff_accum<0b10, { 0b111, opc }, asm, ZPR32, ZPR32>;
def _D : sve2_int_absdiff_accum<0b11, { 0b111, opc }, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i8, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm> {
@@ -4391,8 +4414,7 @@ multiclass sve_int_index_rr<string asm> {
// SVE Bitwise Shift - Predicated Group
//===----------------------------------------------------------------------===//
class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
- ZPRRegOp zprty, Operand immtype,
- ElementSizeEnum size>
+ ZPRRegOp zprty, Operand immtype>
: I<(outs zprty:$Zdn), (ins PPR3bAny:$Pg, zprty:$_Zdn, immtype:$imm),
asm, "\t$Zdn, $Pg/m, $_Zdn, $imm",
"",
@@ -4412,41 +4434,53 @@ class sve_int_bin_pred_shift_imm<bits<4> tsz8_64, bits<4> opc, string asm,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = DestructiveOther;
- let ElementSize = size;
+ let ElementSize = zprty.ElementSize;
}
multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8,
- ElementSizeB>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16,
- ElementSizeH> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32,
- ElementSizeS> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64,
- ElementSizeD> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
}
+multiclass sve2_int_bin_pred_shift_imm_left<bits<4> opc, string asm,
+ SDPatternOperator op> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> {
+ let Inst{8} = imm{3};
+ }
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> {
+ let Inst{9-8} = imm{4-3};
+ }
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> {
+ let Inst{22} = imm{5};
+ let Inst{9-8} = imm{4-3};
+ }
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, tvecshiftL8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, tvecshiftL16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, tvecshiftL32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, tvecshiftL64, !cast<Instruction>(NAME # _D)>;
+}
+
multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
SDPatternOperator op = null_frag> {
- def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
- ElementSizeB>;
- def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
- ElementSizeH> {
+ def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8>;
+ def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16> {
let Inst{8} = imm{3};
}
- def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32,
- ElementSizeS> {
+ def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftR32> {
let Inst{9-8} = imm{4-3};
}
- def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64,
- ElementSizeD> {
+ def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftR64> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
index 82f30ab93348..2151eed64c83 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -1,5 +1,49 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s
+;
+; SABA
+;
+
+define <vscale x 16 x i8> @saba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: saba_i8:
+; CHECK: saba z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %c)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @saba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: saba_i16:
+; CHECK: saba z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %c)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @saba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: saba_i32:
+; CHECK: saba z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %c)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @saba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: saba_i64:
+; CHECK: saba z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %c)
+ ret <vscale x 2 x i64> %out
+}
+
;
; SHADD
;
@@ -132,6 +176,50 @@ define <vscale x 2 x i64> @shsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
ret <vscale x 2 x i64> %out
}
+;
+; SLI
+;
+
+define <vscale x 16 x i8> @sli_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sli_i8:
+; CHECK: sli z0.b, z1.b, #0
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ i32 0)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sli_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sli_i16:
+; CHECK: sli z0.h, z1.h, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ i32 1)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sli_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sli_i32:
+; CHECK: sli z0.s, z1.s, #30
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ i32 30);
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sli_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sli_i64:
+; CHECK: sli z0.d, z1.d, #63
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ i32 63)
+ ret <vscale x 2 x i64> %out
+}
+
;
; SQABS
;
@@ -176,6 +264,50 @@ define <vscale x 2 x i64> @sqabs_i64(<vscale x 2 x i64> %a, <vscale x 2 x i1> %p
ret <vscale x 2 x i64> %out
}
+;
+; SQADD
+;
+
+define <vscale x 16 x i8> @sqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqadd_i8:
+; CHECK: sqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqadd_i16:
+; CHECK: sqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqadd_i32:
+; CHECK: sqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqadd_i64:
+; CHECK: sqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
;
; SQDMULH (Vector)
;
@@ -531,281 +663,1264 @@ define <vscale x 2 x i64> @sqrdmulh_lane_i64(<vscale x 2 x i64> %a, <vscale x 2
}
;
-; SRHADD
+; SQRSHL
;
-define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: srhadd_i8:
-; CHECK: srhadd z0.b, p0/m, z0.b, z1.b
+define <vscale x 16 x i8> @sqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqrshl_i8:
+; CHECK: sqrshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg,
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}
-define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: srhadd_i16:
-; CHECK: srhadd z0.h, p0/m, z0.h, z1.h
+define <vscale x 8 x i16> @sqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshl_i16:
+; CHECK: sqrshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg,
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}
-define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: srhadd_i32:
-; CHECK: srhadd z0.s, p0/m, z0.s, z1.s
+define <vscale x 4 x i32> @sqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshl_i32:
+; CHECK: sqrshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg,
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}
-define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: srhadd_i64:
-; CHECK: srhadd z0.d, p0/m, z0.d, z1.d
+define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshl_i64:
+; CHECK: sqrshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg,
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}
;
-; UHADD
+; SQSHL (Vectors)
;
-define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: uhadd_i8:
-; CHECK: uhadd z0.b, p0/m, z0.b, z1.b
+define <vscale x 16 x i8> @sqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqshl_i8:
+; CHECK: sqshl z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1> %pg,
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}
-define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: uhadd_i16:
-; CHECK: uhadd z0.h, p0/m, z0.h, z1.h
+define <vscale x 8 x i16> @sqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshl_i16:
+; CHECK: sqshl z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1> %pg,
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}
-define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: uhadd_i32:
-; CHECK: uhadd z0.s, p0/m, z0.s, z1.s
+define <vscale x 4 x i32> @sqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshl_i32:
+; CHECK: sqshl z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> %pg,
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}
-define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: uhadd_i64:
-; CHECK: uhadd z0.d, p0/m, z0.d, z1.d
+define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshl_i64:
+; CHECK: sqshl z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1> %pg,
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}
;
-; UHSUB
+; SQSHLU
;
-define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: uhsub_i8:
-; CHECK: uhsub z0.b, p0/m, z0.b, z1.b
+define <vscale x 16 x i8> @sqshlu_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: sqshlu_i8:
+; CHECK: sqshlu z0.b, p0/m, z0.b, #2
; CHECK-NEXT: ret
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg,
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ i32 2)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshlu_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: sqshlu_i16:
+; CHECK: sqshlu z0.h, p0/m, z0.h, #3
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ i32 3)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshlu_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: sqshlu_i32:
+; CHECK: sqshlu z0.s, p0/m, z0.s, #29
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ i32 29)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshlu_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: sqshlu_i64:
+; CHECK: sqshlu z0.d, p0/m, z0.d, #62
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ i32 62)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SQSUB
+;
+
+define <vscale x 16 x i8> @sqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqsub_i8:
+; CHECK: sqsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}
-define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: uhsub_i16:
-; CHECK: uhsub z0.h, p0/m, z0.h, z1.h
+define <vscale x 8 x i16> @sqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqsub_i16:
+; CHECK: sqsub z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg,
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}
-define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: uhsub_i32:
-; CHECK: uhsub z0.s, p0/m, z0.s, z1.s
+define <vscale x 4 x i32> @sqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqsub_i32:
+; CHECK: sqsub z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg,
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}
-define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: uhsub_i64:
-; CHECK: uhsub z0.d, p0/m, z0.d, z1.d
+define <vscale x 2 x i64> @sqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqsub_i64:
+; CHECK: sqsub z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg,
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}
;
-; UHSUBR
+; SQSUBR
;
-define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: uhsubr_i8:
-; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b
+define <vscale x 16 x i8> @sqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqsubr_i8:
+; CHECK: sqsubr z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg,
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}
-define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: uhsubr_i16:
-; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h
+define <vscale x 8 x i16> @sqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqsubr_i16:
+; CHECK: sqsubr z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg,
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}
-define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: uhsubr_i32:
-; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s
+define <vscale x 4 x i32> @sqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqsubr_i32:
+; CHECK: sqsubr z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg,
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}
-define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: uhsubr_i64:
-; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d
+define <vscale x 2 x i64> @sqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqsubr_i64:
+; CHECK: sqsubr z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg,
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1> %pg,
<vscale x 2 x i64> %a,
<vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}
;
-; URECPE
-;
-
-define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: urecpe_i32:
-; CHECK: urecpe z0.s, p0/m, z1.s
-; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a,
- <vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %out
-}
-
-;
-; URHADD
+; SRHADD
;
-define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
-; CHECK-LABEL: urhadd_i8:
-; CHECK: urhadd z0.b, p0/m, z0.b, z1.b
+define <vscale x 16 x i8> @srhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: srhadd_i8:
+; CHECK: srhadd z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: ret
- %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg,
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1> %pg,
<vscale x 16 x i8> %a,
<vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %out
}
-define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
-; CHECK-LABEL: urhadd_i16:
-; CHECK: urhadd z0.h, p0/m, z0.h, z1.h
+define <vscale x 8 x i16> @srhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: srhadd_i16:
+; CHECK: srhadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: ret
- %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg,
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1> %pg,
<vscale x 8 x i16> %a,
<vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %out
}
-define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: urhadd_i32:
-; CHECK: urhadd z0.s, p0/m, z0.s, z1.s
+define <vscale x 4 x i32> @srhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: srhadd_i32:
+; CHECK: srhadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg,
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1> %pg,
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %out
}
-define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
-; CHECK-LABEL: urhadd_i64:
-; CHECK: urhadd z0.d, p0/m, z0.d, z1.d
+define <vscale x 2 x i64> @srhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srhadd_i64:
+; CHECK: srhadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: ret
- %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg,
- <vscale x 2 x i64> %a,
- <vscale x 2 x i64> %b)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %out
}
;
-; URSQRTE
+; SRI
;
-define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
-; CHECK-LABEL: ursqrte_i32:
-; CHECK: ursqrte z0.s, p0/m, z1.s
+define <vscale x 16 x i8> @sri_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sri_i8:
+; CHECK: sri z0.b, z1.b, #1
; CHECK-NEXT: ret
- %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a,
- <vscale x 4 x i1> %pg,
- <vscale x 4 x i32> %b)
- ret <vscale x 4 x i32> %out
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ i32 1)
+ ret <vscale x 16 x i8> %out
}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 8 x i16> @sri_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sri_i16:
+; CHECK: sri z0.h, z1.h, #16
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ i32 16)
+ ret <vscale x 8 x i16> %out
+}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 4 x i32> @sri_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sri_i32:
+; CHECK: sri z0.s, z1.s, #32
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ i32 32);
+ ret <vscale x 4 x i32> %out
+}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 2 x i64> @sri_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sri_i64:
+; CHECK: sri z0.d, z1.d, #64
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ i32 64)
+ ret <vscale x 2 x i64> %out
+}
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+;
+; SRSHL
+;
-declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+define <vscale x 16 x i8> @srshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: srshl_i8:
+; CHECK: srshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
-declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
-declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
-declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+define <vscale x 8 x i16> @srshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: srshl_i16:
+; CHECK: srshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: srshl_i32:
+; CHECK: srshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srshl_i64:
+; CHECK: srshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SRSHR
+;
+
+define <vscale x 16 x i8> @srshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: srshr_i8:
+; CHECK: srshr z0.b, p0/m, z0.b, #8
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ i32 8)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @srshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: srshr_i16:
+; CHECK: srshr z0.h, p0/m, z0.h, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ i32 1)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: srshr_i32:
+; CHECK: srshr z0.s, p0/m, z0.s, #22
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ i32 22)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: srshr_i64:
+; CHECK: srshr z0.d, p0/m, z0.d, #54
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ i32 54)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SRSRA
+;
+
+define <vscale x 16 x i8> @srsra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: srsra_i8:
+; CHECK: srsra z0.b, z1.b, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ i32 2)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @srsra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: srsra_i16:
+; CHECK: srsra z0.h, z1.h, #15
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ i32 15)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srsra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: srsra_i32:
+; CHECK: srsra z0.s, z1.s, #12
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ i32 12)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srsra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srsra_i64:
+; CHECK: srsra z0.d, z1.d, #44
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ i32 44)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SSRA
+;
+
+define <vscale x 16 x i8> @ssra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ssra_i8:
+; CHECK: ssra z0.b, z1.b, #3
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ i32 3)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @ssra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ssra_i16:
+; CHECK: ssra z0.h, z1.h, #14
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ i32 14)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ssra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ssra_i32:
+; CHECK: ssra z0.s, z1.s, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ i32 2)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ssra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ssra_i64:
+; CHECK: ssra z0.d, z1.d, #34
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ i32 34)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SUQADD
+;
+
+define <vscale x 16 x i8> @suqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: suqadd_i8:
+; CHECK: suqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @suqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: suqadd_i16:
+; CHECK: suqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @suqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: suqadd_i32:
+; CHECK: suqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @suqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: suqadd_i64:
+; CHECK: suqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UABA
+;
+
+define <vscale x 16 x i8> @uaba_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+; CHECK-LABEL: uaba_i8:
+; CHECK: uaba z0.b, z1.b, z2.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %c)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uaba_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+; CHECK-LABEL: uaba_i16:
+; CHECK: uaba z0.h, z1.h, z2.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %c)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uaba_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+; CHECK-LABEL: uaba_i32:
+; CHECK: uaba z0.s, z1.s, z2.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %c)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uaba_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+; CHECK-LABEL: uaba_i64:
+; CHECK: uaba z0.d, z1.d, z2.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %c)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UHADD
+;
+
+define <vscale x 16 x i8> @uhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uhadd_i8:
+; CHECK: uhadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uhadd_i16:
+; CHECK: uhadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uhadd_i32:
+; CHECK: uhadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uhadd_i64:
+; CHECK: uhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhadd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UHSUB
+;
+
+define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uhsub_i8:
+; CHECK: uhsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uhsub_i16:
+; CHECK: uhsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uhsub_i32:
+; CHECK: uhsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uhsub_i64:
+; CHECK: uhsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UHSUBR
+;
+
+define <vscale x 16 x i8> @uhsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uhsubr_i8:
+; CHECK: uhsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uhsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uhsubr_i16:
+; CHECK: uhsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uhsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uhsubr_i32:
+; CHECK: uhsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uhsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uhsubr_i64:
+; CHECK: uhsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQADD
+;
+
+define <vscale x 16 x i8> @uqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqadd_i8:
+; CHECK: uqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqadd_i16:
+; CHECK: uqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqadd_i32:
+; CHECK: uqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqadd_i64:
+; CHECK: uqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
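+
+; A hedged usage sketch, not part of the committed test: uqadd saturates
+; rather than wrapping, e.g. for an i8 element uqadd(200, 100) yields 255
+; instead of the wrapped 44. The function name below is hypothetical, and
+; the sketch assumes the usual @llvm.aarch64.sve.ptrue intrinsic (pattern
+; 31 selects all lanes).
+define <vscale x 16 x i8> @uqadd_allactive_sketch(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+  %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1> %pg,
+                                                                 <vscale x 16 x i8> %a,
+                                                                 <vscale x 16 x i8> %b)
+  ret <vscale x 16 x i8> %out
+}
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)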
+
+;
+; UQRSHL
+;
+
+define <vscale x 16 x i8> @uqrshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqrshl_i8:
+; CHECK: uqrshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshl_i16:
+; CHECK: uqrshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshl_i32:
+; CHECK: uqrshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshl_i64:
+; CHECK: uqrshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQSHL (Vectors)
+;
+
+define <vscale x 16 x i8> @uqshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqshl_i8:
+; CHECK: uqshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshl_i16:
+; CHECK: uqshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshl_i32:
+; CHECK: uqshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshl_i64:
+; CHECK: uqshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
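+
+; Note (hedged): for the vector-shift forms above (uqrshl/uqshl, and urshl
+; further down), the second data operand is a per-element signed shift
+; amount, as in the NEON counterparts: positive values shift left, negative
+; values shift right, with saturation and/or rounding applied as the
+; mnemonic indicates.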
+
+;
+; UQSUB
+;
+
+define <vscale x 16 x i8> @uqsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqsub_i8:
+; CHECK: uqsub z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqsub_i16:
+; CHECK: uqsub z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqsub_i32:
+; CHECK: uqsub z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqsub_i64:
+; CHECK: uqsub z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UQSUBR
+;
+
+define <vscale x 16 x i8> @uqsubr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqsubr_i8:
+; CHECK: uqsubr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqsubr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqsubr_i16:
+; CHECK: uqsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqsubr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqsubr_i32:
+; CHECK: uqsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqsubr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqsubr_i64:
+; CHECK: uqsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
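+
+; Worked example of the reversed-operand convention (which applies equally
+; to uhsubr above): uqsubr computes the saturated difference %b - %a on
+; active lanes, so for i8 elements uqsubr(10, 250) gives 240 while
+; uqsub(10, 250) saturates to 0. The reversed form exists so the
+; destructive encoding can subtract the destination register from the
+; other operand.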
+
+;
+; URECPE
+;
+
+define <vscale x 4 x i32> @urecpe_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: urecpe_i32:
+; CHECK: urecpe z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
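+
+; Note: urecpe (and ursqrte below) use the merging unary form, so the first
+; intrinsic operand supplies the values for inactive lanes and the last is
+; the actual input; hence the CHECK line above reads from z1 while merging
+; into z0.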
+
+;
+; URHADD
+;
+
+define <vscale x 16 x i8> @urhadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: urhadd_i8:
+; CHECK: urhadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urhadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: urhadd_i16:
+; CHECK: urhadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urhadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: urhadd_i32:
+; CHECK: urhadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @urhadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: urhadd_i64:
+; CHECK: urhadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
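+
+; Worked example: urhadd is a rounding halving add, roughly
+; (a + b + 1) >> 1 computed without intermediate overflow, so for i8
+; elements urhadd(1, 2) gives 2 where a non-rounding halving add would
+; give 1.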
+
+;
+; URSHL
+;
+
+define <vscale x 16 x i8> @urshl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: urshl_i8:
+; CHECK: urshl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urshl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: urshl_i16:
+; CHECK: urshl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urshl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: urshl_i32:
+; CHECK: urshl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: urshl_i64:
+; CHECK: urshl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; URSHR
+;
+
+define <vscale x 16 x i8> @urshr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: urshr_i8:
+; CHECK: urshr z0.b, p0/m, z0.b, #4
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ i32 4)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urshr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: urshr_i16:
+; CHECK: urshr z0.h, p0/m, z0.h, #13
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ i32 13)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urshr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: urshr_i32:
+; CHECK: urshr z0.s, p0/m, z0.s, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ i32 1)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @urshr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: urshr_i64:
+; CHECK: urshr z0.d, p0/m, z0.d, #24
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ i32 24)
+ ret <vscale x 2 x i64> %out
+}
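+
+; Note on the immediates above: for the shift-right-by-immediate forms the
+; shift must lie between 1 and the element width in bits (so up to #16 for
+; .h and #64 for .d). The rounding variant adds 1 << (shift - 1) before
+; shifting, e.g. urshr of 3 by #1 yields (3 + 1) >> 1 = 2.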
+
+;
+; URSQRTE
+;
+
+define <vscale x 4 x i32> @ursqrte_i32(<vscale x 4 x i32> %a, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ursqrte_i32:
+; CHECK: ursqrte z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; URSRA
+;
+
+define <vscale x 16 x i8> @ursra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: ursra_i8:
+; CHECK: ursra z0.b, z1.b, #5
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ i32 5)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @ursra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: ursra_i16:
+; CHECK: ursra z0.h, z1.h, #12
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ i32 12)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @ursra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: ursra_i32:
+; CHECK: ursra z0.s, z1.s, #31
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ i32 31)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @ursra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: ursra_i64:
+; CHECK: ursra z0.d, z1.d, #14
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ i32 14)
+ ret <vscale x 2 x i64> %out
+}
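+
+; Note: in the ursra tests the first operand is the accumulator and the
+; second is the value to be shifted, i.e. the result is roughly
+; %a + rshr(%b, imm), matching the destructive "z0 += z1 >> imm" form in
+; the CHECK lines.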
+
+;
+; USQADD
+;
+
+define <vscale x 16 x i8> @usqadd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: usqadd_i8:
+; CHECK: usqadd z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @usqadd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: usqadd_i16:
+; CHECK: usqadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @usqadd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: usqadd_i32:
+; CHECK: usqadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @usqadd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: usqadd_i64:
+; CHECK: usqadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
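+
+; Worked example: usqadd adds a signed value to an unsigned accumulator
+; with unsigned saturation, so for i8 elements usqadd(5, -10) clamps to 0
+; and usqadd(250, 10) clamps to 255.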
+
+;
+; USRA
+;
+
+define <vscale x 16 x i8> @usra_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: usra_i8:
+; CHECK: usra z0.b, z1.b, #6
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b,
+ i32 6)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @usra_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: usra_i16:
+; CHECK: usra z0.h, z1.h, #11
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b,
+ i32 11)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @usra_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: usra_i32:
+; CHECK: usra z0.s, z1.s, #21
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ i32 21)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @usra_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: usra_i64:
+; CHECK: usra z0.d, z1.d, #4
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b,
+ i32 4)
+ ret <vscale x 2 x i64> %out
+}
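+
+; A minimal sketch, not part of the committed test, of composing the two
+; accumulating shifts: usra truncates the shifted value while ursra rounds
+; it before accumulating, and the first operand is always the accumulator,
+; so one step can feed the next. The function name is hypothetical; the
+; declarations used are those at the end of this file.
+define <vscale x 16 x i8> @sra_chain_sketch(<vscale x 16 x i8> %acc, <vscale x 16 x i8> %v) {
+  ; acc += v >> 2 (truncating), then acc += v >> 4 (rounding)
+  %t = call <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8> %acc,
+                                                              <vscale x 16 x i8> %v,
+                                                              i32 2)
+  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8> %t,
+                                                                 <vscale x 16 x i8> %v,
+                                                                 i32 4)
+  ret <vscale x 16 x i8> %out
+}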
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.saba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.saba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.saba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.saba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.shadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sli.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sli.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sli.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sli.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqabs.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqabs.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqabs.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqabs.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqdmulh.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqdmulh.lane.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqneg.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqneg.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, <vscale x 8 x i16>)
@@ -839,11 +1954,71 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrdmulh.lane.nxv8i16(<vscale x 8 x
declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrdmulh.lane.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrdmulh.lane.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqshlu.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqshlu.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqshlu.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqshlu.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
declare <vscale x 16 x i8> @llvm.aarch64.sve.srhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.srhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.srhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.srhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sri.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sri.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sri.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sri.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.srshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.srshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.srshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.srshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.srsra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.srsra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.srsra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.srsra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ssra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ssra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ssra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ssra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.suqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.suqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.suqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.suqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uaba.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uaba.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uaba.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uaba.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
declare <vscale x 16 x i8> @llvm.aarch64.sve.uhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uhadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
@@ -859,6 +2034,31 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1>, <
declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsub.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsub.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsub.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uqsubr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uqsubr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uqsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uqsubr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.urecpe.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.urhadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
@@ -866,4 +2066,29 @@ declare <vscale x 8 x i16> @llvm.aarch64.sve.urhadd.nxv8i16(<vscale x 8 x i1>, <
declare <vscale x 4 x i32> @llvm.aarch64.sve.urhadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.urhadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.urshr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.urshr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.urshr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.urshr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
declare <vscale x 4 x i32> @llvm.aarch64.sve.ursqrte.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, <vscale x 4 x i32>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.ursra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.ursra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.ursra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.ursra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.usqadd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.usqadd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usqadd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.usqadd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)