[llvm] bd33a46 - [AArch64][SVE] Add SVE2 intrinsics for pairwise arithmetic
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 02:37:46 PST 2020
Author: Kerry McLaughlin
Date: 2020-01-29T10:31:31Z
New Revision: bd33a46213d3cccf8924d33f26c1cf6a8e4c2879
URL: https://github.com/llvm/llvm-project/commit/bd33a46213d3cccf8924d33f26c1cf6a8e4c2879
DIFF: https://github.com/llvm/llvm-project/commit/bd33a46213d3cccf8924d33f26c1cf6a8e4c2879.diff
LOG: [AArch64][SVE] Add SVE2 intrinsics for pairwise arithmetic
Summary:
Implements the following intrinsics:
- addp
- smaxp, sminp, umaxp & uminp
- sadalp & uadalp
Reviewers: dancgr, efriedma, sdesmalen, c-rhodes
Reviewed By: c-rhodes
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D73347
Added:
llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index 222a17709188..88d280b96c9e 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -1032,6 +1032,13 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
LLVMVectorOfBitcastsToInt<0>],
[IntrNoMem]>;
+ class SVE2_2VectorArg_Pred_Long_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMSubdivide2VectorType<0>],
+ [IntrNoMem]>;
+
class SVE2_3VectorArg_Long_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>,
@@ -1662,11 +1669,23 @@ def int_aarch64_sve_st1_scatter_scalar_offset : AdvSIMD_ScatterStore_VectorBase_
// SVE2 - Non-widening pairwise arithmetic
//
+def int_aarch64_sve_addp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_faddp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fmaxnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fminp : AdvSIMD_Pred2VectorArg_Intrinsic;
def int_aarch64_sve_fminnmp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_smaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_sminp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_umaxp : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_uminp : AdvSIMD_Pred2VectorArg_Intrinsic;
+
+//
+// SVE2 - Widening pairwise arithmetic
+//
+
+def int_aarch64_sve_sadalp : SVE2_2VectorArg_Pred_Long_Intrinsic;
+def int_aarch64_sve_uadalp : SVE2_2VectorArg_Pred_Long_Intrinsic;
//
// SVE2 - Floating-point widening multiply-accumulate
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 7a2000e746cf..f26ef5e04a68 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -1503,25 +1503,25 @@ let Predicates = [HasSVE2] in {
defm SQDMLSLBT_ZZZ : sve2_int_mla_long<0b00011, "sqdmlslbt">;
// SVE2 integer halving add/subtract (predicated)
- defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd">;
- defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd">;
- defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub">;
- defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub">;
- defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd">;
- defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd">;
- defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr">;
- defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr">;
+ defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", null_frag>;
+ defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", null_frag>;
+ defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", null_frag>;
+ defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", null_frag>;
+ defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", null_frag>;
+ defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", null_frag>;
+ defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", null_frag>;
+ defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", null_frag>;
// SVE2 integer pairwise add and accumulate long
- defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp">;
- defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp">;
+ defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>;
+ defm UADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<1, "uadalp", int_aarch64_sve_uadalp>;
// SVE2 integer pairwise arithmetic
- defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp">;
- defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp">;
- defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp">;
- defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp">;
- defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp">;
+ defm ADDP_ZPmZ : sve2_int_arith_pred<0b100011, "addp", int_aarch64_sve_addp>;
+ defm SMAXP_ZPmZ : sve2_int_arith_pred<0b101001, "smaxp", int_aarch64_sve_smaxp>;
+ defm UMAXP_ZPmZ : sve2_int_arith_pred<0b101011, "umaxp", int_aarch64_sve_umaxp>;
+ defm SMINP_ZPmZ : sve2_int_arith_pred<0b101101, "sminp", int_aarch64_sve_sminp>;
+ defm UMINP_ZPmZ : sve2_int_arith_pred<0b101111, "uminp", int_aarch64_sve_uminp>;
// SVE2 integer unary operations (predicated)
defm URECPE_ZPmZ : sve2_int_un_pred_arit_s<0b000, "urecpe">;
@@ -1530,28 +1530,28 @@ let Predicates = [HasSVE2] in {
defm SQNEG_ZPmZ : sve2_int_un_pred_arit<0b101, "sqneg">;
// SVE2 saturating add/subtract
- defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd">;
- defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd">;
- defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub">;
- defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub">;
- defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd">;
- defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd">;
- defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr">;
- defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr">;
+ defm SQADD_ZPmZ : sve2_int_arith_pred<0b110000, "sqadd", null_frag>;
+ defm UQADD_ZPmZ : sve2_int_arith_pred<0b110010, "uqadd", null_frag>;
+ defm SQSUB_ZPmZ : sve2_int_arith_pred<0b110100, "sqsub", null_frag>;
+ defm UQSUB_ZPmZ : sve2_int_arith_pred<0b110110, "uqsub", null_frag>;
+ defm SUQADD_ZPmZ : sve2_int_arith_pred<0b111000, "suqadd", null_frag>;
+ defm USQADD_ZPmZ : sve2_int_arith_pred<0b111010, "usqadd", null_frag>;
+ defm SQSUBR_ZPmZ : sve2_int_arith_pred<0b111100, "sqsubr", null_frag>;
+ defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", null_frag>;
// SVE2 saturating/rounding bitwise shift left (predicated)
- defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl">;
- defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl">;
- defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr">;
- defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr">;
- defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl">;
- defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl">;
- defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl">;
- defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl">;
- defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr">;
- defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr">;
- defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr">;
- defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr">;
+ defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", null_frag>;
+ defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", null_frag>;
+ defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>;
+ defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>;
+ defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", null_frag>;
+ defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", null_frag>;
+ defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", null_frag>;
+ defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", null_frag>;
+ defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>;
+ defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>;
+ defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>;
+ defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;
// SVE2 predicated shifts
defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl">;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index e9f9381ad2fe..304df2b14250 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2716,7 +2716,7 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
bits<5> Zdn;
let Inst{31-24} = 0b01000100;
let Inst{23-22} = sz;
- let Inst{21} = 0b0;
+ let Inst{21-20} = 0b01;
let Inst{20-16} = opc{5-1};
let Inst{15-14} = 0b10;
let Inst{13} = opc{0};
@@ -2729,11 +2729,16 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve2_int_arith_pred<bits<6> opc, string asm> {
+multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op> {
def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
@@ -2757,10 +2762,14 @@ class sve2_int_sadd_long_accum_pairwise<bits<2> sz, bit U, string asm,
let ElementSize = zprty1.ElementSize;
}
-multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm> {
+multiclass sve2_int_sadd_long_accum_pairwise<bit U, string asm, SDPatternOperator op> {
def _H : sve2_int_sadd_long_accum_pairwise<0b01, U, asm, ZPR16, ZPR8>;
def _S : sve2_int_sadd_long_accum_pairwise<0b10, U, asm, ZPR32, ZPR16>;
def _D : sve2_int_sadd_long_accum_pairwise<0b11, U, asm, ZPR64, ZPR32>;
+
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv16i8, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv8i16, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv4i32, !cast<Instruction>(NAME # _D)>;
}
class sve2_int_un_pred_arit<bits<2> sz, bit Q, bits<2> opc,
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
index 055c24b935e0..2b791d1800b9 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-non-widening-pairwise-arith.ll
@@ -1,5 +1,49 @@
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+;
+; ADDP
+;
+
+define <vscale x 16 x i8> @addp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: addp_i8:
+; CHECK: addp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.addp.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @addp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: addp_i16:
+; CHECK: addp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.addp.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @addp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: addp_i32:
+; CHECK: addp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @addp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: addp_i64:
+; CHECK: addp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.addp.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
;
; FADDP
;
@@ -170,6 +214,187 @@ define <vscale x 2 x double> @fminnmp_f64(<vscale x 2 x i1> %pg, <vscale x 2 x d
ret <vscale x 2 x double> %out
}
+;
+; SMAXP
+;
+
+define <vscale x 16 x i8> @smaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: smaxp_i8:
+; CHECK: smaxp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.smaxp.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @smaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: smaxp_i16:
+; CHECK: smaxp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.smaxp.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @smaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: smaxp_i32:
+; CHECK: smaxp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @smaxp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: smaxp_i64:
+; CHECK: smaxp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.smaxp.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; SMINP
+;
+
+define <vscale x 16 x i8> @sminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sminp_i8:
+; CHECK: sminp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sminp.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sminp_i16:
+; CHECK: sminp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sminp.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sminp_i32:
+; CHECK: sminp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sminp_i64:
+; CHECK: sminp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sminp.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UMINP
+;
+
+define <vscale x 16 x i8> @uminp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uminp_i8:
+; CHECK: uminp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uminp.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uminp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uminp_i16:
+; CHECK: uminp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uminp.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uminp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uminp_i32:
+; CHECK: uminp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uminp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uminp_i64:
+; CHECK: uminp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uminp.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UMAXP
+;
+
+define <vscale x 16 x i8> @umaxp_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: umaxp_i8:
+; CHECK: umaxp z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.umaxp.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @umaxp_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: umaxp_i16:
+; CHECK: umaxp z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.umaxp.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @umaxp_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: umaxp_i32:
+; CHECK: umaxp z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @umaxp_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: umaxp_i64:
+; CHECK: umaxp z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.umaxp.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.addp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.addp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.addp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.addp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
declare <vscale x 8 x half> @llvm.aarch64.sve.faddp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.faddp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.faddp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
@@ -189,3 +414,23 @@ declare <vscale x 2 x double> @llvm.aarch64.sve.fminp.nxv2f64(<vscale x 2 x i1>,
declare <vscale x 8 x half> @llvm.aarch64.sve.fminnmp.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
declare <vscale x 4 x float> @llvm.aarch64.sve.fminnmp.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
declare <vscale x 2 x double> @llvm.aarch64.sve.fminnmp.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.smaxp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.smaxp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.smaxp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.smaxp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.sminp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sminp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sminp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sminp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.umaxp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.umaxp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.umaxp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.umaxp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.uminp.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uminp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uminp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uminp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
new file mode 100644
index 000000000000..0f73ff2f931e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-widening-pairwise-arith.ll
@@ -0,0 +1,77 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s
+
+;
+; SADALP
+;
+
+define <vscale x 8 x i16> @sadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sadalp_i8:
+; CHECK: sadalp z0.h, p0/m, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sadalp.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sadalp_i16:
+; CHECK: sadalp z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sadalp.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sadalp_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sadalp_i32:
+; CHECK: sadalp z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; UADALP
+;
+
+define <vscale x 8 x i16> @uadalp_i8(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uadalp_i8:
+; CHECK: uadalp z0.h, p0/m, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uadalp.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uadalp_i16(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uadalp_i16:
+; CHECK: uadalp z0.s, p0/m, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uadalp.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uadalp_i32(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uadalp_i32:
+; CHECK: uadalp z0.d, p0/m, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uadalp.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.sadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.sadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.sadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
+
+declare <vscale x 8 x i16> @llvm.aarch64.sve.uadalp.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 16 x i8>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.uadalp.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 8 x i16>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.uadalp.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 4 x i32>)
More information about the llvm-commits
mailing list