[llvm] 7483eb6 - [AArch64][SVE] Implement shift intrinsics
Kerry McLaughlin via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 3 03:54:44 PST 2019
Author: Kerry McLaughlin
Date: 2019-12-03T11:47:12Z
New Revision: 7483eb656fd290346e0ad70e553755fe9155e203
URL: https://github.com/llvm/llvm-project/commit/7483eb656fd290346e0ad70e553755fe9155e203
DIFF: https://github.com/llvm/llvm-project/commit/7483eb656fd290346e0ad70e553755fe9155e203.diff
LOG: [AArch64][SVE] Implement shift intrinsics
Summary:
Adds the following SVE shift intrinsics:
- asr & asrd (and the wide variant asr_wide)
- insr
- lsl & lsr (and the wide variants lsl_wide & lsr_wide)
This patch also adds a new AArch64ISD node (INSR) to represent the int_aarch64_sve_insr intrinsic.
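For illustration only, here is a minimal sketch of how the new intrinsics are called from IR; the declarations mirror those in the test file added below, the function name @sketch is hypothetical, and the expected instructions are taken from the test's CHECK lines:

  ; Sketch (assumed usage): a predicated shift followed by insr.
  declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
  declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)

  define <vscale x 4 x i32> @sketch(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %s) {
    ; Expected to select to: lsl z0.s, p0/m, z0.s, z1.s
    %shl = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
    ; Expected to select to: insr z0.s, w0 (lowered via the new AArch64ISD::INSR node)
    %res = call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %shl, i32 %s)
    ret <vscale x 4 x i32> %res
  }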
Reviewers: huntergr, sdesmalen, dancgr, mgudim, rengolin, efriedma
Reviewed By: sdesmalen
Subscribers: tschuett, kristof.beyls, hiraditya, rkruppe, psnobl, cameron.mcinally, cfe-commits, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D70437
Added:
llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
Modified:
llvm/include/llvm/IR/IntrinsicsAArch64.td
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64InstrInfo.td
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td
index d22993300eb2..72bc4a2aa216 100644
--- a/llvm/include/llvm/IR/IntrinsicsAArch64.td
+++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td
@@ -790,6 +790,21 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_i32_ty],
[IntrNoMem]>;
+ class AdvSIMD_Pred2VectorArg_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
+ class AdvSIMD_Pred3VectorArg_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>,
+ LLVMMatchType<0>],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_Compare_Intrinsic
: Intrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
@@ -817,6 +832,20 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_anyvector_ty],
[IntrNoMem]>;
+ class AdvSIMD_SVE_ShiftByImm_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ llvm_i32_ty],
+ [IntrNoMem]>;
+
+ class AdvSIMD_SVE_ShiftWide_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+ LLVMMatchType<0>,
+ llvm_nxv2i64_ty],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_Unpack_Intrinsic
: Intrinsic<[llvm_anyvector_ty],
[LLVMSubdivide2VectorType<0>],
@@ -867,6 +896,12 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
llvm_anyvector_ty],
[IntrNoMem]>;
+ class AdvSIMD_SVE_INSR_Intrinsic
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>,
+ LLVMVectorElementType<0>],
+ [IntrNoMem]>;
+
class AdvSIMD_SVE_PUNPKHI_Intrinsic
: Intrinsic<[LLVMHalfElementsVectorType<0>],
[llvm_anyvector_ty],
@@ -919,18 +954,6 @@ let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
let TargetPrefix = "aarch64" in { // All intrinsics start with "llvm.aarch64.".
-
-class AdvSIMD_Pred2VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-
-class AdvSIMD_Pred3VectorArg_Intrinsic
- : Intrinsic<[llvm_anyvector_ty],
- [LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>],
- [IntrNoMem]>;
-
-
//
// Integer arithmetic
//
@@ -975,6 +998,17 @@ def int_aarch64_sve_sdot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
def int_aarch64_sve_udot : AdvSIMD_SVE_DOT_Intrinsic;
def int_aarch64_sve_udot_lane : AdvSIMD_SVE_DOT_Indexed_Intrinsic;
+// Shifts
+
+def int_aarch64_sve_asr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_asr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+def int_aarch64_sve_asrd : AdvSIMD_SVE_ShiftByImm_Intrinsic;
+def int_aarch64_sve_insr : AdvSIMD_SVE_INSR_Intrinsic;
+def int_aarch64_sve_lsl : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsl_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+def int_aarch64_sve_lsr : AdvSIMD_Pred2VectorArg_Intrinsic;
+def int_aarch64_sve_lsr_wide : AdvSIMD_SVE_ShiftWide_Intrinsic;
+
//
// Counting bits
//
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 9e8df33218b8..461d781effb8 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -828,6 +828,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
if (isTypeLegal(VT) && VT.getVectorElementType() != MVT::i1)
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
}
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
}
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
@@ -1333,6 +1335,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
case AArch64ISD::SUNPKLO: return "AArch64ISD::SUNPKLO";
case AArch64ISD::UUNPKHI: return "AArch64ISD::UUNPKHI";
case AArch64ISD::UUNPKLO: return "AArch64ISD::UUNPKLO";
+ case AArch64ISD::INSR: return "AArch64ISD::INSR";
}
return nullptr;
}
@@ -2884,6 +2887,16 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
return DAG.getNode(AArch64ISD::UUNPKLO, dl, Op.getValueType(),
Op.getOperand(1));
+ case Intrinsic::aarch64_sve_insr: {
+ SDValue Scalar = Op.getOperand(2);
+ EVT ScalarTy = Scalar.getValueType();
+ if ((ScalarTy == MVT::i8) || (ScalarTy == MVT::i16))
+ Scalar = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Scalar);
+
+ return DAG.getNode(AArch64ISD::INSR, dl, Op.getValueType(),
+ Op.getOperand(1), Scalar);
+ }
+
case Intrinsic::localaddress: {
const auto &MF = DAG.getMachineFunction();
const auto *RegInfo = Subtarget->getRegisterInfo();
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 384c7b4456f0..180dd50dc396 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -196,6 +196,8 @@ enum NodeType : unsigned {
UUNPKHI,
UUNPKLO,
+ INSR,
+
// NEON Load/Store with post-increment base updates
LD2post = ISD::FIRST_TARGET_MEMORY_OPCODE,
LD3post,
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index d619362e029e..69c46e417ce7 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -214,6 +214,7 @@ def SDT_AArch64FCmp : SDTypeProfile<0, 2,
SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
+def SDT_AArch64Insr : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip : SDTypeProfile<1, 2, [SDTCisVec<0>,
SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
@@ -401,6 +402,8 @@ def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
+def AArch64insr : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
+
def AArch64zip1 : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2 : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1 : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 3b4e97ed844a..43e5ac058885 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -199,8 +199,8 @@ let Predicates = [HasSVE] in {
defm SPLICE_ZPZ : sve_int_perm_splice<"splice">;
defm COMPACT_ZPZ : sve_int_perm_compact<"compact">;
- defm INSR_ZR : sve_int_perm_insrs<"insr">;
- defm INSR_ZV : sve_int_perm_insrv<"insr">;
+ defm INSR_ZR : sve_int_perm_insrs<"insr", AArch64insr>;
+ defm INSR_ZV : sve_int_perm_insrv<"insr", AArch64insr>;
def EXT_ZZI : sve_int_perm_extract_i<"ext">;
defm RBIT_ZPmZ : sve_int_perm_rev_rbit<"rbit">;
@@ -876,18 +876,18 @@ let Predicates = [HasSVE] in {
defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr">;
defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr">;
defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">;
- defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd">;
-
- defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr">;
- defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr">;
- defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl">;
- defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr">;
- defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr">;
- defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr">;
-
- defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr">;
- defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr">;
- defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl">;
+ defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", int_aarch64_sve_asrd>;
+
+ defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", int_aarch64_sve_asr>;
+ defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", int_aarch64_sve_lsr>;
+ defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", int_aarch64_sve_lsl>;
+ defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", null_frag>;
+ defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", null_frag>;
+ defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", null_frag>;
+
+ defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
+ defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
+ defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
defm FCVT_ZPmZ_StoH : sve_fp_2op_p_zd<0b1001000, "fcvt", ZPR32, ZPR16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv16i1, nxv4f32, ElementSizeS>;
defm FCVT_ZPmZ_HtoS : sve_fp_2op_p_zd<0b1001001, "fcvt", ZPR16, ZPR32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv16i1, nxv8f16, ElementSizeS>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 38f318849b33..9169e463c662 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -304,6 +304,12 @@ class SVE_4_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, vt4:$Op4)),
(inst $Op1, $Op2, $Op3, $Op4)>;
+class SVE_3_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
+ ValueType vt2, ValueType vt3, Operand ImmTy,
+ Instruction inst>
+: Pat<(vtd (op vt1:$Op1, vt2:$Op2, (vt3 ImmTy:$Op3))),
+ (inst $Op1, $Op2, ImmTy:$Op3)>;
+
def SVEDup0Undef : ComplexPattern<i64, 0, "SelectDupZeroOrUndef", []>;
//===----------------------------------------------------------------------===//
@@ -888,14 +894,18 @@ class sve_int_perm_insrs<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = Destructive;
- let ElementSize = ElementSizeNone;
}
-multiclass sve_int_perm_insrs<string asm> {
+multiclass sve_int_perm_insrs<string asm, SDPatternOperator op> {
def _B : sve_int_perm_insrs<0b00, asm, ZPR8, GPR32>;
def _H : sve_int_perm_insrs<0b01, asm, ZPR16, GPR32>;
def _S : sve_int_perm_insrs<0b10, asm, ZPR32, GPR32>;
def _D : sve_int_perm_insrs<0b11, asm, ZPR64, GPR64>;
+
+ def : SVE_2_Op_Pat<nxv16i8, op, nxv16i8, i32, !cast<Instruction>(NAME # _B)>;
+ def : SVE_2_Op_Pat<nxv8i16, op, nxv8i16, i32, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4i32, op, nxv4i32, i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2i64, op, nxv2i64, i64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
@@ -914,14 +924,17 @@ class sve_int_perm_insrv<bits<2> sz8_64, string asm, ZPRRegOp zprty,
let Constraints = "$Zdn = $_Zdn";
let DestructiveInstType = Destructive;
- let ElementSize = ElementSizeNone;
}
-multiclass sve_int_perm_insrv<string asm> {
+multiclass sve_int_perm_insrv<string asm, SDPatternOperator op> {
def _B : sve_int_perm_insrv<0b00, asm, ZPR8, FPR8>;
def _H : sve_int_perm_insrv<0b01, asm, ZPR16, FPR16>;
def _S : sve_int_perm_insrv<0b10, asm, ZPR32, FPR32>;
def _D : sve_int_perm_insrv<0b11, asm, ZPR64, FPR64>;
+
+ def : SVE_2_Op_Pat<nxv8f16, op, nxv8f16, f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_2_Op_Pat<nxv4f32, op, nxv4f32, f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_2_Op_Pat<nxv2f64, op, nxv2f64, f64, !cast<Instruction>(NAME # _D)>;
}
//===----------------------------------------------------------------------===//
@@ -3929,7 +3942,8 @@ multiclass sve_int_bin_pred_shift_imm_left<bits<4> opc, string asm> {
}
}
-multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
+multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm,
+ SDPatternOperator op = null_frag> {
def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftR8,
ElementSizeB>;
def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftR16,
@@ -3945,6 +3959,11 @@ multiclass sve_int_bin_pred_shift_imm_right<bits<4> opc, string asm> {
let Inst{22} = imm{5};
let Inst{9-8} = imm{4-3};
}
+
+ def : SVE_3_Op_Imm_Pat<nxv16i8, op, nxv16i1, nxv16i8, i32, vecshiftR8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Imm_Pat<nxv8i16, op, nxv8i1, nxv8i16, i32, vecshiftR16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Imm_Pat<nxv4i32, op, nxv4i1, nxv4i32, i32, vecshiftR32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Imm_Pat<nxv2i64, op, nxv2i1, nxv2i64, i32, vecshiftR64, !cast<Instruction>(NAME # _D)>;
}
class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
@@ -3971,17 +3990,28 @@ class sve_int_bin_pred_shift<bits<2> sz8_64, bit wide, bits<3> opc,
let ElementSize = zprty.ElementSize;
}
-multiclass sve_int_bin_pred_shift<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_bin_pred_shift<0b00, 0b0, opc, asm, ZPR8, ZPR8>;
def _H : sve_int_bin_pred_shift<0b01, 0b0, opc, asm, ZPR16, ZPR16>;
def _S : sve_int_bin_pred_shift<0b10, 0b0, opc, asm, ZPR32, ZPR32>;
def _D : sve_int_bin_pred_shift<0b11, 0b0, opc, asm, ZPR64, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
-multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm> {
+multiclass sve_int_bin_pred_shift_wide<bits<3> opc, string asm,
+ SDPatternOperator op> {
def _B : sve_int_bin_pred_shift<0b00, 0b1, opc, asm, ZPR8, ZPR64>;
def _H : sve_int_bin_pred_shift<0b01, 0b1, opc, asm, ZPR16, ZPR64>;
def _S : sve_int_bin_pred_shift<0b10, 0b1, opc, asm, ZPR32, ZPR64>;
+
+ def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv2i64, !cast<Instruction>(NAME # _B)>;
+ def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv2i64, !cast<Instruction>(NAME # _H)>;
+ def : SVE_3_Op_Pat<nxv4i32, op, nxv4i1, nxv4i32, nxv2i64, !cast<Instruction>(NAME # _S)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
new file mode 100644
index 000000000000..b1b3dc61560b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-shifts.ll
@@ -0,0 +1,367 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
+
+;
+; ASR
+;
+
+define <vscale x 16 x i8> @asr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: asr_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: asr_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: asr_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_i64:
+; CHECK: asr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @asr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i8:
+; CHECK: asr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i16:
+; CHECK: asr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: asr_wide_i32:
+; CHECK: asr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; ASRD
+;
+
+define <vscale x 16 x i8> @asrd_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
+; CHECK-LABEL: asrd_i8:
+; CHECK: asrd z0.b, p0/m, z0.b, #1
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ i32 1)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @asrd_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a) {
+; CHECK-LABEL: asrd_i16:
+; CHECK: asrd z0.h, p0/m, z0.h, #2
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ i32 2)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @asrd_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a) {
+; CHECK-LABEL: asrd_i32:
+; CHECK: asrd z0.s, p0/m, z0.s, #31
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ i32 31)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @asrd_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a) {
+; CHECK-LABEL: asrd_i64:
+; CHECK: asrd z0.d, p0/m, z0.d, #64
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ i32 64)
+ ret <vscale x 2 x i64> %out
+}
+
+;
+; INSR
+;
+
+define <vscale x 16 x i8> @insr_i8(<vscale x 16 x i8> %a, i8 %b) {
+; CHECK-LABEL: insr_i8:
+; CHECK: insr z0.b, w0
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8> %a, i8 %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @insr_i16(<vscale x 8 x i16> %a, i16 %b) {
+; CHECK-LABEL: insr_i16:
+; CHECK: insr z0.h, w0
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16> %a, i16 %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @insr_i32(<vscale x 4 x i32> %a, i32 %b) {
+; CHECK-LABEL: insr_i32:
+; CHECK: insr z0.s, w0
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32> %a, i32 %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @insr_i64(<vscale x 2 x i64> %a, i64 %b) {
+; CHECK-LABEL: insr_i64:
+; CHECK: insr z0.d, x0
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64> %a, i64 %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 8 x half> @insr_f16(<vscale x 8 x half> %a, half %b) {
+; CHECK-LABEL: insr_f16:
+; CHECK: insr z0.h, h1
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half> %a, half %b)
+ ret <vscale x 8 x half> %out
+}
+
+define <vscale x 4 x float> @insr_f32(<vscale x 4 x float> %a, float %b) {
+; CHECK-LABEL: insr_f32:
+; CHECK: insr z0.s, s1
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float> %a, float %b)
+ ret <vscale x 4 x float> %out
+}
+
+define <vscale x 2 x double> @insr_f64(<vscale x 2 x double> %a, double %b) {
+; CHECK-LABEL: insr_f64:
+; CHECK: insr z0.d, d1
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double> %a, double %b)
+ ret <vscale x 2 x double> %out
+}
+
+;
+; LSL
+;
+
+define <vscale x 16 x i8> @lsl_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: lsl_i8:
+; CHECK: lsl z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsl_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: lsl_i16:
+; CHECK: lsl z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsl_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: lsl_i32:
+; CHECK: lsl z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_i64:
+; CHECK: lsl z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @lsl_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i8:
+; CHECK: lsl z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsl_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i16:
+; CHECK: lsl z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsl_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsl_wide_i32:
+; CHECK: lsl z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+;
+; LSR
+;
+
+define <vscale x 16 x i8> @lsr_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: lsr_i8:
+; CHECK: lsr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: lsr_i16:
+; CHECK: lsr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsr_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: lsr_i32:
+; CHECK: lsr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @lsr_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_i64:
+; CHECK: lsr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 16 x i8> @lsr_wide_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i8:
+; CHECK: lsr z0.b, p0/m, z0.b, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @lsr_wide_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i16:
+; CHECK: lsr z0.h, p0/m, z0.h, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @lsr_wide_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: lsr_wide_i32:
+; CHECK: lsr z0.s, p0/m, z0.s, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %a,
+ <vscale x 2 x i64> %b)
+ ret <vscale x 4 x i32> %out
+}
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.asrd.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, i32)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.asrd.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, i32)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.asrd.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.asrd.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.insr.nxv16i8(<vscale x 16 x i8>, i8)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.insr.nxv8i16(<vscale x 8 x i16>, i16)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.insr.nxv4i32(<vscale x 4 x i32>, i32)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.insr.nxv2i64(<vscale x 2 x i64>, i64)
+declare <vscale x 8 x half> @llvm.aarch64.sve.insr.nxv8f16(<vscale x 8 x half>, half)
+declare <vscale x 4 x float> @llvm.aarch64.sve.insr.nxv4f32(<vscale x 4 x float>, float)
+declare <vscale x 2 x double> @llvm.aarch64.sve.insr.nxv2f64(<vscale x 2 x double>, double)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsl.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsl.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsl.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsl.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 2 x i64> @llvm.aarch64.sve.lsr.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+
+declare <vscale x 16 x i8> @llvm.aarch64.sve.lsr.wide.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 2 x i64>)
+declare <vscale x 8 x i16> @llvm.aarch64.sve.lsr.wide.nxv8i16(<vscale x 8 x i1>, <vscale x 8 x i16>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.aarch64.sve.lsr.wide.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 2 x i64>)