[llvm] e42ee2d - [AArch64][SVE] Add support for using reverse forms of SVE2 shifts
Bradley Smith via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 4 04:58:35 PDT 2021
Author: Bradley Smith
Date: 2021-06-04T12:56:53+01:00
New Revision: e42ee2d50963eb12e4d2dc0a20f36b1cb2af8543
URL: https://github.com/llvm/llvm-project/commit/e42ee2d50963eb12e4d2dc0a20f36b1cb2af8543
DIFF: https://github.com/llvm/llvm-project/commit/e42ee2d50963eb12e4d2dc0a20f36b1cb2af8543.diff
LOG: [AArch64][SVE] Add support for using reverse forms of SVE2 shifts
When using an ACLE intrinsic for an SVE2 shift, if the predicate passed
has all relevant lanes active, then use the reversed form of the
instruction when beneficial.
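
For illustration, a minimal C sketch (assuming the ACLE names from
arm_sve.h; this snippet is not part of the commit): with an all-true
predicate the compiler is now free to pick the reversed form, so the
result can be computed directly in the return register instead of
needing an extra MOV.

  #include <arm_sve.h>

  // 'a' arrives in z0 and 'b' in z1; the result is returned in z0.
  // With an all-active predicate, SQRSHLR z0, p0/m, z0, z1 computes
  // "b shifted by a" straight into z0, whereas the non-reversed form
  // would need SQRSHL on z1 followed by a MOV into z0.
  svint8_t shift_b_by_a(svint8_t a, svint8_t b) {
    return svqrshl_s8_m(svptrue_b8(), b, a);
  }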
Added:
Modified:
llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
llvm/lib/Target/AArch64/SVEInstrFormats.td
llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 585fa50433fa..198260d7c472 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2518,18 +2518,25 @@ let Predicates = [HasSVE2] in {
defm UQSUBR_ZPmZ : sve2_int_arith_pred<0b111110, "uqsubr", int_aarch64_sve_uqsubr>;
// SVE2 saturating/rounding bitwise shift left (predicated)
- defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl>;
- defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl>;
- defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag>;
- defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag>;
- defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl>;
- defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl>;
- defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl>;
- defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl>;
- defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag>;
- defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag>;
- defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag>;
- defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag>;
+ defm SRSHL_ZPmZ : sve2_int_arith_pred<0b000100, "srshl", int_aarch64_sve_srshl, "SRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SRSHLR_ZPmZ">;
+ defm URSHL_ZPmZ : sve2_int_arith_pred<0b000110, "urshl", int_aarch64_sve_urshl, "URSHL_ZPZZ", DestructiveBinaryCommWithRev, "URSHLR_ZPmZ">;
+ defm SRSHLR_ZPmZ : sve2_int_arith_pred<0b001100, "srshlr", null_frag, "SRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SRSHL_ZPmZ", /*isReverseInstr*/ 1>;
+ defm URSHLR_ZPmZ : sve2_int_arith_pred<0b001110, "urshlr", null_frag, "URSHLR_ZPZZ", DestructiveBinaryCommWithRev, "URSHL_ZPmZ", /*isReverseInstr*/ 1>;
+ defm SQSHL_ZPmZ : sve2_int_arith_pred<0b010000, "sqshl", int_aarch64_sve_sqshl, "SQSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQSHLR_ZPmZ">;
+ defm UQSHL_ZPmZ : sve2_int_arith_pred<0b010010, "uqshl", int_aarch64_sve_uqshl, "UQSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQSHLR_ZPmZ">;
+ defm SQRSHL_ZPmZ : sve2_int_arith_pred<0b010100, "sqrshl", int_aarch64_sve_sqrshl, "SQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHLR_ZPmZ">;
+ defm UQRSHL_ZPmZ : sve2_int_arith_pred<0b010110, "uqrshl", int_aarch64_sve_uqrshl, "UQRSHL_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHLR_ZPmZ">;
+ defm SQSHLR_ZPmZ : sve2_int_arith_pred<0b011000, "sqshlr", null_frag, "SQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQSHL_ZPmZ", /*isReverseInstr*/ 1>;
+ defm UQSHLR_ZPmZ : sve2_int_arith_pred<0b011010, "uqshlr", null_frag, "UQSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQSHL_ZPmZ", /*isReverseInstr*/ 1>;
+ defm SQRSHLR_ZPmZ : sve2_int_arith_pred<0b011100, "sqrshlr", null_frag, "SQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "SQRSHL_ZPmZ", /*isReverseInstr*/ 1>;
+ defm UQRSHLR_ZPmZ : sve2_int_arith_pred<0b011110, "uqrshlr", null_frag, "UQRSHLR_ZPZZ", DestructiveBinaryCommWithRev, "UQRSHL_ZPmZ", /*isReverseInstr*/ 1>;
+
+ defm SRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_srshl>;
+ defm URSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_urshl>;
+ defm SQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqshl>;
+ defm UQSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqshl>;
+ defm SQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_sqrshl>;
+ defm UQRSHL_ZPZZ : sve_int_bin_pred_all_active_bhsd<int_aarch64_sve_uqrshl>;
let Predicates = [HasSVE2, UseExperimentalZeroingPseudos] in {
defm SQSHL_ZPZI : sve_int_bin_pred_shift_imm_left_zeroing_bhsd<null_frag>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index d0523c44cc49..02ac788b16e5 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -372,6 +372,12 @@ class SVE_2_Op_Pred_All_Active<ValueType vtd, SDPatternOperator op,
: Pat<(vtd (op (pt (SVEAllActive)), vt1:$Op1, vt2:$Op2)),
(inst $Op1, $Op2)>;
+class SVE_2_Op_Pred_All_Active_Pt<ValueType vtd, SDPatternOperator op,
+ ValueType pt, ValueType vt1, ValueType vt2,
+ Instruction inst>
+: Pat<(vtd (op (pt (SVEAllActive:$Op1)), vt1:$Op2, vt2:$Op3)),
+ (inst $Op1, $Op2, $Op3)>;
+
class SVE_3_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3)),
@@ -3167,11 +3173,20 @@ class sve2_int_arith_pred<bits<2> sz, bits<6> opc, string asm,
let ElementSize = zprty.ElementSize;
}
-multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op> {
- def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>;
- def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>;
- def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>;
- def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>;
+multiclass sve2_int_arith_pred<bits<6> opc, string asm, SDPatternOperator op,
+ string Ps = "",
+ DestructiveInstTypeEnum flags=DestructiveOther,
+ string revname="", bit isReverseInstr=0> {
+ let DestructiveInstType = flags in {
+ def _B : sve2_int_arith_pred<0b00, opc, asm, ZPR8>,
+ SVEPseudo2Instr<Ps # _B, 1>, SVEInstr2Rev<NAME # _B, revname # _B, isReverseInstr>;
+ def _H : sve2_int_arith_pred<0b01, opc, asm, ZPR16>,
+ SVEPseudo2Instr<Ps # _H, 1>, SVEInstr2Rev<NAME # _H, revname # _H, isReverseInstr>;
+ def _S : sve2_int_arith_pred<0b10, opc, asm, ZPR32>,
+ SVEPseudo2Instr<Ps # _S, 1>, SVEInstr2Rev<NAME # _S, revname # _S, isReverseInstr>;
+ def _D : sve2_int_arith_pred<0b11, opc, asm, ZPR64>,
+ SVEPseudo2Instr<Ps # _D, 1>, SVEInstr2Rev<NAME # _D, revname # _D, isReverseInstr>;
+ }
def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -8139,3 +8154,15 @@ multiclass sve_int_shift_pred_bhsd<SDPatternOperator op,
def : SVE_Shift_DupImm_Pred_Pat<nxv4i32, op, nxv4i1, i32, imm_s, !cast<Instruction>(NAME # _UNDEF_S)>;
def : SVE_Shift_DupImm_Pred_Pat<nxv2i64, op, nxv2i1, i64, imm_d, !cast<Instruction>(NAME # _UNDEF_D)>;
}
+
+multiclass sve_int_bin_pred_all_active_bhsd<SDPatternOperator op> {
+ def _UNDEF_B : PredTwoOpPseudo<NAME # _B, ZPR8, FalseLanesUndef>;
+ def _UNDEF_H : PredTwoOpPseudo<NAME # _H, ZPR16, FalseLanesUndef>;
+ def _UNDEF_S : PredTwoOpPseudo<NAME # _S, ZPR32, FalseLanesUndef>;
+ def _UNDEF_D : PredTwoOpPseudo<NAME # _D, ZPR64, FalseLanesUndef>;
+
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Pseudo>(NAME # _UNDEF_B)>;
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Pseudo>(NAME # _UNDEF_H)>;
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv4i32, op, nxv4i1, nxv4i32, nxv4i32, !cast<Pseudo>(NAME # _UNDEF_S)>;
+ def : SVE_2_Op_Pred_All_Active_Pt<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Pseudo>(NAME # _UNDEF_D)>;
+}
diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
index 30e521cc75f6..9afdb48c053e 100644
--- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp.ll
@@ -706,6 +706,69 @@ define <vscale x 2 x i64> @sqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
ret <vscale x 2 x i64> %out
}
+;
+; SQRSHLR
+;
+
+define <vscale x 16 x i8> @sqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqrshlr_i8:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: sqrshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqrshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqrshlr_i16:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: sqrshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqrshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqrshlr_i32:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: sqrshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqrshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshlr_i64:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: sqrshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @sqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqrshlr_i64_noptrue:
+; CHECK: sqrshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqrshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
;
; SQSHL (Vectors)
;
@@ -750,6 +813,69 @@ define <vscale x 2 x i64> @sqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %out
}
+;
+; SQSHLR
+;
+
+define <vscale x 16 x i8> @sqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: sqshlr_i8:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: sqshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.sqshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @sqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: sqshlr_i16:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: sqshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.sqshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @sqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: sqshlr_i32:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: sqshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.sqshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @sqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshlr_i64:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: sqshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @sqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: sqshlr_i64_noptrue:
+; CHECK: sqshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.sqshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
;
; SQSHL (Scalar)
;
@@ -1110,6 +1236,69 @@ define <vscale x 2 x i64> @srshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %out
}
+;
+; SRSHLR
+;
+
+define <vscale x 16 x i8> @srshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: srshlr_i8:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: srshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.srshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @srshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: srshlr_i16:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: srshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.srshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @srshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: srshlr_i32:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: srshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.srshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @srshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srshlr_i64:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: srshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @srshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: srshlr_i64_noptrue:
+; CHECK: srshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.srshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
;
; SRSHR
;
@@ -1550,6 +1739,69 @@ define <vscale x 2 x i64> @uqrshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64>
ret <vscale x 2 x i64> %out
}
+;
+; UQRSHLR
+;
+
+define <vscale x 16 x i8> @uqrshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqrshlr_i8:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: uqrshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqrshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqrshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqrshlr_i16:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: uqrshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqrshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqrshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqrshlr_i32:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: uqrshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqrshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqrshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshlr_i64:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: uqrshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @uqrshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqrshlr_i64_noptrue:
+; CHECK: uqrshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqrshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
;
; UQSHL (Vectors)
;
@@ -1594,6 +1846,69 @@ define <vscale x 2 x i64> @uqshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %out
}
+;
+; UQSHLR
+;
+
+define <vscale x 16 x i8> @uqshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: uqshlr_i8:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: uqshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uqshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @uqshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: uqshlr_i16:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: uqshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uqshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @uqshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: uqshlr_i32:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: uqshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uqshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @uqshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshlr_i64:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: uqshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @uqshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: uqshlr_i64_noptrue:
+; CHECK: uqshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uqshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
;
; UQSHL (Scalar)
;
@@ -1880,6 +2195,69 @@ define <vscale x 2 x i64> @urshl_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %
ret <vscale x 2 x i64> %out
}
+;
+; URSHLR
+;
+
+define <vscale x 16 x i8> @urshlr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: urshlr_i8:
+; CHECK: ptrue p0.b
+; CHECK-NEXT: urshlr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+ %pg = call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32 31)
+ %out = call <vscale x 16 x i8> @llvm.aarch64.sve.urshl.nxv16i8(<vscale x 16 x i1> %pg,
+ <vscale x 16 x i8> %b,
+ <vscale x 16 x i8> %a)
+ ret <vscale x 16 x i8> %out
+}
+
+define <vscale x 8 x i16> @urshlr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; CHECK-LABEL: urshlr_i16:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: urshlr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+ %pg = call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
+ %out = call <vscale x 8 x i16> @llvm.aarch64.sve.urshl.nxv8i16(<vscale x 8 x i1> %pg,
+ <vscale x 8 x i16> %b,
+ <vscale x 8 x i16> %a)
+ ret <vscale x 8 x i16> %out
+}
+
+define <vscale x 4 x i32> @urshlr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: urshlr_i32:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: urshlr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+ %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
+ %out = call <vscale x 4 x i32> @llvm.aarch64.sve.urshl.nxv4i32(<vscale x 4 x i1> %pg,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %a)
+ ret <vscale x 4 x i32> %out
+}
+
+define <vscale x 2 x i64> @urshlr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: urshlr_i64:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: urshlr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+ %pg = call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
+define <vscale x 2 x i64> @urshlr_i64_noptrue(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: urshlr_i64_noptrue:
+; CHECK: urshl z1.d, p0/m, z1.d, z0.d
+; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: ret
+ %out = call <vscale x 2 x i64> @llvm.aarch64.sve.urshl.nxv2i64(<vscale x 2 x i1> %pg,
+ <vscale x 2 x i64> %b,
+ <vscale x 2 x i64> %a)
+ ret <vscale x 2 x i64> %out
+}
+
;
; URSHR
;
@@ -2289,3 +2667,8 @@ declare <vscale x 16 x i8> @llvm.aarch64.sve.usra.nxv16i8(<vscale x 16 x i8>, <v
declare <vscale x 8 x i16> @llvm.aarch64.sve.usra.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, i32)
declare <vscale x 4 x i32> @llvm.aarch64.sve.usra.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.usra.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, i32)
+
+declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
+declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
+declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)