[llvm] [AArch64] Generate zeroing forms of certain SVE2.2 instructions (1/11) (PR #116259)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 14 09:20:42 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: Momchil Velikov (momchil-velikov)
<details>
<summary>Changes</summary>
SVE2.2 introduces instructions with predicated forms with zeroing of the inactive lanes. This allows in some cases to save a `movprfx` or a `mov` instruction when emitting code for `_x` or `_z` variants of intrinsics.
This patch adds support for emitting the zeroing forms of `ABS`, `NEG`, `FABS`, and `FNEG` instructions.
---
Patch is 35.20 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/116259.diff
4 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+3)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+12-4)
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+39-19)
- (added) llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll (+666)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index e4ad27d4bcfc00..4b9d34be02b2ef 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -381,6 +381,9 @@ def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;
+def UseUnaryUndefPseudos
+ : Predicate<"!(Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2()))">;
+
def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
SDTCisInt<1>]>>;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 4f146b3ee59e9a..0ac009e38eed8b 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -664,6 +664,14 @@ let Predicates = [HasSVEorSME] in {
defm FABS_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b100, "fabs", AArch64fabs_mt>;
defm FNEG_ZPmZ : sve_int_un_pred_arit_bitwise_fp<0b101, "fneg", AArch64fneg_mt>;
+ let Predicates = [HasSVEorSME, UseUnaryUndefPseudos] in {
+ defm FABS_ZPmZ : sve_int_un_pred_arit_hsd<AArch64fabs_mt>;
+ defm FNEG_ZPmZ : sve_int_un_pred_arit_hsd<AArch64fneg_mt>;
+
+ defm ABS_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64abs_mt>;
+ defm NEG_ZPmZ : sve_int_un_pred_arit_bhsd<AArch64neg_mt>;
+ }
+
foreach VT = [nxv2bf16, nxv4bf16, nxv8bf16] in {
// No dedicated instruction, so just clear the sign bit.
def : Pat<(VT (fabs VT:$op)),
@@ -4310,16 +4318,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm NOT_ZPzZ : sve_int_un_pred_arit_bitwise_z<0b110, "not">;
// floating point
- defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs">;
- defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg">;
+ defm FABS_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b100, "fabs", AArch64fabs_mt>;
+ defm FNEG_ZPzZ : sve_int_un_pred_arit_bitwise_fp_z<0b101, "fneg", AArch64fneg_mt>;
// SVE2p2 integer unary arithmetic, zeroing predicate
defm SXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b000, "sxtb">;
defm UXTB_ZPzZ : sve_int_un_pred_arit_h_z<0b001, "uxtb">;
defm SXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b010, "sxth">;
defm UXTH_ZPzZ : sve_int_un_pred_arit_w_z<0b011, "uxth">;
- defm ABS_ZPzZ : sve_int_un_pred_arit_z< 0b110, "abs">;
- defm NEG_ZPzZ : sve_int_un_pred_arit_z< 0b111, "neg">;
+ defm ABS_ZPzZ : sve_int_un_pred_arit_z< 0b110, "abs", AArch64abs_mt>;
+ defm NEG_ZPzZ : sve_int_un_pred_arit_z< 0b111, "neg", AArch64neg_mt>;
def SXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1000, "sxtw", ZPR64>;
def UXTW_ZPzZ_D : sve_int_un_pred_arit_z<0b11, 0b1010, "uxtw", ZPR64>;
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 6de6aed3b2a816..84bf3585db5a16 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -482,6 +482,8 @@ let Predicates = [HasSVEorSME] in {
//===----------------------------------------------------------------------===//
// SVE pattern match helpers.
//===----------------------------------------------------------------------===//
+def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
+def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
class SVE_1_Op_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
Instruction inst>
@@ -502,6 +504,11 @@ multiclass SVE_1_Op_PassthruUndef_Pat<ValueType vtd, SDPatternOperator op, Value
(inst $Op3, $Op1, $Op2)>;
}
+class SVE_1_Op_PassthruUndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
+ ValueType vts, Instruction inst>
+ : Pat<(vtd (op pg:$Op1, vts:$Op2, (vtd (SVEDup0Undef)))),
+ (inst $Op1, $Op2)>;
+
// Used to match FP_ROUND_MERGE_PASSTHRU, which has an additional flag for the
// type of rounding. This is matched by timm0_1 in pattern below and ignored.
class SVE_1_Op_Passthru_Round_Pat<ValueType vtd, SDPatternOperator op, ValueType pg,
@@ -517,13 +524,12 @@ multiclass SVE_1_Op_PassthruUndef_Round_Pat<ValueType vtd, SDPatternOperator op,
(inst $Op3, $Op1, $Op2)>;
}
-def SVEDup0 : ComplexPattern<vAny, 0, "SelectDupZero", []>;
-
class SVE_1_Op_PassthruZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, Instruction inst>
: Pat<(vtd (op (vtd (SVEDup0)), vt1:$Op1, vt2:$Op2)),
(inst (IMPLICIT_DEF), $Op1, $Op2)>;
+
class SVE_1_Op_Imm_OptLsl_Pat<ValueType vt, SDPatternOperator op, ZPRRegOp zprty,
ValueType it, ComplexPattern cpx, Instruction inst>
: Pat<(vt (op (vt zprty:$Op1), (vt (splat_vector (it (cpx i32:$imm, i32:$shift)))))),
@@ -606,8 +612,6 @@ class SVE_4_Op_Imm_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
: Pat<(vtd (op vt1:$Op1, vt2:$Op2, vt3:$Op3, (vt4 ImmTy:$Op4))),
(inst $Op1, $Op2, $Op3, ImmTy:$Op4)>;
-def SVEDup0Undef : ComplexPattern<vAny, 0, "SelectDupZeroOrUndef", []>;
-
let AddedComplexity = 1 in {
class SVE_3_Op_Pat_SelZero<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst>
@@ -4820,23 +4824,18 @@ multiclass sve_int_un_pred_arit<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
def : SVE_1_Op_Passthru_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
-
- def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
- def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
- def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
- def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
-
- defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
- defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
-multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm> {
+multiclass sve_int_un_pred_arit_z<bits<3> opc, string asm, SDPatternOperator op> {
def _B : sve_int_un_pred_arit_z<0b00, { opc, 0b0 }, asm, ZPR8>;
def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b0 }, asm, ZPR16>;
def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b0 }, asm, ZPR32>;
def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b0 }, asm, ZPR64>;
+
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _B)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _D)>;
}
multiclass sve_int_un_pred_arit_h<bits<3> opc, string asm,
@@ -4950,7 +4949,22 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
def : SVE_1_Op_Passthru_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
def : SVE_1_Op_Passthru_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm, SDPatternOperator op> {
+ def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
+ def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
+ def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+}
+
+multiclass sve_int_un_pred_arit_hsd<SDPatternOperator op> {
def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
@@ -4963,10 +4977,16 @@ multiclass sve_int_un_pred_arit_bitwise_fp<bits<3> opc, string asm,
defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
-multiclass sve_int_un_pred_arit_bitwise_fp_z<bits<3> opc, string asm> {
- def _H : sve_int_un_pred_arit_z<0b01, { opc, 0b1 }, asm, ZPR16>;
- def _S : sve_int_un_pred_arit_z<0b10, { opc, 0b1 }, asm, ZPR32>;
- def _D : sve_int_un_pred_arit_z<0b11, { opc, 0b1 }, asm, ZPR64>;
+multiclass sve_int_un_pred_arit_bhsd<SDPatternOperator op> {
+ def _B_UNDEF : PredOneOpPassthruPseudo<NAME # _B, ZPR8>;
+ def _H_UNDEF : PredOneOpPassthruPseudo<NAME # _H, ZPR16>;
+ def _S_UNDEF : PredOneOpPassthruPseudo<NAME # _S, ZPR32>;
+ def _D_UNDEF : PredOneOpPassthruPseudo<NAME # _D, ZPR64>;
+
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _D_UNDEF)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
new file mode 100644
index 00000000000000..1caee994220f05
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/zeroing-forms-abs-neg.ll
@@ -0,0 +1,666 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mattr=+sve < %s | FileCheck %s
+; RUN: llc -mattr=+sve2p2 < %s | FileCheck %s -check-prefix CHECK-2p2
+
+; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
+; RUN: llc -mattr=+sme2p2 -force-streaming < %s | FileCheck %s -check-prefix CHECK-2p2
+
+target triple = "aarch64-linux"
+
+define <vscale x 2 x double> @test_svabs_f64_x_1(<vscale x 2 x i1> %pg, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svabs_f64_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f64_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z0.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svabs_f64_x_2(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svabs_f64_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f64_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 2 x double> @test_svabs_f64_z(<vscale x 2 x i1> %pg, double %z0, <vscale x 2 x double> %x) {
+; CHECK-LABEL: test_svabs_f64_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.d, #0 // =0x0
+; CHECK-NEXT: fabs z0.d, p0/m, z1.d
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f64_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.d, p0/z, z1.d
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fabs.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, <vscale x 2 x double> %x)
+ ret <vscale x 2 x double> %0
+}
+
+define <vscale x 4 x float> @test_svabs_f32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svabs_f32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svabs_f32_x_2(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svabs_f32_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f32_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 4 x float> @test_svabs_f32_z(<vscale x 4 x i1> %pg, double %z0, <vscale x 4 x float> %x) {
+; CHECK-LABEL: test_svabs_f32_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.s, #0 // =0x0
+; CHECK-NEXT: fabs z0.s, p0/m, z1.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f32_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.s, p0/z, z1.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.fabs.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, <vscale x 4 x float> %x)
+ ret <vscale x 4 x float> %0
+}
+
+define <vscale x 8 x half> @test_svabs_f16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svabs_f16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fabs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svabs_f16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svabs_f16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 8 x half> @test_svabs_f16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x half> %x) {
+; CHECK-LABEL: test_svabs_f16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: fabs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_f16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: fabs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.fabs.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x half> %x)
+ ret <vscale x 8 x half> %0
+}
+
+define <vscale x 16 x i8> @test_svabs_s8_x_1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svabs_s8_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs z0.b, p0/m, z0.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s8_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: abs z0.b, p0/z, z0.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %0
+}
+
+define <vscale x 16 x i8> @test_svabs_s8_x_2(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svabs_s8_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s8_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %1
+}
+
+define <vscale x 16 x i8> @test_svabs_s8_z(<vscale x 16 x i1> %pg, double %z0, <vscale x 16 x i8> %x) {
+; CHECK-LABEL: test_svabs_s8_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.b, #0 // =0x0
+; CHECK-NEXT: abs z0.b, p0/m, z1.b
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s8_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: abs z0.b, p0/z, z1.b
+; CHECK-2p2-NEXT: ret
+entry:
+ %1 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.abs.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, <vscale x 16 x i8> %x)
+ ret <vscale x 16 x i8> %1
+}
+
+define <vscale x 8 x i16> @test_svabs_s16_x_1(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svabs_s16_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s16_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: abs z0.h, p0/z, z0.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svabs_s16_x_2(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svabs_s16_x_2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movprfx z0, z1
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s16_x_2:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 8 x i16> @test_svabs_s16_z(<vscale x 8 x i1> %pg, double %z0, <vscale x 8 x i16> %x) {
+; CHECK-LABEL: test_svabs_s16_z:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: mov z0.h, #0 // =0x0
+; CHECK-NEXT: abs z0.h, p0/m, z1.h
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s16_z:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: abs z0.h, p0/z, z1.h
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.abs.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, <vscale x 8 x i16> %x)
+ ret <vscale x 8 x i16> %0
+}
+
+define <vscale x 4 x i32> @test_svabs_s32_x_1(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %x) {
+; CHECK-LABEL: test_svabs_s32_x_1:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: abs z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+;
+; CHECK-2p2-LABEL: test_svabs_s32_x_1:
+; CHECK-2p2: // %bb.0: // %entry
+; CHECK-2p2-NEXT: abs z0.s, p0/z, z0.s
+; CHECK-2p2-NEXT: ret
+entry:
+ %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.abs.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, <vscale x 4 x i32> %x)
+ ret <vscale x 4 x i32> %0
+}
+
+define <vscale x 4 x i32> @test_svabs_s32_x_2(<vsca...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/116259
More information about the llvm-commits
mailing list