[llvm] [AArch64] Generate zeroing forms of certain SVE2.2 instructions (2/n) (PR #115709)
Momchil Velikov via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 13 06:11:06 PST 2024
================
@@ -4442,3 +4442,77 @@ let Predicates = [HasSVE, HasCPA] in {
// Multiply-add vectors, writing addend
def MLA_CPA : sve_int_mla_cpa<"mlapt">;
}
+
+multiclass sve_int_un_pred_arit_bitwise_fp_pat<SDPatternOperator op> {
+ let Predicates = [HasSVEorSME, NotHasSVE2p2orSME2p2] in {
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Pseudo>(NAME # _ZPmZ_H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Pseudo>(NAME # _ZPmZ_H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Pseudo>(NAME # _ZPmZ_H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Pseudo>(NAME # _ZPmZ_S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Pseudo>(NAME # _ZPmZ_S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Pseudo>(NAME # _ZPmZ_D_UNDEF)>;
+ }
+
+ let Predicates = [HasSVE2p2orSME2p2] in {
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, op, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _ZPzZ_H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f16, op, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _ZPzZ_H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f16, op, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _ZPzZ_H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4f32, op, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _ZPzZ_S)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f32, op, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _ZPzZ_S)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2f64, op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _ZPzZ_D)>;
+ }
+}
+
+defm FABS : sve_int_un_pred_arit_bitwise_fp_pat<AArch64fabs_mt>;
+defm FNEG : sve_int_un_pred_arit_bitwise_fp_pat<AArch64fneg_mt>;
+
+multiclass sve_int_un_pred_arit_pat<SDPatternOperator op> {
+ let Predicates = [HasSVEorSME, NotHasSVE2p2orSME2p2] in {
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Pseudo>(NAME # _ZPmZ_B_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Pseudo>(NAME # _ZPmZ_H_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Pseudo>(NAME # _ZPmZ_S_UNDEF)>;
+ defm : SVE_1_Op_PassthruUndef_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Pseudo>(NAME # _ZPmZ_D_UNDEF)>;
+ }
+
+ let Predicates = [HasSVE2p2orSME2p2] in {
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv16i8, op, nxv16i1, nxv16i8, !cast<Instruction>(NAME # _ZPzZ_B)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv8i16, op, nxv8i1, nxv8i16, !cast<Instruction>(NAME # _ZPzZ_H)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv4i32, op, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _ZPzZ_S)>;
+ def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, op, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _ZPzZ_D)>;
+ }
+}
+
+defm ABS : sve_int_un_pred_arit_pat<AArch64abs_mt>;
+defm NEG : sve_int_un_pred_arit_pat<AArch64neg_mt>;
+
+multiclass sve_fp_2op_p_zdr_pat {
+ let Predicates = [HasSVEorSME, NotHasSVE2p2orSME2p2] in {
+ defm : SVE_3_Op_Undef_Pat<nxv8f16, int_aarch64_sve_fcvt_f16f32, nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _ZPmZ_StoH)>;
+ defm : SVE_3_Op_Undef_Pat<nxv8f16, int_aarch64_sve_fcvt_f16f64, nxv8f16, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _ZPmZ_DtoH)>;
+ defm : SVE_3_Op_Undef_Pat<nxv4f32, int_aarch64_sve_fcvt_f32f64, nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _ZPmZ_DtoS)>;
+
+ defm : SVE_3_Op_Undef_Pat<nxv4f32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _ZPmZ_HtoS)>;
+ defm : SVE_3_Op_Undef_Pat<nxv2f64, int_aarch64_sve_fcvt_f64f16, nxv2f64, nxv2i1, nxv8f16, !cast<Instruction>(NAME # _ZPmZ_HtoD)>;
+ defm : SVE_3_Op_Undef_Pat<nxv2f64, int_aarch64_sve_fcvt_f64f32, nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _ZPmZ_StoD)>;
+ }
----------------
momchil-velikov wrote:
Maybe I've forgotten to remove the "undef" patterns from `sve_fp_2op_p_zdr`. I'll have a look.
I want to move them out so they can have a different set of predicates, namely one including `NotHasSVE2p2orSME2p2`.
Otherwise, which one would the compiler choose between:
```
defm : SVE_3_Op_Undef_Pat<nxv4f32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _ZPmZ_HtoS)>;
```
and
```
defm : SVE_3_Op_UndefZero_Pat<nxv4f32, int_aarch64_sve_fcvt_f32f16, nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _ZPzZ_HtoS)>;
```
To expand that a bit, the above multiclasses look like:
```
multiclass SVE_3_Op_Undef_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst> {
def : Pat<(vtd (op (vt1 undef), vt2:$Op1, vt3:$Op2)),
(inst (IMPLICIT_DEF), $Op1, $Op2)>;
def : Pat<(vtd (op vt1:$Op1, (vt2 (SVEAllActive:$Op2)), vt3:$Op3)),
(inst $Op1, $Op2, $Op3)>;
}
multiclass SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst> {
def : Pat<(vtd (op (vt1 undef), vt2:$Op1, vt3:$Op2)),
(inst $Op1, $Op2)>;
def : Pat<(vtd (op (vt1 (SVEDup0)), vt2:$Op1, vt3:$Op2)),
(inst $Op1, $Op2)>;
}
```
The first `def` in each of the two multiclasses would match the same sub-DAG (if they had the same predicates).
Now, the second multiclass can become simply (and I am going to change it, just like it is in `class SVE_1_Op_PassthruUndefZero_Pat`):
```
multiclass SVE_3_Op_UndefZero_Pat<ValueType vtd, SDPatternOperator op, ValueType vt1,
ValueType vt2, ValueType vt3, Instruction inst> {
def : Pat<(vtd (op (vt1 SVEDup0Undef), vt2:$Op1, vt3:$Op2)),
(inst $Op1, $Op2)>;
}
```
and then maybe, just maybe, the `(vt1 SVEDup0Undef)` part would be considered "more complex" than `(vt1 undef)` and it would take precedence. But that's really relying on a poorly (if at all) documented behaviour of ISel to prefer certain patterns over others, based on some vague notion of complexity of the pattern.
It may work, but just having a set of mutually exclusive predicates is certain to work, and is very easy and obvious to understand, especially when the patterns are next to each other.
https://github.com/llvm/llvm-project/pull/115709
More information about the llvm-commits
mailing list