[llvm] [AArch64] Fixup destructive floating-point precision conversions (PR #118788)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 03:08:34 PST 2024
https://github.com/SpencerAbson created https://github.com/llvm/llvm-project/pull/118788
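This patch changes the zeroing forms of `FCVTXNT`, `FCVTNT`, and `BFCVTNT` so that their destination operand is also listed as an input in the instruction's `ins` dag, i.e. they are modelled as destructive. These narrowing down-conversions place their results in the odd (top) elements and leave the even elements of the destination vector unchanged, regardless of whether the predication is merging or zeroing.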
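This patch also makes the merging form of `BFCVTNT` non-movprfx'able: the assembler now reports an error when it follows a `movprfx` (predicated or unpredicated) and suggests using `mov` instead.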
- `FCVTXNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/FCVTXNT--Floating-point-down-convert--rounding-to-odd--top--predicated--?lang=en)
- `FCVTNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/FCVTNT--predicated---Floating-point-down-convert-and-narrow--top--predicated--?lang=en)
- `BFCVTNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/BFCVTNT--Floating-point-down-convert-and-narrow-to-BFloat16--top--predicated--?lang=en)
From 11e1bfed2d05c6a0762405518b50b5e300e6036a Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 4 Dec 2024 18:23:33 +0000
Subject: [PATCH] Make zeroing FCVT{XNT,NT} and BFCVTNT destructive
---
.../lib/Target/AArch64/AArch64SVEInstrInfo.td | 16 ++--
llvm/lib/Target/AArch64/SVEInstrFormats.td | 92 ++++++-------------
.../test/MC/AArch64/SVE/bfcvtnt-diagnostics.s | 11 ++-
llvm/test/MC/AArch64/SVE/bfcvtnt.s | 20 ----
4 files changed, 48 insertions(+), 91 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a15e89be1a24b2..b6cb9d54b84aa1 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2455,8 +2455,9 @@ let Predicates = [HasBF16, HasSVEorSME] in {
defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
- defm BFCVT_ZPmZ : sve_bfloat_convert<0b1, "bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
- defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
+
+ defm BFCVT_ZPmZ : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
+ defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
} // End HasBF16, HasSVEorSME
let Predicates = [HasSVEorSME] in {
@@ -4268,17 +4269,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
// SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
- defm FCVTNT_ZPzZ : sve_fp_fcvtntz<"fcvtnt">;
- def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
+ defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
+ def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
// Placing even
- defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
+ defm FCVTX_ZPzZ : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
// SVE2p2 floating-point convert precision up, zeroing predicate
- defm FCVTLT_ZPzZ : sve_fp_fcvtltz<"fcvtlt", "int_aarch64_sve_fcvtlt">;
+ defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
// SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
- def BFCVTNT_ZPzZ : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
- // Placing corresponding
+ def BFCVTNT_ZPzZ : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
defm BFCVT_ZPzZ_StoH : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
// Floating-point convert to integer, zeroing predicate
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 828a048eaf6fb2..3e07048f03907c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2787,10 +2787,12 @@ multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
// SVE2 Floating Point Convert Group
//===----------------------------------------------------------------------===//
-class sve2_fp_convert_precision<bits<4> opc, string asm,
- ZPRRegOp zprty1, ZPRRegOp zprty2>
-: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
- asm, "\t$Zd, $Pg/m, $Zn",
+class sve2_fp_convert_precision<bits<4> opc, bit merging, string asm,
+ ZPRRegOp zprty1, ZPRRegOp zprty2, bit destructive=merging>
+: I<(outs zprty1:$Zd),
+ !if(destructive, (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
+ (ins PPR3bAny:$Pg, zprty2:$Zn)),
+ asm, "\t$Zd, " # !if(merging, "$Pg/m", "$Pg/z") # ", $Zn",
"",
[]>, Sched<[]> {
bits<5> Zd;
@@ -2798,74 +2800,55 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
bits<3> Pg;
let Inst{31-24} = 0b01100100;
let Inst{23-22} = opc{3-2};
- let Inst{21-18} = 0b0010;
+ let Inst{21-20} = 0b00;
+ let Inst{19} = merging;
+ let Inst{18} = 0b0;
let Inst{17-16} = opc{1-0};
let Inst{15-13} = 0b101;
let Inst{12-10} = Pg;
let Inst{9-5} = Zn;
let Inst{4-0} = Zd;
- let Constraints = "$Zd = $_Zd";
+ let Constraints = !if(destructive, "$Zd = $_Zd", "");
let hasSideEffects = 0;
let mayRaiseFPException = 1;
}
multiclass sve2_fp_convert_down_narrow<string asm, string op> {
- def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
- def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+ def _StoH : sve2_fp_convert_precision<0b1000, 0b1, asm, ZPR16, ZPR32>;
+ def _DtoS : sve2_fp_convert_precision<0b1110, 0b1, asm, ZPR32, ZPR64>;
def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
multiclass sve2_fp_convert_up_long<string asm, string op> {
- def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
- def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+ def _HtoS : sve2_fp_convert_precision<0b1001, 0b1, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_fp_convert_precision<0b1111, 0b1, asm, ZPR64, ZPR32>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
- def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+ def _DtoS : sve2_fp_convert_precision<0b0010, 0b1, asm, ZPR32, ZPR64>;
def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
}
-class sve_fp_fcvt2z<bits<4> opc, string asm, ZPRRegOp zprty1,
- ZPRRegOp zprty2>
- : I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
- asm, "\t$Zd, $Pg/z, $Zn",
- "",
- []>, Sched<[]> {
- bits<5> Zd;
- bits<5> Zn;
- bits<3> Pg;
- let Inst{31-24} = 0b01100100;
- let Inst{23-22} = opc{3-2};
- let Inst{21-18} = 0b0000;
- let Inst{17-16} = opc{1-0};
- let Inst{15-13} = 0b101;
- let Inst{12-10} = Pg;
- let Inst{9-5} = Zn;
- let Inst{4-0} = Zd;
- let hasSideEffects = 0;
- let mayRaiseFPException = 1;
-}
-
-multiclass sve_fp_fcvtntz<string asm> {
- def _StoH : sve_fp_fcvt2z<0b1000, asm, ZPR16, ZPR32>;
- def _DtoS : sve_fp_fcvt2z<0b1110, asm, ZPR32, ZPR64>;
-}
-
-multiclass sve_fp_fcvtltz<string asm, string op> {
- def _HtoS : sve_fp_fcvt2z<0b1001, asm, ZPR32, ZPR16>;
- def _StoD : sve_fp_fcvt2z<0b1111, asm, ZPR64, ZPR32>;
+multiclass sve2_fp_convert_up_long_z<string asm, string op> {
+ def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
+ def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
}
+multiclass sve2_fp_convert_down_narrow_z<string asm> {
+ def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm, ZPR16, ZPR32, /*destructive*/ true>;
+ def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm, ZPR32, ZPR64, /*destructive*/ true>;
+}
+
//===----------------------------------------------------------------------===//
// SVE2 Floating Point Pairwise Group
//===----------------------------------------------------------------------===//
@@ -9296,33 +9279,18 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
}
-class sve_bfloat_convert<bit N, string asm>
-: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
- asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
- bits<5> Zd;
- bits<3> Pg;
- bits<5> Zn;
- let Inst{31-25} = 0b0110010;
- let Inst{24} = N;
- let Inst{23-13} = 0b10001010101;
- let Inst{12-10} = Pg;
- let Inst{9-5} = Zn;
- let Inst{4-0} = Zd;
+multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperator ir_op> {
+ def NAME : sve_fp_2op_p_zd<0b1001010, asm, ZPR32, ZPR16, ElementSizeS>;
- let Constraints = "$Zd = $_Zd";
- let DestructiveInstType = DestructiveOther;
- let ElementSize = ElementSizeS;
- let hasSideEffects = 0;
- let mayRaiseFPException = 1;
+ def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+ def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
}
-multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
- SDPatternOperator ir_op = null_frag> {
- def NAME : sve_bfloat_convert<N, asm>;
+multiclass sve_bfloat_convert_top<string asm, SDPatternOperator op> {
+ def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
- def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
- def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
index d21a555ff87c60..644fe82ab9409a 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
@@ -20,8 +20,17 @@ bfcvtnt z0.h, p8/m, z1.s
// CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
movprfx z0.h, p0/m, z7.h
bfcvtnt z0.h, p0/m, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
// CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+bfcvtnt z0.h, p7/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bfcvtnt z0.h, p7/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt.s b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
index 5f3b71e28b91e0..b374a27ecfb9ab 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
@@ -9,23 +9,3 @@ bfcvtnt z0.H, p0/m, z1.S
// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
// CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0.S, p0/m, z2.S
-// CHECK-INST: movprfx z0.s, p0/m, z2.s
-// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0, z2
-// CHECK-INST: movprfx z0, z2
-// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme
More information about the llvm-commits
mailing list