[llvm] [AArch64] Fixup destructive floating-point precision conversions (PR #118788)

Thu Dec 5 03:08:34 PST 2024

https://github.com/SpencerAbson created https://github.com/llvm/llvm-project/pull/118788

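This patch changes the zeroing forms of `FCVTXNT`, `FCVTNT`, and `BFCVTNT` so that their destination operand is also listed as a DAG input, i.e. they are modelled as destructive instructions with `$Zd` tied to `$_Zd`. These narrowing down-conversions place their results in the odd-numbered ("top") elements and leave the even-numbered elements of the destination vector unchanged, regardless of whether the predication is merging (`/m`) or zeroing (`/z`), so the previous value of the destination register is always an input.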

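This patch also makes the merging form of `BFCVTNT` incompatible with `MOVPRFX`. It was previously defined with `DestructiveInstType = DestructiveOther`; it is now defined via `sve2_fp_convert_precision`, which does not set that property, so the assembler rejects `bfcvtnt` when it follows either a predicated or an unpredicated `movprfx`.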

- `FCVTXNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/FCVTXNT--Floating-point-down-convert--rounding-to-odd--top--predicated--?lang=en)
- `FCVTNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/FCVTNT--predicated---Floating-point-down-convert-and-narrow--top--predicated--?lang=en)
- `BFCVTNT` - [Arm Developer](https://developer.arm.com/documentation/ddi0602/2024-09/SVE-Instructions/BFCVTNT--Floating-point-down-convert-and-narrow-to-BFloat16--top--predicated--?lang=en)

From 11e1bfed2d05c6a0762405518b50b5e300e6036a Mon Sep 17 00:00:00 2001
From: Spencer Abson <Spencer.Abson at arm.com>
Date: Wed, 4 Dec 2024 18:23:33 +0000
Subject: [PATCH] Make zeroing FCVT{XNT,NT} and BFCVTNT destructive

---
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 16 ++--
 llvm/lib/Target/AArch64/SVEInstrFormats.td    | 92 ++++++-------------
 .../test/MC/AArch64/SVE/bfcvtnt-diagnostics.s | 11 ++-
 llvm/test/MC/AArch64/SVE/bfcvtnt.s            | 20 ----
 4 files changed, 48 insertions(+), 91 deletions(-)

diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index a15e89be1a24b2..b6cb9d54b84aa1 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -2455,8 +2455,9 @@ let Predicates = [HasBF16, HasSVEorSME] in {
   defm BFMLALT_ZZZ : sve2_fp_mla_long<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt>;
   defm BFMLALB_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b100, "bfmlalb", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalb_lane_v2>;
   defm BFMLALT_ZZZI : sve2_fp_mla_long_by_indexed_elem<0b101, "bfmlalt", nxv4f32, nxv8bf16, int_aarch64_sve_bfmlalt_lane_v2>;
-  defm BFCVT_ZPmZ   : sve_bfloat_convert<0b1, "bfcvt",   int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
-  defm BFCVTNT_ZPmZ : sve_bfloat_convert<0b0, "bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
+
+  defm BFCVT_ZPmZ   : sve_bfloat_convert<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2, AArch64fcvtr_mt>;
+  defm BFCVTNT_ZPmZ : sve_bfloat_convert_top<"bfcvtnt", int_aarch64_sve_fcvtnt_bf16f32_v2>;
 } // End HasBF16, HasSVEorSME
 
 let Predicates = [HasSVEorSME] in {
@@ -4268,17 +4269,16 @@ let Predicates = [HasSVE2p2orSME2p2] in {
   defm FCVT_ZPzZ : sve_fp_z2op_p_zd_b_0<"fcvt", "int_aarch64_sve_fcvt">;
 
   // SVE2p2 floating-point convert precision down (placing odd), zeroing predicate
-  defm FCVTNT_ZPzZ      : sve_fp_fcvtntz<"fcvtnt">;
-  def FCVTXNT_ZPzZ_DtoS : sve_fp_fcvt2z<0b0010, "fcvtxnt", ZPR32, ZPR64>;
+  defm FCVTNT_ZPzZ : sve2_fp_convert_down_narrow_z<"fcvtnt">;
+  def FCVTXNT_ZPzZ : sve2_fp_convert_precision<0b0010, 0b0, "fcvtxnt", ZPR32, ZPR64, /*destructive*/ true>;
   // Placing even
-  defm FCVTX_ZPzZ       : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
+  defm FCVTX_ZPzZ  : sve_fp_z2op_p_zd<"fcvtx", int_aarch64_sve_fcvtx_f32f64>;
 
   // SVE2p2 floating-point convert precision up, zeroing predicate
-  defm FCVTLT_ZPzZ      : sve_fp_fcvtltz<"fcvtlt", "int_aarch64_sve_fcvtlt">;
+  defm FCVTLT_ZPzZ : sve2_fp_convert_up_long_z<"fcvtlt", "int_aarch64_sve_fcvtlt">;
 
   // SVE2p2 floating-point convert single-to-bf (placing odd), zeroing predicate
-  def BFCVTNT_ZPzZ      : sve_fp_fcvt2z<0b1010, "bfcvtnt", ZPR16, ZPR32>;
-  // Placing corresponding
+  def BFCVTNT_ZPzZ      : sve2_fp_convert_precision<0b1010, 0b0, "bfcvtnt", ZPR16, ZPR32, /*destructive*/ true>;
   defm BFCVT_ZPzZ_StoH  : sve_fp_z2op_p_zd_bfcvt<"bfcvt", int_aarch64_sve_fcvt_bf16f32_v2>;
 
   // Floating-point convert to integer, zeroing predicate
diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td
index 828a048eaf6fb2..3e07048f03907c 100644
--- a/llvm/lib/Target/AArch64/SVEInstrFormats.td
+++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td
@@ -2787,10 +2787,12 @@ multiclass sve_fp_fcadd<string asm, SDPatternOperator op> {
 // SVE2 Floating Point Convert Group
 //===----------------------------------------------------------------------===//
 
-class sve2_fp_convert_precision<bits<4> opc, string asm,
-                                ZPRRegOp zprty1, ZPRRegOp zprty2>
-: I<(outs zprty1:$Zd), (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
-  asm, "\t$Zd, $Pg/m, $Zn",
+class sve2_fp_convert_precision<bits<4> opc, bit merging, string asm,
+                                ZPRRegOp zprty1, ZPRRegOp zprty2, bit destructive=merging>
+: I<(outs zprty1:$Zd),
+  !if(destructive, (ins zprty1:$_Zd, PPR3bAny:$Pg, zprty2:$Zn),
+                   (ins PPR3bAny:$Pg, zprty2:$Zn)),
+  asm, "\t$Zd, " # !if(merging, "$Pg/m", "$Pg/z")  # ", $Zn",
   "",
   []>, Sched<[]> {
   bits<5> Zd;
@@ -2798,74 +2800,55 @@ class sve2_fp_convert_precision<bits<4> opc, string asm,
   bits<3> Pg;
   let Inst{31-24} = 0b01100100;
   let Inst{23-22} = opc{3-2};
-  let Inst{21-18} = 0b0010;
+  let Inst{21-20} = 0b00;
+  let Inst{19}    = merging;
+  let Inst{18}    = 0b0;
   let Inst{17-16} = opc{1-0};
   let Inst{15-13} = 0b101;
   let Inst{12-10} = Pg;
   let Inst{9-5}   = Zn;
   let Inst{4-0}   = Zd;
 
-  let Constraints = "$Zd = $_Zd";
+  let Constraints = !if(destructive, "$Zd = $_Zd", "");
   let hasSideEffects = 0;
   let mayRaiseFPException = 1;
 }
 
 multiclass sve2_fp_convert_down_narrow<string asm, string op> {
-  def _StoH : sve2_fp_convert_precision<0b1000, asm, ZPR16, ZPR32>;
-  def _DtoS : sve2_fp_convert_precision<0b1110, asm, ZPR32, ZPR64>;
+  def _StoH : sve2_fp_convert_precision<0b1000, 0b1, asm, ZPR16, ZPR32>;
+  def _DtoS : sve2_fp_convert_precision<0b1110, 0b1, asm, ZPR32, ZPR64>;
 
   def : SVE_3_Op_Pat<nxv8f16, !cast<SDPatternOperator>(op # _f16f32), nxv8f16, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _StoH)>;
   def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
 }
 
 multiclass sve2_fp_convert_up_long<string asm, string op> {
-  def _HtoS : sve2_fp_convert_precision<0b1001, asm, ZPR32, ZPR16>;
-  def _StoD : sve2_fp_convert_precision<0b1111, asm, ZPR64, ZPR32>;
+  def _HtoS : sve2_fp_convert_precision<0b1001, 0b1, asm, ZPR32, ZPR16>;
+  def _StoD : sve2_fp_convert_precision<0b1111, 0b1, asm, ZPR64, ZPR32>;
 
   def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
   def : SVE_3_Op_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
 }
 
 multiclass sve2_fp_convert_down_odd_rounding_top<string asm, string op> {
-  def _DtoS : sve2_fp_convert_precision<0b0010, asm, ZPR32, ZPR64>;
+  def _DtoS : sve2_fp_convert_precision<0b0010, 0b1, asm, ZPR32, ZPR64>;
 
   def : SVE_3_Op_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f64), nxv4f32, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoS)>;
 }
 
-class sve_fp_fcvt2z<bits<4> opc, string asm, ZPRRegOp zprty1,
-                    ZPRRegOp zprty2>
-  : I<(outs zprty1:$Zd), (ins PPR3bAny:$Pg, zprty2:$Zn),
-    asm, "\t$Zd, $Pg/z, $Zn",
-    "",
-    []>, Sched<[]> {
-    bits<5> Zd;
-    bits<5> Zn;
-    bits<3> Pg;
-    let Inst{31-24} = 0b01100100;
-    let Inst{23-22} = opc{3-2};
-    let Inst{21-18} = 0b0000;
-    let Inst{17-16} = opc{1-0};
-    let Inst{15-13} = 0b101;
-    let Inst{12-10} = Pg;
-    let Inst{9-5}   = Zn;
-    let Inst{4-0}   = Zd;
-    let hasSideEffects = 0;
-    let mayRaiseFPException = 1;
-}
-
-multiclass sve_fp_fcvtntz<string asm> {
-  def _StoH : sve_fp_fcvt2z<0b1000, asm,  ZPR16, ZPR32>;
-  def _DtoS : sve_fp_fcvt2z<0b1110, asm,  ZPR32, ZPR64>;
-}
-
-multiclass sve_fp_fcvtltz<string asm, string op> {
-  def _HtoS  : sve_fp_fcvt2z<0b1001, asm,  ZPR32, ZPR16>;
-  def _StoD  : sve_fp_fcvt2z<0b1111, asm,  ZPR64, ZPR32>;
+multiclass sve2_fp_convert_up_long_z<string asm, string op> {
+  def _HtoS : sve2_fp_convert_precision<0b1001, 0b0, asm, ZPR32, ZPR16>;
+  def _StoD : sve2_fp_convert_precision<0b1111, 0b0, asm, ZPR64, ZPR32>;
 
   def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(op # _f32f16), nxv4f32, nxv4i1, nxv8f16, !cast<Instruction>(NAME # _HtoS)>;
   def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(op # _f64f32), nxv2f64, nxv2i1, nxv4f32, !cast<Instruction>(NAME # _StoD)>;
 }
 
+multiclass sve2_fp_convert_down_narrow_z<string asm> {
+  def _StoH : sve2_fp_convert_precision<0b1000, 0b0, asm,  ZPR16, ZPR32, /*destructive*/ true>;
+  def _DtoS : sve2_fp_convert_precision<0b1110, 0b0, asm,  ZPR32, ZPR64, /*destructive*/ true>;
+}
+
 //===----------------------------------------------------------------------===//
 // SVE2 Floating Point Pairwise Group
 //===----------------------------------------------------------------------===//
@@ -9296,33 +9279,18 @@ multiclass sve_float_dot_indexed<bit bf, bits<2> opc, ZPRRegOp src1_ty,
   def : SVE_4_Op_Imm_Pat<nxv4f32, op, nxv4f32, InVT, InVT, i32, VectorIndexS32b_timm, !cast<Instruction>(NAME)>;
 }
 
-class sve_bfloat_convert<bit N, string asm>
-: I<(outs ZPR16:$Zd), (ins ZPR16:$_Zd, PPR3bAny:$Pg, ZPR32:$Zn),
-  asm, "\t$Zd, $Pg/m, $Zn", "", []>, Sched<[]> {
-  bits<5> Zd;
-  bits<3> Pg;
-  bits<5> Zn;
-  let Inst{31-25} = 0b0110010;
-  let Inst{24}    = N;
-  let Inst{23-13} = 0b10001010101;
-  let Inst{12-10} = Pg;
-  let Inst{9-5}   = Zn;
-  let Inst{4-0}   = Zd;
+multiclass sve_bfloat_convert<string asm, SDPatternOperator op, SDPatternOperator ir_op> {
+  def NAME : sve_fp_2op_p_zd<0b1001010, asm, ZPR32, ZPR16, ElementSizeS>;
 
-  let Constraints = "$Zd = $_Zd";
-  let DestructiveInstType = DestructiveOther;
-  let ElementSize = ElementSizeS;
-  let hasSideEffects = 0;
-  let mayRaiseFPException = 1;
+  def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
+  def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
 }
 
-multiclass sve_bfloat_convert<bit N, string asm, SDPatternOperator op,
-                              SDPatternOperator ir_op = null_frag> {
-  def NAME : sve_bfloat_convert<N, asm>;
+multiclass sve_bfloat_convert_top<string asm,  SDPatternOperator op> {
+  def NAME : sve2_fp_convert_precision<0b1010, 0b1, asm, ZPR16, ZPR32>;
 
   def : SVE_3_Op_Pat<nxv8bf16, op, nxv8bf16, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
-  def : SVE_1_Op_Passthru_Round_Pat<nxv4bf16, ir_op, nxv4i1, nxv4f32, !cast<Instruction>(NAME)>;
-  def : SVE_1_Op_Passthru_Round_Pat<nxv2bf16, ir_op, nxv2i1, nxv2f32, !cast<Instruction>(NAME)>;
 }
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
index d21a555ff87c60..644fe82ab9409a 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt-diagnostics.s
@@ -20,8 +20,17 @@ bfcvtnt z0.h, p8/m, z1.s
 // CHECK-NEXT: bfcvtnt z0.h, p8/m, z1.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
 
+// --------------------------------------------------------------------------//
+// Negative tests for instructions that are incompatible with movprfx
+
 movprfx z0.h, p0/m, z7.h
 bfcvtnt z0.h, p0/m, z1.s
-// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a predicated movprfx with a different element size
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
 // CHECK-NEXT: bfcvtnt z0.h, p0/m, z1.s
 // CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
+
+movprfx z0, z7
+bfcvtnt z0.h, p7/m, z1.s
+// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: instruction is unpredictable when following a movprfx, suggest replacing movprfx with mov
+// CHECK-NEXT: bfcvtnt z0.h, p7/m, z1.s
+// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
\ No newline at end of file
diff --git a/llvm/test/MC/AArch64/SVE/bfcvtnt.s b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
index 5f3b71e28b91e0..b374a27ecfb9ab 100644
--- a/llvm/test/MC/AArch64/SVE/bfcvtnt.s
+++ b/llvm/test/MC/AArch64/SVE/bfcvtnt.s
@@ -9,23 +9,3 @@ bfcvtnt z0.H, p0/m, z1.S
 // CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
 // CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
 // CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0.S, p0/m, z2.S
-// CHECK-INST: movprfx z0.s, p0/m, z2.s
-// CHECK-ENCODING: [0x40,0x20,0x91,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme
-
-movprfx z0, z2
-// CHECK-INST: movprfx z0, z2
-// CHECK-ENCODING: [0x40,0xbc,0x20,0x04]
-// CHECK-ERROR: instruction requires: sve or sme
-
-bfcvtnt z0.H, p0/m, z1.S
-// CHECK-INST: bfcvtnt z0.h, p0/m, z1.s
-// CHECK-ENCODING: [0x20,0xa0,0x8a,0x64]
-// CHECK-ERROR: instruction requires: bf16 sve or sme