[llvm] [AArch64] Generate zeroing forms of certain SVE2.2 instructions (5/11) (PR #116831)

Thu Dec 12 02:06:06 PST 2024

================
@@ -3312,14 +3312,23 @@ multiclass sve_fp_z2op_p_zd_d<bit U, string asm, string int_op, SDPatternOperato
   def : SVE_1_Op_PassthruUndefZero_Pat<nxv2i64, ir_op, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _DtoD)>;
 }
 
-multiclass sve_fp_z2op_p_zd_c<bit U, string asm> {
+multiclass sve_fp_z2op_p_zd_c<bit U, string asm, string int_op, SDPatternOperator ir_op> {
   def _HtoH : sve_fp_z2op_p_zd<{ 0b011001, U }, asm, ZPR16, ZPR16>;
   def _StoH : sve_fp_z2op_p_zd<{ 0b011010, U }, asm, ZPR32, ZPR16>;
   def _StoS : sve_fp_z2op_p_zd<{ 0b101010, U }, asm, ZPR32, ZPR32>;
   def _StoD : sve_fp_z2op_p_zd<{ 0b111000, U }, asm, ZPR32, ZPR64>;
   def _DtoS : sve_fp_z2op_p_zd<{ 0b111010, U }, asm, ZPR64, ZPR32>;
   def _DtoH : sve_fp_z2op_p_zd<{ 0b011011, U }, asm, ZPR64, ZPR16>;
   def _DtoD : sve_fp_z2op_p_zd<{ 0b111011, U }, asm, ZPR64, ZPR64>;
+
+  def : SVE_3_Op_UndefZero_Pat<nxv4f32, !cast<SDPatternOperator>(int_op # _f32i64), nxv4f32, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoS)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv2f64, !cast<SDPatternOperator>(int_op # _f64i32), nxv2f64, nxv2i1, nxv4i32, !cast<Instruction>(NAME # _StoD)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i32), nxv8f16, nxv4i1, nxv4i32, !cast<Instruction>(NAME # _StoH)>;
+  def : SVE_3_Op_UndefZero_Pat<nxv8f16, !cast<SDPatternOperator>(int_op # _f16i64), nxv8f16, nxv2i1, nxv2i64, !cast<Instruction>(NAME # _DtoH)>;
+
+  def : SVE_1_Op_PassthruUndefZero_Pat<nxv8f16, ir_op, nxv8i1,nxv8i16, !cast<Instruction>(NAME # _HtoH)>;
----------------
SpencerAbson wrote:

> ` dag PatternToMatch = (nxv8f16 (AArch64scvtf_mt (nxv8i1 (SVEAllActive:Op1)), nxv8i16:$Op2, nxv8f16:$Op3));`

This stream of work seems to have assumed that something like `instCombineSVEAllActiveReplace` has taken care of this already, or am I misunderstanding something?

E.g, if we run llc directly on:
```
define <vscale x 4 x float> @test(<vscale x 4 x float> %y, <vscale x 4 x i32> %x) #0 {
entry:
  %0 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %1 = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtf.nxv4f32.nxv4i32(<vscale x 4 x float> %y, <vscale x 4 x i1> %0, <vscale x 4 x i32> %x)
  ret <vscale x 4 x float> %1
}
attributes #0 = {"target-features"="+sve2p2"}
```
We'd still get 
```
	ptrue	p0.s
	movprfx	z0, z1
	scvtf	z0.s, p0/m, z1.s
	ret
```


https://github.com/llvm/llvm-project/pull/116831