[llvm] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2) (PR #141480)

Mon Jun 16 12:18:24 PDT 2025

================
@@ -5407,6 +5433,102 @@ class BaseIntegerToFPUnscaled<bits<2> rmode, bits<3> opcode,
   let Inst{4-0}   = Rd;
 }
 
+multiclass IntegerToFPVector<
+    bits<2> rmode, bits<3> opcode, string asm, RegisterClass srcRegClass,
+    RegisterClass dstRegClass, Operand imm_op, bits<1> q, bits<2> size,
+    bits<2> srcElemTypeBits, list<Predicate> preds> {
+
+  def _V : BaseIntegerToFP<rmode, opcode, srcRegClass, dstRegClass, imm_op,
+                           asm, []> {
+    let Inst{30} = q;
+    let Inst{23 -22} = size;
+    let Inst{18 -16} = 0b001;
+    let Inst{11 -10} = srcElemTypeBits;
+    let Predicates = preds;
+  }
+}
+
+// SCVTF (Signed Convert To Floating-Point) from Vector 32-bit Integer (vNi32)
+// defm SCVTFv2f16_v2i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+//                                     FPR64, FPR64,
+//                                     fixedpoint_recip_v2f16_v2i32,
+//                                     0, 0b00, 0b10, [HasFullFP16]>;
+
+// defm SCVTFv4f16_v4i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+//                                     FPR128, FPR128,
+//                                     fixedpoint_recip_v4f16_v4i32,
+//                                     1, 0b00, 0b10, [HasFullFP16]>;
+
+// defm SCVTFv8f16_v8i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+//                                     FPR128, FPR128,
+//                                     fixedpoint_recip_v8f16_v8i32,
+//                                     1, 0b00, 0b10, [HasFullFP16]>;
+
+defm SCVTFv2f32_v2i32
+    : IntegerToFPVector<0b00, 0b010, "scvtf", FPR64, FPR64,
+                        fixedpoint_recip_v2f32_v2i32, 0, 0b01, 0b10, []>;
+
+defm SCVTFv4f32_v4i32
+    : IntegerToFPVector<0b00, 0b010, "scvtf", FPR128, FPR128,
+                        fixedpoint_recip_v4f32_v4i32, 1, 0b01, 0b10, []>;
+
+// SCVTF (Signed Convert To Floating-Point) from Vector 64-bit Integer (vNi64)
+// defm SCVTFv2f16_v2i64 : IntegerToFPVector<0b00, 0b010, "scvtf",
+//                                     FPR128, FPR128,
+//                                     fixedpoint_recip_v2f16_v2i64,
+//                                     1, 0b00, 0b11, [HasFullFP16]>;
+
+// defm SCVTFv2f32_v2i64 : IntegerToFPVector<0b00, 0b010, "scvtf",
+//                                     FPR128, FPR128,
+//                                     fixedpoint_recip_v2f32_v2i64,
+//                                     1, 0b01, 0b11, []>;
+
+defm SCVTFv2f64_v2i64
+    : IntegerToFPVector<0b00, 0b010, "scvtf", FPR128, FPR128,
+                        fixedpoint_recip_v2f64_v2i64, 1, 0b10, 0b11, []>;
+
+// def : Pat<
+//   (fmul (sint_to_fp (v2i32 V64:$Rn)),
+//         fixedpoint_recip_v2f32_v2i32:$scale),
+//   (SCVTFv2f16_v2i32_V V64:$Rn, fixedpoint_recip_v2f32_v2i32:$scale)
+// >;
+
+// def : Pat<
+//   (fmul (sint_to_fp (v4i32 FPR128:$Rn)),
+//         fixedpoint_recip_v4f16_v4i32:$scale),
+//   (SCVTFv4f16_v4i32_V FPR128:$Rn, fixedpoint_recip_v4f16_v4i32:$scale)
+// >;
+
+// def : Pat<
+//   (fmul (sint_to_fp (v8i32 FPR128:$Rn)),
+//         fixedpoint_recip_v8f16_v8i32:$scale),
+//   (SCVTFv8f16_v8i32_V FPR128:$Rn, fixedpoint_recip_v8f16_v8i32:$scale)
+// >;
+
+def : Pat<(fmul(sint_to_fp(v2i32 V64:$Rn)),
+              fixedpoint_recip_v2f32_v2i32:$scale),
+          (SCVTFv2f32_v2i32_V V64:$Rn, fixedpoint_recip_v2f32_v2i32:$scale)>;
+
+def : Pat<(fmul(sint_to_fp(v4i32 FPR128:$Rn)),
+              fixedpoint_recip_v4f32_v4i32:$scale),
+          (SCVTFv4f32_v4i32_V FPR128:$Rn, fixedpoint_recip_v4f32_v4i32:$scale)>;
+
+// def : Pat<
+//   (fmul (sint_to_fp (v2i64 FPR128:$Rn)),
+//         fixedpoint_recip_v2f16_v2i64:$scale),
+//   (SCVTFv2f16_v2i64_V FPR128:$Rn, fixedpoint_recip_v2f16_v2i64:$scale)
+// >;
+
+// def : Pat<
+//   (fmul (sint_to_fp (v2i64 FPR128:$Rn)),
+//         fixedpoint_recip_v2f32_v2i64:$scale),
+//   (SCVTFv2f32_v2i64_V FPR128:$Rn, fixedpoint_recip_v2f32_v2i64:$scale)
+// >;
+
+def : Pat<(fmul(sint_to_fp(v2i64 FPR128:$Rn)),
----------------
jph-13 wrote:

OK, the error was not in the test but in the launch command I was using for debugging. I have fixed that now; sorry for the silly mistake.
```
// %bb.0:                               // %entry
        movi.4h v1, #52, lsl #8
        scvtf.4h        v0, v0
        fmul.4h v0, v0, v1
        ret
.Lfunc_end0:
```
I think it is close now, but I still fail to extract the constant for the f16 cases. With the patterns above it does match. The op comes in as an `AArch64ISD::NVCAST` whose operand 0 is an `AArch64ISD::MOVIshift`, so I seem to be too deep to figure out whether it is a splat or not.

https://github.com/llvm/llvm-project/pull/141480