[llvm] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2) (PR #141480)
JP Hafer via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 16 12:18:24 PDT 2025
================
@@ -5407,6 +5433,102 @@ class BaseIntegerToFPUnscaled<bits<2> rmode, bits<3> opcode,
let Inst{4-0} = Rd;
}
+multiclass IntegerToFPVector<
+ bits<2> rmode, bits<3> opcode, string asm, RegisterClass srcRegClass,
+ RegisterClass dstRegClass, Operand imm_op, bits<1> q, bits<2> size,
+ bits<2> srcElemTypeBits, list<Predicate> preds> {
+
+ def _V : BaseIntegerToFP<rmode, opcode, srcRegClass, dstRegClass, imm_op,
+ asm, []> {
+ let Inst{30} = q;
+ let Inst{23 -22} = size;
+ let Inst{18 -16} = 0b001;
+ let Inst{11 -10} = srcElemTypeBits;
+ let Predicates = preds;
+ }
+}
+
+// SCVTF (Signed Convert To Floating-Point) from Vector 32-bit Integer (vNi32)
+// defm SCVTFv2f16_v2i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR64, FPR64,
+// fixedpoint_recip_v2f16_v2i32,
+// 0, 0b00, 0b10, [HasFullFP16]>;
+
+// defm SCVTFv4f16_v4i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v4f16_v4i32,
+// 1, 0b00, 0b10, [HasFullFP16]>;
+
+// defm SCVTFv8f16_v8i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v8f16_v8i32,
+// 1, 0b00, 0b10, [HasFullFP16]>;
+
+defm SCVTFv2f32_v2i32
+ : IntegerToFPVector<0b00, 0b010, "scvtf", FPR64, FPR64,
+ fixedpoint_recip_v2f32_v2i32, 0, 0b01, 0b10, []>;
+
+defm SCVTFv4f32_v4i32
+ : IntegerToFPVector<0b00, 0b010, "scvtf", FPR128, FPR128,
+ fixedpoint_recip_v4f32_v4i32, 1, 0b01, 0b10, []>;
+
+// SCVTF (Signed Convert To Floating-Point) from Vector 64-bit Integer (vNi64)
+// defm SCVTFv2f16_v2i64 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v2f16_v2i64,
+// 1, 0b00, 0b11, [HasFullFP16]>;
+
+// defm SCVTFv2f32_v2i64 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v2f32_v2i64,
+// 1, 0b01, 0b11, []>;
+
+defm SCVTFv2f64_v2i64
+ : IntegerToFPVector<0b00, 0b010, "scvtf", FPR128, FPR128,
+ fixedpoint_recip_v2f64_v2i64, 1, 0b10, 0b11, []>;
+
+// def : Pat<
+// (fmul (sint_to_fp (v2i32 V64:$Rn)),
+// fixedpoint_recip_v2f32_v2i32:$scale),
+// (SCVTFv2f16_v2i32_V V64:$Rn, fixedpoint_recip_v2f32_v2i32:$scale)
+// >;
+
+// def : Pat<
+// (fmul (sint_to_fp (v4i32 FPR128:$Rn)),
+// fixedpoint_recip_v4f16_v4i32:$scale),
+// (SCVTFv4f16_v4i32_V FPR128:$Rn, fixedpoint_recip_v4f16_v4i32:$scale)
+// >;
+
+// def : Pat<
+// (fmul (sint_to_fp (v8i32 FPR128:$Rn)),
+// fixedpoint_recip_v8f16_v8i32:$scale),
+// (SCVTFv8f16_v8i32_V FPR128:$Rn, fixedpoint_recip_v8f16_v8i32:$scale)
+// >;
+
+def : Pat<(fmul(sint_to_fp(v2i32 V64:$Rn)),
+ fixedpoint_recip_v2f32_v2i32:$scale),
+ (SCVTFv2f32_v2i32_V V64:$Rn, fixedpoint_recip_v2f32_v2i32:$scale)>;
+
+def : Pat<(fmul(sint_to_fp(v4i32 FPR128:$Rn)),
+ fixedpoint_recip_v4f32_v4i32:$scale),
+ (SCVTFv4f32_v4i32_V FPR128:$Rn, fixedpoint_recip_v4f32_v4i32:$scale)>;
+
+// def : Pat<
+// (fmul (sint_to_fp (v2i64 FPR128:$Rn)),
+// fixedpoint_recip_v2f16_v2i64:$scale),
+// (SCVTFv2f16_v2i64_V FPR128:$Rn, fixedpoint_recip_v2f16_v2i64:$scale)
+// >;
+
+// def : Pat<
+// (fmul (sint_to_fp (v2i64 FPR128:$Rn)),
+// fixedpoint_recip_v2f32_v2i64:$scale),
+// (SCVTFv2f32_v2i64_V FPR128:$Rn, fixedpoint_recip_v2f32_v2i64:$scale)
+// >;
+
+def : Pat<(fmul(sint_to_fp(v2i64 FPR128:$Rn)),
----------------
jph-13 wrote:
OK, the error was not in the test but in the launch command I was using for debugging. I have fixed that now; sorry for the silly mistake.
```
// %bb.0: // %entry
movi.4h v1, #52, lsl #8
scvtf.4h v0, v0
fmul.4h v0, v0, v1
ret
.Lfunc_end0:
```
Now I think it is close, but I still fail to extract the constant for the f16 cases. Using the patterns above, it does match. The op comes in as an `AArch64ISD::NVCAST`, whose operand 0 is an `AArch64ISD::MOVIshift`. So I seem to be too deep to figure out whether it is a splat or not.
https://github.com/llvm/llvm-project/pull/141480
More information about the llvm-commits mailing list