[llvm] [AArch64] Fix #94909: Optimize vector fmul(sitofp(x), 0.5) -> scvtf(x, 2) (PR #141480)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 10 14:52:17 PDT 2025
================
@@ -5407,6 +5433,102 @@ class BaseIntegerToFPUnscaled<bits<2> rmode, bits<3> opcode,
let Inst{4-0} = Rd;
}
+multiclass IntegerToFPVector<
+ bits<2> rmode, bits<3> opcode, string asm, RegisterClass srcRegClass,
+ RegisterClass dstRegClass, Operand imm_op, bits<1> q, bits<2> size,
+ bits<2> srcElemTypeBits, list<Predicate> preds> {
+
+ def _V : BaseIntegerToFP<rmode, opcode, srcRegClass, dstRegClass, imm_op,
+ asm, []> {
+ let Inst{30} = q;
+ let Inst{23 -22} = size;
+ let Inst{18 -16} = 0b001;
+ let Inst{11 -10} = srcElemTypeBits;
+ let Predicates = preds;
+ }
+}
+
+// SCVTF (Signed Convert To Floating-Point) from Vector 32-bit Integer (vNi32)
+// defm SCVTFv2f16_v2i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR64, FPR64,
+// fixedpoint_recip_v2f16_v2i32,
+// 0, 0b00, 0b10, [HasFullFP16]>;
+
+// defm SCVTFv4f16_v4i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v4f16_v4i32,
+// 1, 0b00, 0b10, [HasFullFP16]>;
+
+// defm SCVTFv8f16_v8i32 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v8f16_v8i32,
+// 1, 0b00, 0b10, [HasFullFP16]>;
+
+defm SCVTFv2f32_v2i32
+ : IntegerToFPVector<0b00, 0b010, "scvtf", FPR64, FPR64,
+ fixedpoint_recip_v2f32_v2i32, 0, 0b01, 0b10, []>;
+
+defm SCVTFv4f32_v4i32
+ : IntegerToFPVector<0b00, 0b010, "scvtf", FPR128, FPR128,
+ fixedpoint_recip_v4f32_v4i32, 1, 0b01, 0b10, []>;
+
+// SCVTF (Signed Convert To Floating-Point) from Vector 64-bit Integer (vNi64)
+// defm SCVTFv2f16_v2i64 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v2f16_v2i64,
+// 1, 0b00, 0b11, [HasFullFP16]>;
+
+// defm SCVTFv2f32_v2i64 : IntegerToFPVector<0b00, 0b010, "scvtf",
+// FPR128, FPR128,
+// fixedpoint_recip_v2f32_v2i64,
+// 1, 0b01, 0b11, []>;
+
+defm SCVTFv2f64_v2i64
+ : IntegerToFPVector<0b00, 0b010, "scvtf", FPR128, FPR128,
+ fixedpoint_recip_v2f64_v2i64, 1, 0b10, 0b11, []>;
+
+// def : Pat<
+// (fmul (sint_to_fp (v2i32 V64:$Rn)),
+// fixedpoint_recip_v2f32_v2i32:$scale),
+// (SCVTFv2f16_v2i32_V V64:$Rn, fixedpoint_recip_v2f32_v2i32:$scale)
+// >;
+
+// def : Pat<
+// (fmul (sint_to_fp (v4i32 FPR128:$Rn)),
+// fixedpoint_recip_v4f16_v4i32:$scale),
+// (SCVTFv4f16_v4i32_V FPR128:$Rn, fixedpoint_recip_v4f16_v4i32:$scale)
+// >;
+
+// def : Pat<
+// (fmul (sint_to_fp (v8i32 FPR128:$Rn)),
+// fixedpoint_recip_v8f16_v8i32:$scale),
+// (SCVTFv8f16_v8i32_V FPR128:$Rn, fixedpoint_recip_v8f16_v8i32:$scale)
+// >;
+
+def : Pat<(fmul(sint_to_fp(v2i32 V64:$Rn)),
+ fixedpoint_recip_v2f32_v2i32:$scale),
+ (SCVTFv2f32_v2i32_V V64:$Rn, fixedpoint_recip_v2f32_v2i32:$scale)>;
+
+def : Pat<(fmul(sint_to_fp(v4i32 FPR128:$Rn)),
+ fixedpoint_recip_v4f32_v4i32:$scale),
+ (SCVTFv4f32_v4i32_V FPR128:$Rn, fixedpoint_recip_v4f32_v4i32:$scale)>;
+
+// def : Pat<
+// (fmul (sint_to_fp (v2i64 FPR128:$Rn)),
+// fixedpoint_recip_v2f16_v2i64:$scale),
+// (SCVTFv2f16_v2i64_V FPR128:$Rn, fixedpoint_recip_v2f16_v2i64:$scale)
+// >;
+
+// def : Pat<
+// (fmul (sint_to_fp (v2i64 FPR128:$Rn)),
+// fixedpoint_recip_v2f32_v2i64:$scale),
+// (SCVTFv2f32_v2i64_V FPR128:$Rn, fixedpoint_recip_v2f32_v2i64:$scale)
+// >;
+
+def : Pat<(fmul(sint_to_fp(v2i64 FPR128:$Rn)),
----------------
davemgreen wrote:
When you get this far, the tablegen pattern is pretty gnarly, as it needs to match the v2f32 $scale but use that result as an i32 in the instruction. I'm not sure if there is a better way, but it might need an SDNodeXForm to convert the v2f32->i32, even if the complex pattern returns an i32 SDValue.
Something like this might work, if generalized to more types (and cleaned up a bit):
```
class fixedpoint_recip_vec<ValueType FloatVT> : ComplexPattern<FloatVT, 1, "SelectCVTFixedPosRecipOperandVec<32>", []>;
def fixedpoint_recip_v2f32_v2i32_2 : fixedpoint_recip_vec<v2f32>;
def fixedpoint_recip_vec_xform : SDNodeXForm<timm, [{
return V;
}]>;
def : Pat<(v2f32 (fmul (sint_to_fp(v2i32 V64:$Rn)), fixedpoint_recip_v2f32_v2i32_2:$scale)),
(v2f32 (SCVTFv2i32_shift (v2i32 V64:$Rn), (fixedpoint_recip_vec_xform fixedpoint_recip_v2f32_v2i32_2:$scale)))>;
```
The xform is a bit weird, as it just allows the type to change without actually doing anything to the value used. It could probably do with a comment explaining it.
https://github.com/llvm/llvm-project/pull/141480
More information about the llvm-commits
mailing list