[llvm] [AArch64] Keep floating-point conversion in SIMD (PR #147707)
David Sherwood via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 10 02:21:49 PDT 2025
================
@@ -6632,6 +6644,38 @@ def : Pat<(f64 (sint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
(UCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>;
+// float -> int conversion followed by a store should use the value in the first
+// lane to avoid expensive fpr -> gpr transfers.
+let AddedComplexity = 19 in {
+// f32 -> i32
+def : Pat<(store (i32 (fp_to_uint_oneuse f32:$src)), GPR64sp:$Rn),
----------------
david-arm wrote:
Is it worth doing this in a similar way to what we already do for normal stores, i.e.
```
// Pattern for storing element 0 of a vector: rather than moving the extracted
// element elsewhere first, store the vector register's low subregister
// directly with STR.
//   UIAddrMode - complex pattern that matches the (base, offset) address
//   storeop    - store node to match
//   VTy        - type of the vector that element 0 is extracted from
//   STy        - scalar type produced by the vector_extract
//   SubRegTy   - type given to the extracted subregister passed to STR
//   SubRegIdx  - subregister index selecting the low part of $Vt
//   IndexType  - operand class of the immediate offset
//   STR        - store instruction emitted for the match
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
ValueType VTy, ValueType STy,
ValueType SubRegTy,
SubRegIndex SubRegIdx, Operand IndexType,
Instruction STR> {
// Source: storeop of (vector_extract $Vt, 0) to a base + immediate-offset address.
def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
(UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
// Result: store the SubRegIdx subregister of $Vt to the same base and offset.
(STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
GPR64sp:$Rn, IndexType:$offset)>;
}
// Instantiate the lane-0 store pattern for 16-, 32- and 64-bit element stores
// (STRHui / STRSui / STRDui), covering both integer and floating-point vectors.
// NOTE(review): AddedComplexity = 19 presumably makes these patterns win over
// the generic extract-then-store selection -- confirm against the .td file.
let AddedComplexity = 19 in {
// 16-bit stores: truncating store of an i32 extracted from v8i16, and f16 from v8f16.
defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
defm : VecStoreLane0Pat<am_indexed16, store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
// 32-bit stores via the ssub subregister.
defm : VecStoreLane0Pat<am_indexed32, store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
defm : VecStoreLane0Pat<am_indexed32, store, v4f32, f32, i32, ssub, uimm12s4, STRSui>;
// 64-bit stores via the dsub subregister.
defm : VecStoreLane0Pat<am_indexed64, store, v2i64, i64, i64, dsub, uimm12s8, STRDui>;
defm : VecStoreLane0Pat<am_indexed64, store, v2f64, f64, i64, dsub, uimm12s8, STRDui>;
}
```
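? This way the new patterns could match a base register plus an immediate offset (as the `UIAddrMode` patterns above do) instead of only a plain `GPR64sp:$Rn` base, so you might make better use of the addressing modes.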
https://github.com/llvm/llvm-project/pull/147707
More information about the llvm-commits mailing list