[llvm] fba17cd - [AArch64] Guard fptosi+sitofp patterns with one use checks. (#156407)

Wed Sep 3 00:21:31 PDT 2025

Author: David Green
Date: 2025-09-03T08:21:27+01:00
New Revision: fba17cdee1830951867ecfc7d40f7b6caa78310a

URL: https://github.com/llvm/llvm-project/commit/fba17cdee1830951867ecfc7d40f7b6caa78310a
DIFF: https://github.com/llvm/llvm-project/commit/fba17cdee1830951867ecfc7d40f7b6caa78310a.diff

LOG: [AArch64] Guard fptosi+sitofp patterns with one use checks. (#156407)

Otherwise we can end up with more instructions, needing to emit both
`fcvtzu w0, s0` and `fcvtzu s0, s0`.

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64InstrInfo.td
    llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
index ce40e202f30f5..62b26b5239365 100644

--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -6706,20 +6706,24 @@ def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
 // Some float -> int -> float conversion patterns for which we want to keep the
 // int values in FP registers using the corresponding NEON instructions to
 // avoid more costly int <-> fp register transfers.
+let HasOneUse = 1 in {
+def any_fp_to_sint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_sint $src0)>;
+def any_fp_to_uint_oneuse: PatFrag<(ops node:$src0), (any_fp_to_uint $src0)>;
+}
 let Predicates = [HasNEONandIsSME2p2StreamingSafe] in {
-def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
+def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint_oneuse f64:$Rn)))),
           (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
+def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f32:$Rn)))),
           (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
-def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
+def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint_oneuse f64:$Rn)))),
           (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
-def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
+def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f32:$Rn)))),
           (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
 
 let Predicates = [HasNEONandIsSME2p2StreamingSafe, HasFullFP16] in {
-def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
+def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint_oneuse f16:$Rn)))),
           (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
-def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
+def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint_oneuse f16:$Rn)))),
           (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
 }
 

diff  --git a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
index d4caf64294f45..1207de746894b 100644
--- a/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
+++ b/llvm/test/CodeGen/AArch64/cvt-fp-int-fp.ll
@@ -122,9 +122,8 @@ entry:
 define i64 @testu_f64_multiuse(double %x) {
 ; CHECK-LABEL: testu_f64_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu d1, d0
 ; CHECK-NEXT:    fcvtzu x8, d0
-; CHECK-NEXT:    ucvtf d1, d1
+; CHECK-NEXT:    ucvtf d1, x8
 ; CHECK-NEXT:    fcmp d0, d1
 ; CHECK-NEXT:    csel x0, x8, xzr, eq
 ; CHECK-NEXT:    ret
@@ -139,9 +138,8 @@ entry:
 define i32 @testu_f32_multiuse(float %x) {
 ; CHECK-LABEL: testu_f32_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu s1, s0
 ; CHECK-NEXT:    fcvtzu w8, s0
-; CHECK-NEXT:    ucvtf s1, s1
+; CHECK-NEXT:    ucvtf s1, w8
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret
@@ -156,9 +154,8 @@ entry:
 define i32 @testu_f16_multiuse(half %x) {
 ; CHECK-LABEL: testu_f16_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzu h1, h0
 ; CHECK-NEXT:    fcvtzu w8, h0
-; CHECK-NEXT:    ucvtf h1, h1
+; CHECK-NEXT:    ucvtf h1, w8
 ; CHECK-NEXT:    fcmp h0, h1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret
@@ -173,9 +170,8 @@ entry:
 define i64 @tests_f64_multiuse(double %x) {
 ; CHECK-LABEL: tests_f64_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs d1, d0
 ; CHECK-NEXT:    fcvtzs x8, d0
-; CHECK-NEXT:    scvtf d1, d1
+; CHECK-NEXT:    scvtf d1, x8
 ; CHECK-NEXT:    fcmp d0, d1
 ; CHECK-NEXT:    csel x0, x8, xzr, eq
 ; CHECK-NEXT:    ret
@@ -190,9 +186,8 @@ entry:
 define i32 @tests_f32_multiuse(float %x) {
 ; CHECK-LABEL: tests_f32_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs s1, s0
 ; CHECK-NEXT:    fcvtzs w8, s0
-; CHECK-NEXT:    scvtf s1, s1
+; CHECK-NEXT:    scvtf s1, w8
 ; CHECK-NEXT:    fcmp s0, s1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret
@@ -207,9 +202,8 @@ entry:
 define i32 @tests_f16_multiuse(half %x) {
 ; CHECK-LABEL: tests_f16_multiuse:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    fcvtzs h1, h0
 ; CHECK-NEXT:    fcvtzs w8, h0
-; CHECK-NEXT:    scvtf h1, h1
+; CHECK-NEXT:    scvtf h1, w8
 ; CHECK-NEXT:    fcmp h0, h1
 ; CHECK-NEXT:    csel w0, w8, wzr, eq
 ; CHECK-NEXT:    ret