[llvm] [DAGCombiner] Extend fp->int->fp optimizations to include clamping (PR #164502)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 13 02:27:54 PST 2025
================
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefix=NO-SIGNED-ZEROS
+
+; Test folding of float->int->float roundtrips into float-only operations.
+; The optimization converts patterns like:
+; sitofp(fptosi(x)) -> ftrunc(x)
+; sitofp(smin(fptosi(x), C)) -> fminnum(ftrunc(x), (float)C)
+; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation in SIMD/FP registers.
+
+define float @test_signed_basic(float %x) {
+; CHECK-LABEL: test_signed_basic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: scvtf s0, s0
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %f = sitofp i32 %i to float
+ ret float %f
+}
+
+define float @test_unsigned_basic(float %x) {
+; CHECK-LABEL: test_unsigned_basic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ucvtf s0, s0
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptoui float %x to i32
+ %f = uitofp i32 %i to float
+ ret float %f
+}
+
+define float @test_signed_min_max(float %x) {
+; CHECK-LABEL: test_signed_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w9, s0
+; CHECK-NEXT: mov w8, #-512 // =0xfffffe00
+; CHECK-NEXT: cmn w9, #512
+; CHECK-NEXT: csel w8, w9, w8, gt
+; CHECK-NEXT: mov w9, #1023 // =0x3ff
+; CHECK-NEXT: cmp w8, #1023
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_signed_min_max:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #196, lsl #24
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
+; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
+; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
+; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
+ %clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023)
+ %f = sitofp i32 %clamped to float
+ ret float %f
+}
+
+define float @test_unsigned_min_max(float %x) {
+; CHECK-LABEL: test_unsigned_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w9, s0
+; CHECK-NEXT: mov w8, #512 // =0x200
+; CHECK-NEXT: cmp w9, #512
+; CHECK-NEXT: csel w8, w9, w8, hi
+; CHECK-NEXT: mov w9, #1023 // =0x3ff
+; CHECK-NEXT: cmp w8, #1023
+; CHECK-NEXT: csel w8, w8, w9, lo
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #68, lsl #24
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
+; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
+; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
+; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptoui float %x to i32
+ %lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
+ %clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023)
+ %f = uitofp i32 %clamped to float
+ ret float %f
+}
+
+; 16777217 is NOT exactly representable in f32.
+define float @test_inexact_16777217(float %x) {
+; CHECK-LABEL: test_inexact_16777217:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov w9, #16777216 // =0x1000000
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: movk w9, #256, lsl #16
+; CHECK-NEXT: csel w8, w8, w9, le
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_inexact_16777217:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: fcvtzs w8, s0
+; NO-SIGNED-ZEROS-NEXT: mov w9, #16777216 // =0x1000000
+; NO-SIGNED-ZEROS-NEXT: cmp w8, w9
+; NO-SIGNED-ZEROS-NEXT: mov w9, #1 // =0x1
+; NO-SIGNED-ZEROS-NEXT: movk w9, #256, lsl #16
+; NO-SIGNED-ZEROS-NEXT: csel w8, w8, w9, le
+; NO-SIGNED-ZEROS-NEXT: scvtf s0, w8
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %clamped = call i32 @llvm.smin.i32(i32 %i, i32 16777217)
+ %f = sitofp i32 %clamped to float
+ ret float %f
+}
+
----------------
arsenm wrote:
Test vector cases?
https://github.com/llvm/llvm-project/pull/164502
More information about the llvm-commits
mailing list