[llvm] [DAGCombiner] Extend fp->int->fp optimizations to include clamping (PR #164502)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sat Dec 13 02:27:54 PST 2025
================
@@ -0,0 +1,140 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefix=NO-SIGNED-ZEROS
+
+; Test folding of float->int->float roundtrips into float-only operations.
+; The optimization converts patterns like:
+; sitofp(fptosi(x)) -> ftrunc(x)
+; sitofp(smin(fptosi(x), C)) -> fminnum(ftrunc(x), (float)C)
+; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation in SIMD/FP registers.
+
+define float @test_signed_basic(float %x) {
+; CHECK-LABEL: test_signed_basic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: scvtf s0, s0
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %f = sitofp i32 %i to float
+ ret float %f
+}
+
+define float @test_unsigned_basic(float %x) {
+; CHECK-LABEL: test_unsigned_basic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ucvtf s0, s0
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptoui float %x to i32
+ %f = uitofp i32 %i to float
+ ret float %f
+}
+
+define float @test_signed_min_max(float %x) {
+; CHECK-LABEL: test_signed_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w9, s0
+; CHECK-NEXT: mov w8, #-512 // =0xfffffe00
+; CHECK-NEXT: cmn w9, #512
+; CHECK-NEXT: csel w8, w9, w8, gt
+; CHECK-NEXT: mov w9, #1023 // =0x3ff
+; CHECK-NEXT: cmp w8, #1023
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_signed_min_max:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #196, lsl #24
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
+; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
+; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
+; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
+ %clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023)
+ %f = sitofp i32 %clamped to float
+ ret float %f
+}
+
+define float @test_unsigned_min_max(float %x) {
+; CHECK-LABEL: test_unsigned_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w9, s0
+; CHECK-NEXT: mov w8, #512 // =0x200
+; CHECK-NEXT: cmp w9, #512
+; CHECK-NEXT: csel w8, w9, w8, hi
+; CHECK-NEXT: mov w9, #1023 // =0x3ff
+; CHECK-NEXT: cmp w8, #1023
+; CHECK-NEXT: csel w8, w8, w9, lo
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #68, lsl #24
+; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
+; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
+; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
+; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
+; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptoui float %x to i32
+ %lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
+ %clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023)
+ %f = uitofp i32 %clamped to float
+ ret float %f
+}
+
+; 16777217 is NOT exactly representable in f32.
+define float @test_inexact_16777217(float %x) {
+; CHECK-LABEL: test_inexact_16777217:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w8, s0
+; CHECK-NEXT: mov w9, #16777216 // =0x1000000
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: mov w9, #1 // =0x1
+; CHECK-NEXT: movk w9, #256, lsl #16
+; CHECK-NEXT: csel w8, w8, w9, le
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+;
+; NO-SIGNED-ZEROS-LABEL: test_inexact_16777217:
+; NO-SIGNED-ZEROS: // %bb.0: // %entry
+; NO-SIGNED-ZEROS-NEXT: fcvtzs w8, s0
+; NO-SIGNED-ZEROS-NEXT: mov w9, #16777216 // =0x1000000
+; NO-SIGNED-ZEROS-NEXT: cmp w8, w9
+; NO-SIGNED-ZEROS-NEXT: mov w9, #1 // =0x1
+; NO-SIGNED-ZEROS-NEXT: movk w9, #256, lsl #16
+; NO-SIGNED-ZEROS-NEXT: csel w8, w8, w9, le
+; NO-SIGNED-ZEROS-NEXT: scvtf s0, w8
+; NO-SIGNED-ZEROS-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %clamped = call i32 @llvm.smin.i32(i32 %i, i32 16777217)
+ %f = sitofp i32 %clamped to float
+ ret float %f
+}
+
----------------
arsenm wrote:
Test vector cases?
https://github.com/llvm/llvm-project/pull/164502
More information about the llvm-commits
mailing list