[llvm] [DAGCombiner] Query nsz from function attribute and use `cannotBeOrderedNegativeFP` in `foldFPToIntToFP` (PR #186779)

via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 16 06:24:56 PDT 2026


https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/186779

>From 6ed375b301b6dff0e95e1de3e29a94433024f79f Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 16 Mar 2026 19:35:07 +0800
Subject: [PATCH] [SelectionDAG] Query nsz from function attribute

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   9 +-
 llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll  | 362 +++++++++++-------
 llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll   |  63 +--
 3 files changed, 261 insertions(+), 173 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 781001d050f3d..ca168e57df4c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19460,8 +19460,13 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
   bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
   assert(IsSigned || IsUnsigned);
 
-  bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
-                          DAG.canIgnoreSignBitOfZero(SDValue(N, 0));
+  bool IsSignedZeroSafe =
+      DAG.getMachineFunction()
+          .getFunction()
+          .getFnAttribute("no-signed-zeros-fp-math")
+          .getValueAsBool() ||
+      DAG.cannotBeOrderedNegativeFP(N->getOperand(0)->getOperand(0)) ||
+      DAG.canIgnoreSignBitOfZero(SDValue(N, 0));
   // For signed conversions: The optimization changes signed zero behavior.
   if (IsSigned && !IsSignedZeroSafe)
     return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
index a50716e4ab183..e4237006c9678 100644
--- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
@@ -1,6 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,SIGNED-ZEROS
-; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NO-SIGNED-ZEROS
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
 
 ; Test folding of float->int->float roundtrips into float-only operations.
 ; The optimization converts patterns like:
@@ -9,16 +8,22 @@
 ; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation in SIMD/FP registers.
 
 define float @test_signed_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_signed_basic:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzs s0, s0
-; SIGNED-ZEROS-NEXT:    scvtf s0, s0
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_signed_basic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs s0, s0
+; CHECK-NEXT:    scvtf s0, s0
+; CHECK-NEXT:    ret
+entry:
+  %i = fptosi float %x to i32
+  %f = sitofp i32 %i to float
+  ret float %f
+}
+
+define float @test_signed_basic_nsz(float %x) #0 {
+; CHECK-LABEL: test_signed_basic_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    ret
 entry:
   %i = fptosi float %x to i32
   %f = sitofp i32 %i to float
@@ -26,16 +31,11 @@ entry:
 }
 
 define float @test_unsigned_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzu s0, s0
-; SIGNED-ZEROS-NEXT:    ucvtf s0, s0
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_unsigned_basic:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu s0, s0
+; CHECK-NEXT:    ucvtf s0, s0
+; CHECK-NEXT:    ret
 entry:
   %i = fptoui float %x to i32
   %f = uitofp i32 %i to float
@@ -43,28 +43,36 @@ entry:
 }
 
 define float @test_signed_min_max(float %x) {
-; SIGNED-ZEROS-LABEL: test_signed_min_max:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzs w9, s0
-; SIGNED-ZEROS-NEXT:    mov w8, #-512 // =0xfffffe00
-; SIGNED-ZEROS-NEXT:    cmn w9, #512
-; SIGNED-ZEROS-NEXT:    csel w8, w9, w8, gt
-; SIGNED-ZEROS-NEXT:    mov w9, #1023 // =0x3ff
-; SIGNED-ZEROS-NEXT:    cmp w8, #1023
-; SIGNED-ZEROS-NEXT:    csel w8, w8, w9, lt
-; SIGNED-ZEROS-NEXT:    scvtf s0, w8
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_min_max:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    movi v1.2s, #196, lsl #24
-; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT:    mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT:    movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT:    fmaxnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT:    fmov s1, w8
-; NO-SIGNED-ZEROS-NEXT:    fminnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_signed_min_max:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs w9, s0
+; CHECK-NEXT:    mov w8, #-512 // =0xfffffe00
+; CHECK-NEXT:    cmn w9, #512
+; CHECK-NEXT:    csel w8, w9, w8, gt
+; CHECK-NEXT:    mov w9, #1023 // =0x3ff
+; CHECK-NEXT:    cmp w8, #1023
+; CHECK-NEXT:    csel w8, w8, w9, lt
+; CHECK-NEXT:    scvtf s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %i = fptosi float %x to i32
+  %lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
+  %clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023)
+  %f = sitofp i32 %clamped to float
+  ret float %f
+}
+
+define float @test_signed_min_max_nsz(float %x) #0 {
+; CHECK-LABEL: test_signed_min_max_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2s, #196, lsl #24
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    mov w8, #49152 // =0xc000
+; CHECK-NEXT:    movk w8, #17535, lsl #16
+; CHECK-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    ret
 entry:
   %i = fptosi float %x to i32
   %lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
@@ -74,28 +82,36 @@ entry:
 }
 
 define float @test_unsigned_min_max(float %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_min_max:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzu w9, s0
-; SIGNED-ZEROS-NEXT:    mov w8, #512 // =0x200
-; SIGNED-ZEROS-NEXT:    cmp w9, #512
-; SIGNED-ZEROS-NEXT:    csel w8, w9, w8, hi
-; SIGNED-ZEROS-NEXT:    mov w9, #1023 // =0x3ff
-; SIGNED-ZEROS-NEXT:    cmp w8, #1023
-; SIGNED-ZEROS-NEXT:    csel w8, w8, w9, lo
-; SIGNED-ZEROS-NEXT:    ucvtf s0, w8
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    movi v1.2s, #68, lsl #24
-; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT:    mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT:    movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT:    fmaxnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT:    fmov s1, w8
-; NO-SIGNED-ZEROS-NEXT:    fminnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_unsigned_min_max:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu w9, s0
+; CHECK-NEXT:    mov w8, #512 // =0x200
+; CHECK-NEXT:    cmp w9, #512
+; CHECK-NEXT:    csel w8, w9, w8, hi
+; CHECK-NEXT:    mov w9, #1023 // =0x3ff
+; CHECK-NEXT:    cmp w8, #1023
+; CHECK-NEXT:    csel w8, w8, w9, lo
+; CHECK-NEXT:    ucvtf s0, w8
+; CHECK-NEXT:    ret
+entry:
+  %i = fptoui float %x to i32
+  %lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
+  %clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023)
+  %f = uitofp i32 %clamped to float
+  ret float %f
+}
+
+define float @test_unsigned_min_max_nsz(float %x) #0 {
+; CHECK-LABEL: test_unsigned_min_max_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.2s, #68, lsl #24
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    mov w8, #49152 // =0xc000
+; CHECK-NEXT:    movk w8, #17535, lsl #16
+; CHECK-NEXT:    fmaxnm s0, s0, s1
+; CHECK-NEXT:    fmov s1, w8
+; CHECK-NEXT:    fminnm s0, s0, s1
+; CHECK-NEXT:    ret
 entry:
   %i = fptoui float %x to i32
   %lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
@@ -124,16 +140,22 @@ entry:
 }
 
 define <4 x float> @test_signed_v4f32(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_signed_v4f32:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzs v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    scvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_signed_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    scvtf v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %i = fptosi <4 x float> %x to <4 x i32>
+  %f = sitofp <4 x i32> %i to <4 x float>
+  ret <4 x float> %f
+}
+
+define <4 x float> @test_signed_v4f32_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_signed_v4f32_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %i = fptosi <4 x float> %x to <4 x i32>
   %f = sitofp <4 x i32> %i to <4 x float>
@@ -141,16 +163,22 @@ entry:
 }
 
 define <4 x float> @test_unsigned_v4f32(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_v4f32:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzu v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    ucvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_unsigned_v4f32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %i = fptoui <4 x float> %x to <4 x i32>
+  %f = uitofp <4 x i32> %i to <4 x float>
+  ret <4 x float> %f
+}
+
+define <4 x float> @test_unsigned_v4f32_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_unsigned_v4f32_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    ret
 entry:
   %i = fptoui <4 x float> %x to <4 x i32>
   %f = uitofp <4 x i32> %i to <4 x float>
@@ -158,16 +186,22 @@ entry:
 }
 
 define <2 x double> @test_signed_v2f64(<2 x double> %x) {
-; SIGNED-ZEROS-LABEL: test_signed_v2f64:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzs v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT:    scvtf v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_v2f64:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    frintz v0.2d, v0.2d
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_signed_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs v0.2d, v0.2d
+; CHECK-NEXT:    scvtf v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %i = fptosi <2 x double> %x to <2 x i64>
+  %f = sitofp <2 x i64> %i to <2 x double>
+  ret <2 x double> %f
+}
+
+define <2 x double> @test_signed_v2f64_nsz(<2 x double> %x) #0 {
+; CHECK-LABEL: test_signed_v2f64_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.2d, v0.2d
+; CHECK-NEXT:    ret
 entry:
   %i = fptosi <2 x double> %x to <2 x i64>
   %f = sitofp <2 x i64> %i to <2 x double>
@@ -175,16 +209,22 @@ entry:
 }
 
 define <2 x double> @test_unsigned_v2f64(<2 x double> %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_v2f64:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzu v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT:    ucvtf v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_v2f64:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    frintz v0.2d, v0.2d
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_unsigned_v2f64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzu v0.2d, v0.2d
+; CHECK-NEXT:    ucvtf v0.2d, v0.2d
+; CHECK-NEXT:    ret
+entry:
+  %i = fptoui <2 x double> %x to <2 x i64>
+  %f = uitofp <2 x i64> %i to <2 x double>
+  ret <2 x double> %f
+}
+
+define <2 x double> @test_unsigned_v2f64_nsz(<2 x double> %x) #0 {
+; CHECK-LABEL: test_unsigned_v2f64_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    frintz v0.2d, v0.2d
+; CHECK-NEXT:    ret
 entry:
   %i = fptoui <2 x double> %x to <2 x i64>
   %f = uitofp <2 x i64> %i to <2 x double>
@@ -192,26 +232,34 @@ entry:
 }
 
 define <4 x float> @test_signed_v4f32_min_max(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    fcvtzs v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    mvni v1.4s, #1, msl #8
-; SIGNED-ZEROS-NEXT:    movi v2.4s, #3, msl #8
-; SIGNED-ZEROS-NEXT:    smax v0.4s, v0.4s, v1.4s
-; SIGNED-ZEROS-NEXT:    smin v0.4s, v0.4s, v2.4s
-; SIGNED-ZEROS-NEXT:    scvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    movi v1.4s, #196, lsl #24
-; NO-SIGNED-ZEROS-NEXT:    frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT:    mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT:    movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT:    dup v1.4s, w8
-; NO-SIGNED-ZEROS-NEXT:    fminnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_signed_v4f32_min_max:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    fcvtzs v0.4s, v0.4s
+; CHECK-NEXT:    mvni v1.4s, #1, msl #8
+; CHECK-NEXT:    movi v2.4s, #3, msl #8
+; CHECK-NEXT:    smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    scvtf v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %i = fptosi <4 x float> %x to <4 x i32>
+  %lower = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 -512))
+  %clamped = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023))
+  %f = sitofp <4 x i32> %clamped to <4 x float>
+  ret <4 x float> %f
+}
+
+define <4 x float> @test_signed_v4f32_min_max_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_signed_v4f32_min_max_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.4s, #196, lsl #24
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    mov w8, #49152 // =0xc000
+; CHECK-NEXT:    movk w8, #17535, lsl #16
+; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
 entry:
   %i = fptosi <4 x float> %x to <4 x i32>
   %lower = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 -512))
@@ -221,26 +269,34 @@ entry:
 }
 
 define <4 x float> @test_unsigned_v4f32_min_max(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max:
-; SIGNED-ZEROS:       // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT:    movi v1.4s, #2, lsl #8
-; SIGNED-ZEROS-NEXT:    fcvtzu v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    movi v2.4s, #3, msl #8
-; SIGNED-ZEROS-NEXT:    umax v0.4s, v0.4s, v1.4s
-; SIGNED-ZEROS-NEXT:    umin v0.4s, v0.4s, v2.4s
-; SIGNED-ZEROS-NEXT:    ucvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT:    ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max:
-; NO-SIGNED-ZEROS:       // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT:    movi v1.4s, #68, lsl #24
-; NO-SIGNED-ZEROS-NEXT:    frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT:    mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT:    movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT:    dup v1.4s, w8
-; NO-SIGNED-ZEROS-NEXT:    fminnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT:    ret
+; CHECK-LABEL: test_unsigned_v4f32_min_max:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.4s, #2, lsl #8
+; CHECK-NEXT:    fcvtzu v0.4s, v0.4s
+; CHECK-NEXT:    movi v2.4s, #3, msl #8
+; CHECK-NEXT:    umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT:    ucvtf v0.4s, v0.4s
+; CHECK-NEXT:    ret
+entry:
+  %i = fptoui <4 x float> %x to <4 x i32>
+  %lower = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 512))
+  %clamped = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023))
+  %f = uitofp <4 x i32> %clamped to <4 x float>
+  ret <4 x float> %f
+}
+
+define <4 x float> @test_unsigned_v4f32_min_max_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_unsigned_v4f32_min_max_nsz:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    movi v1.4s, #68, lsl #24
+; CHECK-NEXT:    frintz v0.4s, v0.4s
+; CHECK-NEXT:    mov w8, #49152 // =0xc000
+; CHECK-NEXT:    movk w8, #17535, lsl #16
+; CHECK-NEXT:    fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    dup v1.4s, w8
+; CHECK-NEXT:    fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    ret
 entry:
   %i = fptoui <4 x float> %x to <4 x i32>
   %lower = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 512))
@@ -275,6 +331,18 @@ define float @test_fabs(float %x) {
   ret float %abs
 }
 
+define float @test_fabs_positive(float %x) {
+; CHECK-LABEL: test_fabs_positive:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    fabs s0, s0
+; CHECK-NEXT:    frintz s0, s0
+; CHECK-NEXT:    ret
+  %abs = call float @llvm.fabs(float %x)
+  %conv1 = fptosi float %abs to i32
+  %conv2 = sitofp i32 %conv1 to float
+  ret float %conv2
+}
+
 define float @test_copysign(float %x, float %y) {
 ; CHECK-LABEL: test_copysign:
 ; CHECK:       // %bb.0:
@@ -326,3 +394,5 @@ declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
 declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
 declare float @llvm.fabs.f32(float)
 declare float @llvm.copysign.f32(float, float)
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll
index 2416d6a852eb9..0a6a25384c136 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll
@@ -1,22 +1,27 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn < %s | FileCheck %s --check-prefixes=CHECK,SIGNED-ZEROS
-; RUN: llc -mtriple=amdgcn --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NO-SIGNED-ZEROS
+; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
 
 ; Test folding of float->int->float roundtrips into float-only operations.
 
 define float @test_signed_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_signed_basic:
-; SIGNED-ZEROS:       ; %bb.0: ; %entry
-; SIGNED-ZEROS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SIGNED-ZEROS-NEXT:    v_cvt_i32_f32_e32 v0, v0
-; SIGNED-ZEROS-NEXT:    v_cvt_f32_i32_e32 v0, v0
-; SIGNED-ZEROS-NEXT:    s_setpc_b64 s[30:31]
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
-; NO-SIGNED-ZEROS:       ; %bb.0: ; %entry
-; NO-SIGNED-ZEROS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; NO-SIGNED-ZEROS-NEXT:    v_trunc_f32_e32 v0, v0
-; NO-SIGNED-ZEROS-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: test_signed_basic:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_cvt_i32_f32_e32 v0, v0
+; CHECK-NEXT:    v_cvt_f32_i32_e32 v0, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %i = fptosi float %x to i32
+  %f = sitofp i32 %i to float
+  ret float %f
+}
+
+define float @test_signed_basic_nsz(float %x) #0 {
+; CHECK-LABEL: test_signed_basic_nsz:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_trunc_f32_e32 v0, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %i = fptosi float %x to i32
   %f = sitofp i32 %i to float
@@ -26,17 +31,23 @@ entry:
 ; For unsigned conversions, even when signed zeros are possible, we can still
 ; use truncate because fabs is free.
 define float @test_unsigned_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; SIGNED-ZEROS:       ; %bb.0: ; %entry
-; SIGNED-ZEROS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SIGNED-ZEROS-NEXT:    v_trunc_f32_e64 v0, |v0|
-; SIGNED-ZEROS-NEXT:    s_setpc_b64 s[30:31]
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; NO-SIGNED-ZEROS:       ; %bb.0: ; %entry
-; NO-SIGNED-ZEROS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; NO-SIGNED-ZEROS-NEXT:    v_trunc_f32_e32 v0, v0
-; NO-SIGNED-ZEROS-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-LABEL: test_unsigned_basic:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_trunc_f32_e64 v0, |v0|
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %i = fptoui float %x to i32
+  %f = uitofp i32 %i to float
+  ret float %f
+}
+
+define float @test_unsigned_basic_nsz(float %x) #0 {
+; CHECK-LABEL: test_unsigned_basic_nsz:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_trunc_f32_e32 v0, v0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
 entry:
   %i = fptoui float %x to i32
   %f = uitofp i32 %i to float
@@ -60,3 +71,5 @@ entry:
 }
 
 declare i32 @llvm.smin.i32(i32, i32)
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }



More information about the llvm-commits mailing list