[llvm] [DAGCombiner] Query nsz from function attribute and use `cannotBeOrderedNegativeFP` in `foldFPToIntToFP` (PR #186779)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 16 06:24:56 PDT 2026
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/186779
>From 6ed375b301b6dff0e95e1de3e29a94433024f79f Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Mon, 16 Mar 2026 19:35:07 +0800
Subject: [PATCH] [SelectionDAG] Query nsz from function attribute
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 9 +-
llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll | 362 +++++++++++-------
llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll | 63 +--
3 files changed, 261 insertions(+), 173 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 781001d050f3d..ca168e57df4c0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -19460,8 +19460,13 @@ static SDValue foldFPToIntToFP(SDNode *N, const SDLoc &DL, SelectionDAG &DAG,
bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP;
assert(IsSigned || IsUnsigned);
- bool IsSignedZeroSafe = DAG.getTarget().Options.NoSignedZerosFPMath ||
- DAG.canIgnoreSignBitOfZero(SDValue(N, 0));
+ bool IsSignedZeroSafe =
+ DAG.getMachineFunction()
+ .getFunction()
+ .getFnAttribute("no-signed-zeros-fp-math")
+ .getValueAsBool() ||
+ DAG.cannotBeOrderedNegativeFP(N->getOperand(0)->getOperand(0)) ||
+ DAG.canIgnoreSignBitOfZero(SDValue(N, 0));
// For signed conversions: The optimization changes signed zero behavior.
if (IsSigned && !IsSignedZeroSafe)
return SDValue();
diff --git a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
index a50716e4ab183..e4237006c9678 100644
--- a/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AArch64/fp-to-int-to-fp.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64 < %s | FileCheck %s --check-prefixes=CHECK,SIGNED-ZEROS
-; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NO-SIGNED-ZEROS
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; Test folding of float->int->float roundtrips into float-only operations.
; The optimization converts patterns like:
@@ -9,16 +8,22 @@
; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation in SIMD/FP registers.
define float @test_signed_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_signed_basic:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzs s0, s0
-; SIGNED-ZEROS-NEXT: scvtf s0, s0
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_signed_basic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs s0, s0
+; CHECK-NEXT: scvtf s0, s0
+; CHECK-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %f = sitofp i32 %i to float
+ ret float %f
+}
+
+define float @test_signed_basic_nsz(float %x) #0 {
+; CHECK-LABEL: test_signed_basic_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: ret
entry:
%i = fptosi float %x to i32
%f = sitofp i32 %i to float
@@ -26,16 +31,11 @@ entry:
}
define float @test_unsigned_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzu s0, s0
-; SIGNED-ZEROS-NEXT: ucvtf s0, s0
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_unsigned_basic:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu s0, s0
+; CHECK-NEXT: ucvtf s0, s0
+; CHECK-NEXT: ret
entry:
%i = fptoui float %x to i32
%f = uitofp i32 %i to float
@@ -43,28 +43,36 @@ entry:
}
define float @test_signed_min_max(float %x) {
-; SIGNED-ZEROS-LABEL: test_signed_min_max:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzs w9, s0
-; SIGNED-ZEROS-NEXT: mov w8, #-512 // =0xfffffe00
-; SIGNED-ZEROS-NEXT: cmn w9, #512
-; SIGNED-ZEROS-NEXT: csel w8, w9, w8, gt
-; SIGNED-ZEROS-NEXT: mov w9, #1023 // =0x3ff
-; SIGNED-ZEROS-NEXT: cmp w8, #1023
-; SIGNED-ZEROS-NEXT: csel w8, w8, w9, lt
-; SIGNED-ZEROS-NEXT: scvtf s0, w8
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_min_max:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #196, lsl #24
-; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
-; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_signed_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs w9, s0
+; CHECK-NEXT: mov w8, #-512 // =0xfffffe00
+; CHECK-NEXT: cmn w9, #512
+; CHECK-NEXT: csel w8, w9, w8, gt
+; CHECK-NEXT: mov w9, #1023 // =0x3ff
+; CHECK-NEXT: cmp w8, #1023
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: scvtf s0, w8
+; CHECK-NEXT: ret
+entry:
+ %i = fptosi float %x to i32
+ %lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
+ %clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023)
+ %f = sitofp i32 %clamped to float
+ ret float %f
+}
+
+define float @test_signed_min_max_nsz(float %x) #0 {
+; CHECK-LABEL: test_signed_min_max_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2s, #196, lsl #24
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: mov w8, #49152 // =0xc000
+; CHECK-NEXT: movk w8, #17535, lsl #16
+; CHECK-NEXT: fmaxnm s0, s0, s1
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: ret
entry:
%i = fptosi float %x to i32
%lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
@@ -74,28 +82,36 @@ entry:
}
define float @test_unsigned_min_max(float %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_min_max:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzu w9, s0
-; SIGNED-ZEROS-NEXT: mov w8, #512 // =0x200
-; SIGNED-ZEROS-NEXT: cmp w9, #512
-; SIGNED-ZEROS-NEXT: csel w8, w9, w8, hi
-; SIGNED-ZEROS-NEXT: mov w9, #1023 // =0x3ff
-; SIGNED-ZEROS-NEXT: cmp w8, #1023
-; SIGNED-ZEROS-NEXT: csel w8, w8, w9, lo
-; SIGNED-ZEROS-NEXT: ucvtf s0, w8
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: movi v1.2s, #68, lsl #24
-; NO-SIGNED-ZEROS-NEXT: frintz s0, s0
-; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT: fmaxnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT: fmov s1, w8
-; NO-SIGNED-ZEROS-NEXT: fminnm s0, s0, s1
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_unsigned_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu w9, s0
+; CHECK-NEXT: mov w8, #512 // =0x200
+; CHECK-NEXT: cmp w9, #512
+; CHECK-NEXT: csel w8, w9, w8, hi
+; CHECK-NEXT: mov w9, #1023 // =0x3ff
+; CHECK-NEXT: cmp w8, #1023
+; CHECK-NEXT: csel w8, w8, w9, lo
+; CHECK-NEXT: ucvtf s0, w8
+; CHECK-NEXT: ret
+entry:
+ %i = fptoui float %x to i32
+ %lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
+ %clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023)
+ %f = uitofp i32 %clamped to float
+ ret float %f
+}
+
+define float @test_unsigned_min_max_nsz(float %x) #0 {
+; CHECK-LABEL: test_unsigned_min_max_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.2s, #68, lsl #24
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: mov w8, #49152 // =0xc000
+; CHECK-NEXT: movk w8, #17535, lsl #16
+; CHECK-NEXT: fmaxnm s0, s0, s1
+; CHECK-NEXT: fmov s1, w8
+; CHECK-NEXT: fminnm s0, s0, s1
+; CHECK-NEXT: ret
entry:
%i = fptoui float %x to i32
%lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
@@ -124,16 +140,22 @@ entry:
}
define <4 x float> @test_signed_v4f32(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_signed_v4f32:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzs v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: scvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_signed_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: scvtf v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %i = fptosi <4 x float> %x to <4 x i32>
+ %f = sitofp <4 x i32> %i to <4 x float>
+ ret <4 x float> %f
+}
+
+define <4 x float> @test_signed_v4f32_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_signed_v4f32_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%i = fptosi <4 x float> %x to <4 x i32>
%f = sitofp <4 x i32> %i to <4 x float>
@@ -141,16 +163,22 @@ entry:
}
define <4 x float> @test_unsigned_v4f32(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_v4f32:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzu v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: ucvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_unsigned_v4f32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %i = fptoui <4 x float> %x to <4 x i32>
+ %f = uitofp <4 x i32> %i to <4 x float>
+ ret <4 x float> %f
+}
+
+define <4 x float> @test_unsigned_v4f32_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_unsigned_v4f32_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: ret
entry:
%i = fptoui <4 x float> %x to <4 x i32>
%f = uitofp <4 x i32> %i to <4 x float>
@@ -158,16 +186,22 @@ entry:
}
define <2 x double> @test_signed_v2f64(<2 x double> %x) {
-; SIGNED-ZEROS-LABEL: test_signed_v2f64:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzs v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT: scvtf v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_v2f64:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: frintz v0.2d, v0.2d
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_signed_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.2d, v0.2d
+; CHECK-NEXT: scvtf v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %i = fptosi <2 x double> %x to <2 x i64>
+ %f = sitofp <2 x i64> %i to <2 x double>
+ ret <2 x double> %f
+}
+
+define <2 x double> @test_signed_v2f64_nsz(<2 x double> %x) #0 {
+; CHECK-LABEL: test_signed_v2f64_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.2d, v0.2d
+; CHECK-NEXT: ret
entry:
%i = fptosi <2 x double> %x to <2 x i64>
%f = sitofp <2 x i64> %i to <2 x double>
@@ -175,16 +209,22 @@ entry:
}
define <2 x double> @test_unsigned_v2f64(<2 x double> %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_v2f64:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzu v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT: ucvtf v0.2d, v0.2d
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_v2f64:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: frintz v0.2d, v0.2d
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_unsigned_v2f64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzu v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ret
+entry:
+ %i = fptoui <2 x double> %x to <2 x i64>
+ %f = uitofp <2 x i64> %i to <2 x double>
+ ret <2 x double> %f
+}
+
+define <2 x double> @test_unsigned_v2f64_nsz(<2 x double> %x) #0 {
+; CHECK-LABEL: test_unsigned_v2f64_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: frintz v0.2d, v0.2d
+; CHECK-NEXT: ret
entry:
%i = fptoui <2 x double> %x to <2 x i64>
%f = uitofp <2 x i64> %i to <2 x double>
@@ -192,26 +232,34 @@ entry:
}
define <4 x float> @test_signed_v4f32_min_max(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: fcvtzs v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: mvni v1.4s, #1, msl #8
-; SIGNED-ZEROS-NEXT: movi v2.4s, #3, msl #8
-; SIGNED-ZEROS-NEXT: smax v0.4s, v0.4s, v1.4s
-; SIGNED-ZEROS-NEXT: smin v0.4s, v0.4s, v2.4s
-; SIGNED-ZEROS-NEXT: scvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_v4f32_min_max:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: movi v1.4s, #196, lsl #24
-; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT: dup v1.4s, w8
-; NO-SIGNED-ZEROS-NEXT: fminnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_signed_v4f32_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-NEXT: mvni v1.4s, #1, msl #8
+; CHECK-NEXT: movi v2.4s, #3, msl #8
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: scvtf v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %i = fptosi <4 x float> %x to <4 x i32>
+ %lower = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 -512))
+ %clamped = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023))
+ %f = sitofp <4 x i32> %clamped to <4 x float>
+ ret <4 x float> %f
+}
+
+define <4 x float> @test_signed_v4f32_min_max_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_signed_v4f32_min_max_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.4s, #196, lsl #24
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: mov w8, #49152 // =0xc000
+; CHECK-NEXT: movk w8, #17535, lsl #16
+; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%i = fptosi <4 x float> %x to <4 x i32>
%lower = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 -512))
@@ -221,26 +269,34 @@ entry:
}
define <4 x float> @test_unsigned_v4f32_min_max(<4 x float> %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max:
-; SIGNED-ZEROS: // %bb.0: // %entry
-; SIGNED-ZEROS-NEXT: movi v1.4s, #2, lsl #8
-; SIGNED-ZEROS-NEXT: fcvtzu v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: movi v2.4s, #3, msl #8
-; SIGNED-ZEROS-NEXT: umax v0.4s, v0.4s, v1.4s
-; SIGNED-ZEROS-NEXT: umin v0.4s, v0.4s, v2.4s
-; SIGNED-ZEROS-NEXT: ucvtf v0.4s, v0.4s
-; SIGNED-ZEROS-NEXT: ret
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_v4f32_min_max:
-; NO-SIGNED-ZEROS: // %bb.0: // %entry
-; NO-SIGNED-ZEROS-NEXT: movi v1.4s, #68, lsl #24
-; NO-SIGNED-ZEROS-NEXT: frintz v0.4s, v0.4s
-; NO-SIGNED-ZEROS-NEXT: mov w8, #49152 // =0xc000
-; NO-SIGNED-ZEROS-NEXT: movk w8, #17535, lsl #16
-; NO-SIGNED-ZEROS-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT: dup v1.4s, w8
-; NO-SIGNED-ZEROS-NEXT: fminnm v0.4s, v0.4s, v1.4s
-; NO-SIGNED-ZEROS-NEXT: ret
+; CHECK-LABEL: test_unsigned_v4f32_min_max:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.4s, #2, lsl #8
+; CHECK-NEXT: fcvtzu v0.4s, v0.4s
+; CHECK-NEXT: movi v2.4s, #3, msl #8
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-NEXT: ret
+entry:
+ %i = fptoui <4 x float> %x to <4 x i32>
+ %lower = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 512))
+ %clamped = call <4 x i32> @llvm.umin.v4i32(<4 x i32> %lower, <4 x i32> splat (i32 1023))
+ %f = uitofp <4 x i32> %clamped to <4 x float>
+ ret <4 x float> %f
+}
+
+define <4 x float> @test_unsigned_v4f32_min_max_nsz(<4 x float> %x) #0 {
+; CHECK-LABEL: test_unsigned_v4f32_min_max_nsz:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: movi v1.4s, #68, lsl #24
+; CHECK-NEXT: frintz v0.4s, v0.4s
+; CHECK-NEXT: mov w8, #49152 // =0xc000
+; CHECK-NEXT: movk w8, #17535, lsl #16
+; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: dup v1.4s, w8
+; CHECK-NEXT: fminnm v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
entry:
%i = fptoui <4 x float> %x to <4 x i32>
%lower = call <4 x i32> @llvm.umax.v4i32(<4 x i32> %i, <4 x i32> splat (i32 512))
@@ -275,6 +331,18 @@ define float @test_fabs(float %x) {
ret float %abs
}
+define float @test_fabs_positive(float %x) {
+; CHECK-LABEL: test_fabs_positive:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fabs s0, s0
+; CHECK-NEXT: frintz s0, s0
+; CHECK-NEXT: ret
+ %abs = call float @llvm.fabs(float %x)
+ %conv1 = fptosi float %abs to i32
+ %conv2 = sitofp i32 %conv1 to float
+ ret float %conv2
+}
+
define float @test_copysign(float %x, float %y) {
; CHECK-LABEL: test_copysign:
; CHECK: // %bb.0:
@@ -326,3 +394,5 @@ declare <4 x i32> @llvm.umin.v4i32(<4 x i32>, <4 x i32>)
declare <4 x i32> @llvm.umax.v4i32(<4 x i32>, <4 x i32>)
declare float @llvm.fabs.f32(float)
declare float @llvm.copysign.f32(float, float)
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }
diff --git a/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll b/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll
index 2416d6a852eb9..0a6a25384c136 100644
--- a/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fp-to-int-to-fp.ll
@@ -1,22 +1,27 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn < %s | FileCheck %s --check-prefixes=CHECK,SIGNED-ZEROS
-; RUN: llc -mtriple=amdgcn --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefixes=CHECK,NO-SIGNED-ZEROS
+; RUN: llc -mtriple=amdgcn < %s | FileCheck %s
; Test folding of float->int->float roundtrips into float-only operations.
define float @test_signed_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_signed_basic:
-; SIGNED-ZEROS: ; %bb.0: ; %entry
-; SIGNED-ZEROS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SIGNED-ZEROS-NEXT: v_cvt_i32_f32_e32 v0, v0
-; SIGNED-ZEROS-NEXT: v_cvt_f32_i32_e32 v0, v0
-; SIGNED-ZEROS-NEXT: s_setpc_b64 s[30:31]
-;
-; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
-; NO-SIGNED-ZEROS: ; %bb.0: ; %entry
-; NO-SIGNED-ZEROS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; NO-SIGNED-ZEROS-NEXT: v_trunc_f32_e32 v0, v0
-; NO-SIGNED-ZEROS-NEXT: s_setpc_b64 s[30:31]
+; CHECK-LABEL: test_signed_basic:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_cvt_i32_f32_e32 v0, v0
+; CHECK-NEXT: v_cvt_f32_i32_e32 v0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %i = fptosi float %x to i32
+ %f = sitofp i32 %i to float
+ ret float %f
+}
+
+define float @test_signed_basic_nsz(float %x) #0 {
+; CHECK-LABEL: test_signed_basic_nsz:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_trunc_f32_e32 v0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
%i = fptosi float %x to i32
%f = sitofp i32 %i to float
@@ -26,17 +31,23 @@ entry:
; For unsigned conversions, even when signed zeros are possible, we can still
; use truncate because fabs is free.
define float @test_unsigned_basic(float %x) {
-; SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; SIGNED-ZEROS: ; %bb.0: ; %entry
-; SIGNED-ZEROS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; SIGNED-ZEROS-NEXT: v_trunc_f32_e64 v0, |v0|
-; SIGNED-ZEROS-NEXT: s_setpc_b64 s[30:31]
-;
-; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
-; NO-SIGNED-ZEROS: ; %bb.0: ; %entry
-; NO-SIGNED-ZEROS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; NO-SIGNED-ZEROS-NEXT: v_trunc_f32_e32 v0, v0
-; NO-SIGNED-ZEROS-NEXT: s_setpc_b64 s[30:31]
+; CHECK-LABEL: test_unsigned_basic:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_trunc_f32_e64 v0, |v0|
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %i = fptoui float %x to i32
+ %f = uitofp i32 %i to float
+ ret float %f
+}
+
+define float @test_unsigned_basic_nsz(float %x) #0 {
+; CHECK-LABEL: test_unsigned_basic_nsz:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_trunc_f32_e32 v0, v0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
%i = fptoui float %x to i32
%f = uitofp i32 %i to float
@@ -60,3 +71,5 @@ entry:
}
declare i32 @llvm.smin.i32(i32, i32)
+
+attributes #0 = { "no-signed-zeros-fp-math"="true" }
More information about the llvm-commits
mailing list