[llvm] [X86] Remove `NoSignedZerosFPMath` uses (PR #163902)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 27 05:21:21 PST 2026
https://github.com/paperchalice updated https://github.com/llvm/llvm-project/pull/163902
>From b3c592b645c581bc06cf520d09409d0b23bd98c7 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Fri, 17 Oct 2025 11:03:51 +0800
Subject: [PATCH 1/2] [X86] Remove `NoSignedZerosFPMath` uses
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 32 +++++++++++--------------
1 file changed, 14 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 9291bf5c20ede..374b5957ad4ec 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29771,8 +29771,7 @@ static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
bool IsXNeverNaN = DAG.isKnownNeverNaN(X);
bool IsYNeverNaN = DAG.isKnownNeverNaN(Y);
- bool IgnoreSignedZero = DAG.getTarget().Options.NoSignedZerosFPMath ||
- Op->getFlags().hasNoSignedZeros() ||
+ bool IgnoreSignedZero = Op->getFlags().hasNoSignedZeros() ||
DAG.isKnownNeverZeroFloat(X) ||
DAG.isKnownNeverZeroFloat(Y);
bool ShouldHandleZeros = true;
@@ -48348,7 +48347,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+ if (!N->getFlags().hasNoSignedZeros() &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS)))
break;
@@ -48359,7 +48358,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
case ISD::SETOLE:
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly.
- if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+ if (!N->getFlags().hasNoSignedZeros() &&
!DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
break;
Opcode = X86ISD::FMIN;
@@ -48378,7 +48377,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
case ISD::SETOGE:
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly.
- if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+ if (!N->getFlags().hasNoSignedZeros() &&
!DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
break;
Opcode = X86ISD::FMAX;
@@ -48388,7 +48387,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// the operands would cause it to handle comparisons between positive
// and negative zero incorrectly.
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
- if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+ if (!N->getFlags().hasNoSignedZeros() &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS)))
break;
@@ -48415,7 +48414,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a min would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+ if (!N->getFlags().hasNoSignedZeros() &&
!(DAG.isKnownNeverZeroFloat(LHS) ||
DAG.isKnownNeverZeroFloat(RHS))) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
@@ -48451,7 +48450,7 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
// Converting this to a max would handle comparisons between positive
// and negative zero incorrectly, and swapping the operands would
// cause it to handle NaNs incorrectly.
- if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
+ if (!N->getFlags().hasNoSignedZeros() &&
!DAG.isKnownNeverZeroFloat(LHS) &&
!DAG.isKnownNeverZeroFloat(RHS)) {
if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
@@ -48474,15 +48473,16 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
}
if (Opcode) {
+ SDNodeFlags Flags = N->getFlags();
if (IsStrict) {
SDValue Ret = DAG.getNode(Opcode == X86ISD::FMIN ? X86ISD::STRICT_FMIN
: X86ISD::STRICT_FMAX,
DL, {N->getValueType(0), MVT::Other},
- {Cond.getOperand(0), LHS, RHS});
+ {Cond.getOperand(0), LHS, RHS}, Flags);
DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Ret.getValue(1));
return Ret;
}
- return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
+ return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS, Flags);
}
}
@@ -54944,10 +54944,6 @@ static SDValue combineFMulcFCMulc(SDNode *N, SelectionDAG &DAG,
// FADD(A, FMA(B, C, 0)) and FADD(A, FMUL(B, C)) to FMA(B, C, A)
static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- auto HasNoSignedZero = [&DAG](const SDNodeFlags &Flags) {
- return DAG.getTarget().Options.NoSignedZerosFPMath ||
- Flags.hasNoSignedZeros();
- };
auto IsVectorAllNegativeZero = [&DAG](SDValue Op) {
APInt AI = APInt(32, 0x80008000);
KnownBits Bits = DAG.computeKnownBits(Op);
@@ -54967,8 +54963,8 @@ static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(1);
bool IsConj;
SDValue FAddOp1, MulOp0, MulOp1;
- auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj, &IsVectorAllNegativeZero,
- &HasNoSignedZero](SDValue N) -> bool {
+ auto GetCFmulFrom = [&MulOp0, &MulOp1, &IsConj,
+ &IsVectorAllNegativeZero](SDValue N) -> bool {
if (!N.hasOneUse() || N.getOpcode() != ISD::BITCAST)
return false;
SDValue Op0 = N.getOperand(0);
@@ -54982,7 +54978,7 @@ static SDValue combineFaddCFmul(SDNode *N, SelectionDAG &DAG,
}
if ((Opcode == X86ISD::VFMADDC || Opcode == X86ISD::VFCMADDC) &&
((ISD::isBuildVectorAllZeros(Op0->getOperand(2).getNode()) &&
- HasNoSignedZero(Op0->getFlags())) ||
+ Op0->getFlags().hasNoSignedZeros()) ||
IsVectorAllNegativeZero(Op0->getOperand(2)))) {
MulOp0 = Op0.getOperand(0);
MulOp1 = Op0.getOperand(1);
@@ -56116,7 +56112,7 @@ static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) {
// FMIN/FMAX are commutative if no NaNs and no negative zeros are allowed.
if (!DAG.getTarget().Options.NoNaNsFPMath ||
- !DAG.getTarget().Options.NoSignedZerosFPMath)
+ !N->getFlags().hasNoSignedZeros())
return SDValue();
// If we run in unsafe-math mode, then convert the FMAX and FMIN nodes
>From 52cfdc11fe5cf34246facc7c6abbde63431eab18 Mon Sep 17 00:00:00 2001
From: PaperChalice <liujunchang97 at outlook.com>
Date: Tue, 27 Jan 2026 21:21:00 +0800
Subject: [PATCH 2/2] Fix tests
---
llvm/test/CodeGen/X86/avx512-safe-fp-math.ll | 79 ++
.../test/CodeGen/X86/avx512-unsafe-fp-math.ll | 52 +-
.../X86/avx512fp16-combine-vfmac-fadd-nsz.ll | 87 ++
.../X86/avx512fp16-combine-vfmac-fadd.ll | 105 +-
.../X86/avx512fp16-fminimum-fmaximum.ll | 20 +-
.../X86/avx512fp16-machine-combiner.ll | 36 +-
.../CodeGen/X86/avx512fp16-safe-fp-math.ll | 100 ++
.../CodeGen/X86/avx512fp16-unsafe-fp-math.ll | 90 +-
llvm/test/CodeGen/X86/exedepsfix-broadcast.ll | 18 +-
llvm/test/CodeGen/X86/fminimum-fmaximum.ll | 16 +-
.../CodeGen/X86/fminimumnum-fmaximumnum.ll | 16 +-
llvm/test/CodeGen/X86/machine-combiner.ll | 76 +-
llvm/test/CodeGen/X86/sse-minmax-unsafe.ll | 972 ++++++++++++++++++
llvm/test/CodeGen/X86/sse-minmax.ll | 609 ++++-------
.../CodeGen/X86/stack-folding-fp-avx512.ll | 18 +-
.../X86/stack-folding-fp-avx512fp16.ll | 18 +-
16 files changed, 1597 insertions(+), 715 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/avx512-safe-fp-math.ll
create mode 100644 llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd-nsz.ll
create mode 100644 llvm/test/CodeGen/X86/avx512fp16-safe-fp-math.ll
create mode 100644 llvm/test/CodeGen/X86/sse-minmax-unsafe.ll
diff --git a/llvm/test/CodeGen/X86/avx512-safe-fp-math.ll b/llvm/test/CodeGen/X86/avx512-safe-fp-math.ll
new file mode 100644
index 0000000000000..c54bcdb3b7edb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512-safe-fp-math.ll
@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64 -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s
+
+define <16 x float> @test_max_v16f32(ptr %a_ptr, <16 x float> %b) {
+; CHECK-LABEL: test_max_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vmaxps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = load <16 x float>, ptr %a_ptr
+ %tmp = fcmp fast ogt <16 x float> %a, %b
+ %tmp4 = select <16 x i1> %tmp, <16 x float> %a, <16 x float> %b
+ ret <16 x float> %tmp4
+}
+
+define <16 x float> @test_min_v16f32(ptr %a_ptr, <16 x float> %b) {
+; CHECK-LABEL: test_min_v16f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vminps %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = load <16 x float>, ptr %a_ptr
+ %tmp = fcmp fast olt <16 x float> %a, %b
+ %tmp4 = select <16 x i1> %tmp, <16 x float> %a, <16 x float> %b
+ ret <16 x float> %tmp4
+}
+
+define <8 x double> @test_max_v8f64(ptr %a_ptr, <8 x double> %b) {
+; CHECK-LABEL: test_max_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
+; CHECK-NEXT: vmaxpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = load <8 x double>, ptr %a_ptr
+ %tmp = fcmp fast ogt <8 x double> %a, %b
+ %tmp4 = select <8 x i1> %tmp, <8 x double> %a, <8 x double> %b
+ ret <8 x double> %tmp4
+}
+
+define <8 x double> @test_min_v8f64(ptr %a_ptr, <8 x double> %b) {
+; CHECK-LABEL: test_min_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovapd (%rdi), %zmm1
+; CHECK-NEXT: vminpd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = load <8 x double>, ptr %a_ptr
+ %tmp = fcmp fast olt <8 x double> %a, %b
+ %tmp4 = select <8 x i1> %tmp, <8 x double> %a, <8 x double> %b
+ ret <8 x double> %tmp4
+}
+
+define float @test_min_f32(float %a, ptr %ptr) {
+; CHECK-LABEL: test_min_f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT: vminss %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = load float, ptr %ptr
+ %1 = fcmp fast olt float %0, %a
+ %2 = select i1 %1, float %0, float %a
+ ret float %2
+}
+
+define double @test_max_f64(double %a, ptr %ptr) {
+; CHECK-LABEL: test_max_f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = load double, ptr %ptr
+ %1 = fcmp fast ogt double %0, %a
+ %2 = select i1 %1, double %0, double %a
+ ret double %2
+}
diff --git a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
index eb9de8a9536d0..fe965989b4293 100644
--- a/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
+++ b/llvm/test/CodeGen/X86/avx512-unsafe-fp-math.ll
@@ -1,24 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s --check-prefix=CHECK_UNSAFE
-; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -mattr=+avx512f | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64 -enable-no-signed-zeros-fp-math -mattr=+avx512f | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512f | FileCheck %s
define <16 x float> @test_max_v16f32(ptr %a_ptr, <16 x float> %b) {
; CHECK_UNSAFE-LABEL: test_max_v16f32:
; CHECK_UNSAFE: # %bb.0:
; CHECK_UNSAFE-NEXT: vmaxps (%rdi), %zmm0, %zmm0
; CHECK_UNSAFE-NEXT: retq
-;
-; CHECK-LABEL: test_max_v16f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vmaxps %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
%a = load <16 x float>, ptr %a_ptr
%tmp = fcmp fast ogt <16 x float> %a, %b
- %tmp4 = select <16 x i1> %tmp, <16 x float> %a, <16 x float> %b
+ %tmp4 = select nnan nsz <16 x i1> %tmp, <16 x float> %a, <16 x float> %b
ret <16 x float> %tmp4;
}
@@ -27,15 +17,9 @@ define <16 x float> @test_min_v16f32(ptr %a_ptr, <16 x float> %b) {
; CHECK_UNSAFE: # %bb.0:
; CHECK_UNSAFE-NEXT: vminps (%rdi), %zmm0, %zmm0
; CHECK_UNSAFE-NEXT: retq
-;
-; CHECK-LABEL: test_min_v16f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vminps %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
%a = load <16 x float>, ptr %a_ptr
%tmp = fcmp fast olt <16 x float> %a, %b
- %tmp4 = select <16 x i1> %tmp, <16 x float> %a, <16 x float> %b
+ %tmp4 = select nnan nsz <16 x i1> %tmp, <16 x float> %a, <16 x float> %b
ret <16 x float> %tmp4;
}
@@ -44,15 +28,9 @@ define <8 x double> @test_max_v8f64(ptr %a_ptr, <8 x double> %b) {
; CHECK_UNSAFE: # %bb.0:
; CHECK_UNSAFE-NEXT: vmaxpd (%rdi), %zmm0, %zmm0
; CHECK_UNSAFE-NEXT: retq
-;
-; CHECK-LABEL: test_max_v8f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovapd (%rdi), %zmm1
-; CHECK-NEXT: vmaxpd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
%a = load <8 x double>, ptr %a_ptr
%tmp = fcmp fast ogt <8 x double> %a, %b
- %tmp4 = select <8 x i1> %tmp, <8 x double> %a, <8 x double> %b
+ %tmp4 = select nnan nsz <8 x i1> %tmp, <8 x double> %a, <8 x double> %b
ret <8 x double> %tmp4;
}
@@ -61,15 +39,9 @@ define <8 x double> @test_min_v8f64(ptr %a_ptr, <8 x double> %b) {
; CHECK_UNSAFE: # %bb.0:
; CHECK_UNSAFE-NEXT: vminpd (%rdi), %zmm0, %zmm0
; CHECK_UNSAFE-NEXT: retq
-;
-; CHECK-LABEL: test_min_v8f64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vmovapd (%rdi), %zmm1
-; CHECK-NEXT: vminpd %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
%a = load <8 x double>, ptr %a_ptr
%tmp = fcmp fast olt <8 x double> %a, %b
- %tmp4 = select <8 x i1> %tmp, <8 x double> %a, <8 x double> %b
+ %tmp4 = select nnan nsz <8 x i1> %tmp, <8 x double> %a, <8 x double> %b
ret <8 x double> %tmp4;
}
@@ -78,16 +50,10 @@ define float @test_min_f32(float %a, ptr %ptr) {
; CHECK_UNSAFE: # %bb.0: # %entry
; CHECK_UNSAFE-NEXT: vminss (%rdi), %xmm0, %xmm0
; CHECK_UNSAFE-NEXT: retq
-;
-; CHECK-LABEL: test_min_f32:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; CHECK-NEXT: vminss %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
entry:
%0 = load float, ptr %ptr
%1 = fcmp fast olt float %0, %a
- %2 = select i1 %1, float %0, float %a
+ %2 = select nnan nsz i1 %1, float %0, float %a
ret float %2
}
@@ -96,15 +62,9 @@ define double @test_max_f64(double %a, ptr %ptr) {
; CHECK_UNSAFE: # %bb.0: # %entry
; CHECK_UNSAFE-NEXT: vmaxsd (%rdi), %xmm0, %xmm0
; CHECK_UNSAFE-NEXT: retq
-;
-; CHECK-LABEL: test_max_f64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; CHECK-NEXT: vmaxsd %xmm0, %xmm1, %xmm0
-; CHECK-NEXT: retq
entry:
%0 = load double, ptr %ptr
%1 = fcmp fast ogt double %0, %a
- %2 = select i1 %1, double %0, double %a
+ %2 = select nnan nsz i1 %1, double %0, double %a
ret double %2
}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd-nsz.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd-nsz.ll
new file mode 100644
index 0000000000000..5a98650388028
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd-nsz.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s
+
+; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag is set.
+define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <32 x half> %a to <16 x float>
+ %1 = bitcast <32 x half> %b to <16 x float>
+ %2 = tail call nsz contract <16 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %3 = bitcast <16 x float> %2 to <32 x half>
+ %add.i = fadd contract <32 x half> %3, %acc
+ ret <32 x half> %add.i
+}
+
+define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <32 x half> %a to <16 x float>
+ %1 = bitcast <32 x half> %b to <16 x float>
+ %2 = tail call nsz contract <16 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.512(<16 x float> %0, <16 x float> %1, <16 x float> zeroinitializer, i16 -1, i32 4)
+ %3 = bitcast <16 x float> %2 to <32 x half>
+ %add.i = fadd contract <32 x half> %3, %acc
+ ret <32 x half> %add.i
+}
+
+define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <16 x half> %a to <8 x float>
+ %1 = bitcast <16 x half> %b to <8 x float>
+ %2 = tail call nsz contract <8 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+ %3 = bitcast <8 x float> %2 to <16 x half>
+ %add.i = fadd contract <16 x half> %3, %acc
+ ret <16 x half> %add.i
+}
+
+define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <16 x half> %a to <8 x float>
+ %1 = bitcast <16 x half> %b to <8 x float>
+ %2 = tail call nsz contract <8 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.256(<8 x float> %0, <8 x float> %1, <8 x float> zeroinitializer, i8 -1)
+ %3 = bitcast <8 x float> %2 to <16 x half>
+ %add.i = fadd contract <16 x half> %3, %acc
+ ret <16 x half> %add.i
+}
+
+define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x half> %a to <4 x float>
+ %1 = bitcast <8 x half> %b to <4 x float>
+ %2 = tail call nsz contract <4 x float> @llvm.x86.avx512fp16.mask.vfcmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+ %3 = bitcast <4 x float> %2 to <8 x half>
+ %add.i = fadd contract <8 x half> %3, %acc
+ ret <8 x half> %add.i
+}
+
+define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = bitcast <8 x half> %a to <4 x float>
+ %1 = bitcast <8 x half> %b to <4 x float>
+ %2 = tail call nsz contract <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float> %0, <4 x float> %1, <4 x float> zeroinitializer, i8 -1)
+ %3 = bitcast <4 x float> %2 to <8 x half>
+ %add.i = fadd contract <8 x half> %3, %acc
+ ret <8 x half> %add.i
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
index f1477b57375c4..b256113845ba9 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-combine-vfmac-fadd.ll
@@ -1,20 +1,14 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown --enable-no-signed-zeros-fp-math -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,NO-SZ
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s --check-prefixes=CHECK,HAS-SZ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512fp16,avx512vl | FileCheck %s
; FADD(acc, FMA(a, b, +0.0)) can be combined to FMA(a, b, acc) if the nsz flag set.
define dso_local <32 x half> @test1(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
-; NO-SZ-LABEL: test1:
-; NO-SZ: # %bb.0: # %entry
-; NO-SZ-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm0
-; NO-SZ-NEXT: retq
-;
-; HAS-SZ-LABEL: test1:
-; HAS-SZ: # %bb.0: # %entry
-; HAS-SZ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; HAS-SZ-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm3
-; HAS-SZ-NEXT: vaddph %zmm0, %zmm3, %zmm0
-; HAS-SZ-NEXT: retq
+; CHECK-LABEL: test1:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vfcmaddcph %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: vaddph %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
@@ -25,17 +19,12 @@ entry:
}
define dso_local <32 x half> @test2(<32 x half> %acc, <32 x half> %a, <32 x half> %b) {
-; NO-SZ-LABEL: test2:
-; NO-SZ: # %bb.0: # %entry
-; NO-SZ-NEXT: vfmaddcph %zmm2, %zmm1, %zmm0
-; NO-SZ-NEXT: retq
-;
-; HAS-SZ-LABEL: test2:
-; HAS-SZ: # %bb.0: # %entry
-; HAS-SZ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; HAS-SZ-NEXT: vfmaddcph %zmm2, %zmm1, %zmm3
-; HAS-SZ-NEXT: vaddph %zmm0, %zmm3, %zmm0
-; HAS-SZ-NEXT: retq
+; CHECK-LABEL: test2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vfmaddcph %zmm2, %zmm1, %zmm3
+; CHECK-NEXT: vaddph %zmm0, %zmm3, %zmm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <32 x half> %a to <16 x float>
%1 = bitcast <32 x half> %b to <16 x float>
@@ -46,17 +35,12 @@ entry:
}
define dso_local <16 x half> @test3(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
-; NO-SZ-LABEL: test3:
-; NO-SZ: # %bb.0: # %entry
-; NO-SZ-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm0
-; NO-SZ-NEXT: retq
-;
-; HAS-SZ-LABEL: test3:
-; HAS-SZ: # %bb.0: # %entry
-; HAS-SZ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; HAS-SZ-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm3
-; HAS-SZ-NEXT: vaddph %ymm0, %ymm3, %ymm0
-; HAS-SZ-NEXT: retq
+; CHECK-LABEL: test3:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vfcmaddcph %ymm2, %ymm1, %ymm3
+; CHECK-NEXT: vaddph %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
@@ -67,17 +51,12 @@ entry:
}
define dso_local <16 x half> @test4(<16 x half> %acc, <16 x half> %a, <16 x half> %b) {
-; NO-SZ-LABEL: test4:
-; NO-SZ: # %bb.0: # %entry
-; NO-SZ-NEXT: vfmaddcph %ymm2, %ymm1, %ymm0
-; NO-SZ-NEXT: retq
-;
-; HAS-SZ-LABEL: test4:
-; HAS-SZ: # %bb.0: # %entry
-; HAS-SZ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; HAS-SZ-NEXT: vfmaddcph %ymm2, %ymm1, %ymm3
-; HAS-SZ-NEXT: vaddph %ymm0, %ymm3, %ymm0
-; HAS-SZ-NEXT: retq
+; CHECK-LABEL: test4:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vfmaddcph %ymm2, %ymm1, %ymm3
+; CHECK-NEXT: vaddph %ymm0, %ymm3, %ymm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <16 x half> %a to <8 x float>
%1 = bitcast <16 x half> %b to <8 x float>
@@ -88,17 +67,12 @@ entry:
}
define dso_local <8 x half> @test5(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
-; NO-SZ-LABEL: test5:
-; NO-SZ: # %bb.0: # %entry
-; NO-SZ-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm0
-; NO-SZ-NEXT: retq
-;
-; HAS-SZ-LABEL: test5:
-; HAS-SZ: # %bb.0: # %entry
-; HAS-SZ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; HAS-SZ-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm3
-; HAS-SZ-NEXT: vaddph %xmm0, %xmm3, %xmm0
-; HAS-SZ-NEXT: retq
+; CHECK-LABEL: test5:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vfcmaddcph %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: vaddph %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
@@ -109,17 +83,12 @@ entry:
}
define dso_local <8 x half> @test6(<8 x half> %acc, <8 x half> %a, <8 x half> %b) {
-; NO-SZ-LABEL: test6:
-; NO-SZ: # %bb.0: # %entry
-; NO-SZ-NEXT: vfmaddcph %xmm2, %xmm1, %xmm0
-; NO-SZ-NEXT: retq
-;
-; HAS-SZ-LABEL: test6:
-; HAS-SZ: # %bb.0: # %entry
-; HAS-SZ-NEXT: vxorps %xmm3, %xmm3, %xmm3
-; HAS-SZ-NEXT: vfmaddcph %xmm2, %xmm1, %xmm3
-; HAS-SZ-NEXT: vaddph %xmm0, %xmm3, %xmm0
-; HAS-SZ-NEXT: retq
+; CHECK-LABEL: test6:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
+; CHECK-NEXT: vfmaddcph %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: vaddph %xmm0, %xmm3, %xmm0
+; CHECK-NEXT: retq
entry:
%0 = bitcast <8 x half> %a to <4 x float>
%1 = bitcast <8 x half> %b to <4 x float>
diff --git a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
index 59cf38f82b7c0..1264633f9b2c0 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-fminimum-fmaximum.ll
@@ -24,12 +24,12 @@ define half @test_fminimum(half %x, half %y) {
ret half %z
}
-define <8 x half> @test_fminimum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <8 x half> @test_fminimum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" {
; CHECK-LABEL: test_fminimum_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vminph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
- %r = call <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
+ %r = call nsz <8 x half> @llvm.minimum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
}
@@ -98,12 +98,12 @@ define half @test_fmaximum(half %x, half %y) {
ret half %r
}
-define <8 x half> @test_fmaximum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <8 x half> @test_fmaximum_v8f16(<8 x half> %x, <8 x half> %y) "no-nans-fp-math"="true" {
; CHECK-LABEL: test_fmaximum_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxph %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
- %r = call <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
+ %r = call nsz <8 x half> @llvm.maximum.v8f16(<8 x half> %x, <8 x half> %y)
ret <8 x half> %r
}
@@ -136,7 +136,7 @@ define half @test_fmaximum_zero(half %x, half %y) {
ret half %1
}
-define half @test_fmaximum_nsz(half %x, half %y) "no-signed-zeros-fp-math"="true" {
+define half @test_fmaximum_nsz(half %x, half %y) {
; CHECK-LABEL: test_fmaximum_nsz:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxsh %xmm1, %xmm0, %xmm1
@@ -144,7 +144,7 @@ define half @test_fmaximum_nsz(half %x, half %y) "no-signed-zeros-fp-math"="true
; CHECK-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1}
; CHECK-NEXT: vmovaps %xmm1, %xmm0
; CHECK-NEXT: retq
- %1 = tail call half @llvm.maximum.f16(half %x, half %y)
+ %1 = tail call nsz half @llvm.maximum.f16(half %x, half %y)
ret half %1
}
@@ -163,16 +163,16 @@ define half @test_fmaximum_combine_cmps(half %x, half %y) {
ret half %2
}
-define <16 x half> @test_fminimum_v16f16(<16 x half> %x, <16 x half> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <16 x half> @test_fminimum_v16f16(<16 x half> %x, <16 x half> %y) "no-nans-fp-math"="true" {
; CHECK-LABEL: test_fminimum_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vminph %ymm1, %ymm0, %ymm0
; CHECK-NEXT: retq
- %r = call <16 x half> @llvm.minimum.v16f16(<16 x half> %x, <16 x half> %y)
+ %r = call nsz <16 x half> @llvm.minimum.v16f16(<16 x half> %x, <16 x half> %y)
ret <16 x half> %r
}
-define <16 x half> @test_fmaximum_v16f16_nans(<16 x half> %x, <16 x half> %y) "no-signed-zeros-fp-math"="true" {
+define <16 x half> @test_fmaximum_v16f16_nans(<16 x half> %x, <16 x half> %y) {
; CHECK-LABEL: test_fmaximum_v16f16_nans:
; CHECK: # %bb.0:
; CHECK-NEXT: vmaxph %ymm1, %ymm0, %ymm1
@@ -180,7 +180,7 @@ define <16 x half> @test_fmaximum_v16f16_nans(<16 x half> %x, <16 x half> %y) "n
; CHECK-NEXT: vmovdqu16 %ymm0, %ymm1 {%k1}
; CHECK-NEXT: vmovdqa %ymm1, %ymm0
; CHECK-NEXT: retq
- %r = call <16 x half> @llvm.maximum.v16f16(<16 x half> %x, <16 x half> %y)
+ %r = call nsz <16 x half> @llvm.maximum.v16f16(<16 x half> %x, <16 x half> %y)
ret <16 x half> %r
}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll b/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll
index 6ff40c2339e1d..56ca9fce1e9f3 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-machine-combiner.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512fp16,avx512vl -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512fp16,avx512vl -enable-no-nans-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s
; Incremental updates of the instruction depths should be enough for this test
; case.
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512fp16,avx512vl -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512fp16,avx512vl -enable-no-nans-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
@@ -218,9 +218,9 @@ define half @reassociate_mins_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-NEXT: retq
%t0 = fdiv half %x0, %x1
%cmp1 = fcmp olt half %x2, %t0
- %sel1 = select i1 %cmp1, half %x2, half %t0
+ %sel1 = select nsz i1 %cmp1, half %x2, half %t0
%cmp2 = fcmp olt half %x3, %sel1
- %sel2 = select i1 %cmp2, half %x3, half %sel1
+ %sel2 = select nsz i1 %cmp2, half %x3, half %sel1
ret half %sel2
}
@@ -235,9 +235,9 @@ define half @reassociate_maxs_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-NEXT: retq
%t0 = fdiv half %x0, %x1
%cmp1 = fcmp ogt half %x2, %t0
- %sel1 = select i1 %cmp1, half %x2, half %t0
+ %sel1 = select nsz i1 %cmp1, half %x2, half %t0
%cmp2 = fcmp ogt half %x3, %sel1
- %sel2 = select i1 %cmp2, half %x3, half %sel1
+ %sel2 = select nsz i1 %cmp2, half %x3, half %sel1
ret half %sel2
}
@@ -252,9 +252,9 @@ define <8 x half> @reassociate_mins_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x h
; CHECK-NEXT: retq
%t0 = fadd <8 x half> %x0, %x1
%cmp1 = fcmp olt <8 x half> %x2, %t0
- %sel1 = select <8 x i1> %cmp1, <8 x half> %x2, <8 x half> %t0
+ %sel1 = select nsz <8 x i1> %cmp1, <8 x half> %x2, <8 x half> %t0
%cmp2 = fcmp olt <8 x half> %x3, %sel1
- %sel2 = select <8 x i1> %cmp2, <8 x half> %x3, <8 x half> %sel1
+ %sel2 = select nsz <8 x i1> %cmp2, <8 x half> %x3, <8 x half> %sel1
ret <8 x half> %sel2
}
@@ -269,9 +269,9 @@ define <8 x half> @reassociate_maxs_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x h
; CHECK-NEXT: retq
%t0 = fadd <8 x half> %x0, %x1
%cmp1 = fcmp ogt <8 x half> %x2, %t0
- %sel1 = select <8 x i1> %cmp1, <8 x half> %x2, <8 x half> %t0
+ %sel1 = select nsz <8 x i1> %cmp1, <8 x half> %x2, <8 x half> %t0
%cmp2 = fcmp ogt <8 x half> %x3, %sel1
- %sel2 = select <8 x i1> %cmp2, <8 x half> %x3, <8 x half> %sel1
+ %sel2 = select nsz <8 x i1> %cmp2, <8 x half> %x3, <8 x half> %sel1
ret <8 x half> %sel2
}
@@ -286,9 +286,9 @@ define <16 x half> @reassociate_mins_v16f16(<16 x half> %x0, <16 x half> %x1, <1
; CHECK-NEXT: retq
%t0 = fadd <16 x half> %x0, %x1
%cmp1 = fcmp olt <16 x half> %x2, %t0
- %sel1 = select <16 x i1> %cmp1, <16 x half> %x2, <16 x half> %t0
+ %sel1 = select nsz <16 x i1> %cmp1, <16 x half> %x2, <16 x half> %t0
%cmp2 = fcmp olt <16 x half> %x3, %sel1
- %sel2 = select <16 x i1> %cmp2, <16 x half> %x3, <16 x half> %sel1
+ %sel2 = select nsz <16 x i1> %cmp2, <16 x half> %x3, <16 x half> %sel1
ret <16 x half> %sel2
}
@@ -303,9 +303,9 @@ define <16 x half> @reassociate_maxs_v16f16(<16 x half> %x0, <16 x half> %x1, <1
; CHECK-NEXT: retq
%t0 = fadd <16 x half> %x0, %x1
%cmp1 = fcmp ogt <16 x half> %x2, %t0
- %sel1 = select <16 x i1> %cmp1, <16 x half> %x2, <16 x half> %t0
+ %sel1 = select nsz <16 x i1> %cmp1, <16 x half> %x2, <16 x half> %t0
%cmp2 = fcmp ogt <16 x half> %x3, %sel1
- %sel2 = select <16 x i1> %cmp2, <16 x half> %x3, <16 x half> %sel1
+ %sel2 = select nsz <16 x i1> %cmp2, <16 x half> %x3, <16 x half> %sel1
ret <16 x half> %sel2
}
@@ -320,9 +320,9 @@ define <32 x half> @reassociate_mins_v32f16(<32 x half> %x0, <32 x half> %x1, <3
; CHECK-NEXT: retq
%t0 = fadd <32 x half> %x0, %x1
%cmp1 = fcmp olt <32 x half> %x2, %t0
- %sel1 = select <32 x i1> %cmp1, <32 x half> %x2, <32 x half> %t0
+ %sel1 = select nsz <32 x i1> %cmp1, <32 x half> %x2, <32 x half> %t0
%cmp2 = fcmp olt <32 x half> %x3, %sel1
- %sel2 = select <32 x i1> %cmp2, <32 x half> %x3, <32 x half> %sel1
+ %sel2 = select nsz <32 x i1> %cmp2, <32 x half> %x3, <32 x half> %sel1
ret <32 x half> %sel2
}
@@ -337,9 +337,9 @@ define <32 x half> @reassociate_maxs_v16f32(<32 x half> %x0, <32 x half> %x1, <3
; CHECK-NEXT: retq
%t0 = fadd <32 x half> %x0, %x1
%cmp1 = fcmp ogt <32 x half> %x2, %t0
- %sel1 = select <32 x i1> %cmp1, <32 x half> %x2, <32 x half> %t0
+ %sel1 = select nsz <32 x i1> %cmp1, <32 x half> %x2, <32 x half> %t0
%cmp2 = fcmp ogt <32 x half> %x3, %sel1
- %sel2 = select <32 x i1> %cmp2, <32 x half> %x3, <32 x half> %sel1
+ %sel2 = select nsz <32 x i1> %cmp2, <32 x half> %x3, <32 x half> %sel1
ret <32 x half> %sel2
}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-safe-fp-math.ll b/llvm/test/CodeGen/X86/avx512fp16-safe-fp-math.ll
new file mode 100644
index 0000000000000..d7092175b752a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512fp16-safe-fp-math.ll
@@ -0,0 +1,100 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512fp16,+avx512vl | FileCheck %s
+
+define <32 x half> @test_max_v32f16(ptr %a_ptr, <32 x half> %b) {
+; CHECK-LABEL: test_max_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vmaxph %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = load <32 x half>, ptr %a_ptr
+ %tmp = fcmp fast ogt <32 x half> %a, %b
+ %tmp4 = select <32 x i1> %tmp, <32 x half> %a, <32 x half> %b
+ ret <32 x half> %tmp4
+}
+
+define <32 x half> @test_min_v32f16(ptr %a_ptr, <32 x half> %b) {
+; CHECK-LABEL: test_min_v32f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %zmm1
+; CHECK-NEXT: vminph %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %a = load <32 x half>, ptr %a_ptr
+ %tmp = fcmp fast olt <32 x half> %a, %b
+ %tmp4 = select <32 x i1> %tmp, <32 x half> %a, <32 x half> %b
+ ret <32 x half> %tmp4
+}
+
+define <16 x half> @test_max_v16f16(ptr %a_ptr, <16 x half> %b) {
+; CHECK-LABEL: test_max_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %ymm1
+; CHECK-NEXT: vmaxph %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %a = load <16 x half>, ptr %a_ptr
+ %tmp = fcmp fast ogt <16 x half> %a, %b
+ %tmp4 = select <16 x i1> %tmp, <16 x half> %a, <16 x half> %b
+ ret <16 x half> %tmp4
+}
+
+define <16 x half> @test_min_v16f16(ptr %a_ptr, <16 x half> %b) {
+; CHECK-LABEL: test_min_v16f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %ymm1
+; CHECK-NEXT: vminph %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %a = load <16 x half>, ptr %a_ptr
+ %tmp = fcmp fast olt <16 x half> %a, %b
+ %tmp4 = select <16 x i1> %tmp, <16 x half> %a, <16 x half> %b
+ ret <16 x half> %tmp4
+}
+
+define <8 x half> @test_max_v8f16(ptr %a_ptr, <8 x half> %b) {
+; CHECK-LABEL: test_max_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %xmm1
+; CHECK-NEXT: vmaxph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a = load <8 x half>, ptr %a_ptr
+ %tmp = fcmp fast ogt <8 x half> %a, %b
+ %tmp4 = select <8 x i1> %tmp, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %tmp4
+}
+
+define <8 x half> @test_min_v8f16(ptr %a_ptr, <8 x half> %b) {
+; CHECK-LABEL: test_min_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmovaps (%rdi), %xmm1
+; CHECK-NEXT: vminph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %a = load <8 x half>, ptr %a_ptr
+ %tmp = fcmp fast olt <8 x half> %a, %b
+ %tmp4 = select <8 x i1> %tmp, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %tmp4
+}
+
+define half @test_max_f16(half %a, ptr %ptr) {
+; CHECK-LABEL: test_max_f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = load half, ptr %ptr
+ %1 = fcmp fast ogt half %0, %a
+ %2 = select i1 %1, half %0, half %a
+ ret half %2
+}
+
+define half @test_min_f16(half %a, ptr %ptr) {
+; CHECK-LABEL: test_min_f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vminsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+entry:
+ %0 = load half, ptr %ptr
+ %1 = fcmp fast olt half %0, %a
+ %2 = select i1 %1, half %0, half %a
+ ret half %2
+}
diff --git a/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll b/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll
index e96800051aa74..e99a8a67673b9 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-unsafe-fp-math.ll
@@ -1,64 +1,40 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefix=CHECK_UNSAFE
-; RUN: llc < %s -mtriple=x86_64 -mattr=+avx512fp16,+avx512vl | FileCheck %s --check-prefix=CHECK
+; RUN: llc < %s -mtriple=x86_64 -enable-no-nans-fp-math -mattr=+avx512fp16,+avx512vl | FileCheck %s
define <32 x half> @test_max_v32f16(ptr %a_ptr, <32 x half> %b) {
-; CHECK_UNSAFE-LABEL: test_max_v32f16:
-; CHECK_UNSAFE: # %bb.0:
-; CHECK_UNSAFE-NEXT: vmaxph (%rdi), %zmm0, %zmm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_max_v32f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vmaxph %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vmaxph (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
%a = load <32 x half>, ptr %a_ptr
%tmp = fcmp fast ogt <32 x half> %a, %b
- %tmp4 = select <32 x i1> %tmp, <32 x half> %a, <32 x half> %b
- ret <32 x half> %tmp4;
+ %tmp4 = select nsz <32 x i1> %tmp, <32 x half> %a, <32 x half> %b
+ ret <32 x half> %tmp4
}
define <32 x half> @test_min_v32f16(ptr %a_ptr, <32 x half> %b) {
-; CHECK_UNSAFE-LABEL: test_min_v32f16:
-; CHECK_UNSAFE: # %bb.0:
-; CHECK_UNSAFE-NEXT: vminph (%rdi), %zmm0, %zmm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_min_v32f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm1
-; CHECK-NEXT: vminph %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: vminph (%rdi), %zmm0, %zmm0
; CHECK-NEXT: retq
%a = load <32 x half>, ptr %a_ptr
%tmp = fcmp fast olt <32 x half> %a, %b
- %tmp4 = select <32 x i1> %tmp, <32 x half> %a, <32 x half> %b
- ret <32 x half> %tmp4;
+ %tmp4 = select nsz <32 x i1> %tmp, <32 x half> %a, <32 x half> %b
+ ret <32 x half> %tmp4
}
define <16 x half> @test_max_v16f16(ptr %a_ptr, <16 x half> %b) {
-; CHECK_UNSAFE-LABEL: test_max_v16f16:
-; CHECK_UNSAFE: # %bb.0:
-; CHECK_UNSAFE-NEXT: vmaxph (%rdi), %ymm0, %ymm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_max_v16f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %ymm1
-; CHECK-NEXT: vmaxph %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: vmaxph (%rdi), %ymm0, %ymm0
; CHECK-NEXT: retq
%a = load <16 x half>, ptr %a_ptr
%tmp = fcmp fast ogt <16 x half> %a, %b
- %tmp4 = select <16 x i1> %tmp, <16 x half> %a, <16 x half> %b
- ret <16 x half> %tmp4;
+ %tmp4 = select nsz <16 x i1> %tmp, <16 x half> %a, <16 x half> %b
+ ret <16 x half> %tmp4
}
define <16 x half> @test_min_v16f16(ptr %a_ptr, <16 x half> %b) {
-; CHECK_UNSAFE-LABEL: test_min_v16f16:
-; CHECK_UNSAFE: # %bb.0:
-; CHECK_UNSAFE-NEXT: vminph (%rdi), %ymm0, %ymm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_min_v16f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovaps (%rdi), %ymm1
@@ -67,75 +43,51 @@ define <16 x half> @test_min_v16f16(ptr %a_ptr, <16 x half> %b) {
%a = load <16 x half>, ptr %a_ptr
%tmp = fcmp fast olt <16 x half> %a, %b
%tmp4 = select <16 x i1> %tmp, <16 x half> %a, <16 x half> %b
- ret <16 x half> %tmp4;
+ ret <16 x half> %tmp4
}
define <8 x half> @test_max_v8f16(ptr %a_ptr, <8 x half> %b) {
-; CHECK_UNSAFE-LABEL: test_max_v8f16:
-; CHECK_UNSAFE: # %bb.0:
-; CHECK_UNSAFE-NEXT: vmaxph (%rdi), %xmm0, %xmm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_max_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %xmm1
-; CHECK-NEXT: vmaxph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vmaxph (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a = load <8 x half>, ptr %a_ptr
%tmp = fcmp fast ogt <8 x half> %a, %b
- %tmp4 = select <8 x i1> %tmp, <8 x half> %a, <8 x half> %b
- ret <8 x half> %tmp4;
+ %tmp4 = select nsz <8 x i1> %tmp, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %tmp4
}
define <8 x half> @test_min_v8f16(ptr %a_ptr, <8 x half> %b) {
-; CHECK_UNSAFE-LABEL: test_min_v8f16:
-; CHECK_UNSAFE: # %bb.0:
-; CHECK_UNSAFE-NEXT: vminph (%rdi), %xmm0, %xmm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_min_v8f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vmovaps (%rdi), %xmm1
-; CHECK-NEXT: vminph %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vminph (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
%a = load <8 x half>, ptr %a_ptr
%tmp = fcmp fast olt <8 x half> %a, %b
- %tmp4 = select <8 x i1> %tmp, <8 x half> %a, <8 x half> %b
- ret <8 x half> %tmp4;
+ %tmp4 = select nsz <8 x i1> %tmp, <8 x half> %a, <8 x half> %b
+ ret <8 x half> %tmp4
}
define half @test_max_f16(half %a, ptr %ptr) {
-; CHECK_UNSAFE-LABEL: test_max_f16:
-; CHECK_UNSAFE: # %bb.0: # %entry
-; CHECK_UNSAFE-NEXT: vmaxsh (%rdi), %xmm0, %xmm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_max_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vmaxsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vmaxsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
%0 = load half, ptr %ptr
%1 = fcmp fast ogt half %0, %a
- %2 = select i1 %1, half %0, half %a
+ %2 = select nsz i1 %1, half %0, half %a
ret half %2
}
define half @test_min_f16(half %a, ptr %ptr) {
-; CHECK_UNSAFE-LABEL: test_min_f16:
-; CHECK_UNSAFE: # %bb.0: # %entry
-; CHECK_UNSAFE-NEXT: vminsh (%rdi), %xmm0, %xmm0
-; CHECK_UNSAFE-NEXT: retq
-;
; CHECK-LABEL: test_min_f16:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vmovsh {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero
-; CHECK-NEXT: vminsh %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vminsh (%rdi), %xmm0, %xmm0
; CHECK-NEXT: retq
entry:
%0 = load half, ptr %ptr
%1 = fcmp fast olt half %0, %a
- %2 = select i1 %1, half %0, half %a
+ %2 = select nsz i1 %1, half %0, half %a
ret half %2
}
diff --git a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll
index e57c95c2eddef..8bb5fd5afdda5 100644
--- a/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll
+++ b/llvm/test/CodeGen/X86/exedepsfix-broadcast.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 -enable-no-signed-zeros-fp-math | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 | FileCheck %s
; Check that the ExeDepsFix pass correctly fixes the domain for broadcast instructions.
; <rdar://problem/16354675>
@@ -15,7 +15,7 @@ define <4 x float> @ExeDepsFix_broadcastss(<4 x float> %arg, <4 x float> %arg2)
%and = and <4 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%floatcast = bitcast <4 x i32> %and to <4 x float>
%max_is_x = fcmp oge <4 x float> %floatcast, %arg2
- %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
+ %max = select nsz <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
ret <4 x float> %max
}
@@ -30,7 +30,7 @@ define <8 x float> @ExeDepsFix_broadcastss256(<8 x float> %arg, <8 x float> %arg
%and = and <8 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%floatcast = bitcast <8 x i32> %and to <8 x float>
%max_is_x = fcmp oge <8 x float> %floatcast, %arg2
- %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
+ %max = select nsz <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
ret <8 x float> %max
}
@@ -48,7 +48,7 @@ define <4 x float> @ExeDepsFix_broadcastss_inreg(<4 x float> %arg, <4 x float> %
%and = and <4 x i32> %bitcast, %mask
%floatcast = bitcast <4 x i32> %and to <4 x float>
%max_is_x = fcmp oge <4 x float> %floatcast, %arg2
- %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
+ %max = select nsz <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
ret <4 x float> %max
}
@@ -66,7 +66,7 @@ define <8 x float> @ExeDepsFix_broadcastss256_inreg(<8 x float> %arg, <8 x float
%and = and <8 x i32> %bitcast, %mask
%floatcast = bitcast <8 x i32> %and to <8 x float>
%max_is_x = fcmp oge <8 x float> %floatcast, %arg2
- %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
+ %max = select nsz <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
ret <8 x float> %max
}
@@ -81,7 +81,7 @@ define <2 x double> @ExeDepsFix_broadcastsd(<2 x double> %arg, <2 x double> %arg
%and = and <2 x i64> %bitcast, <i64 2147483647, i64 2147483647>
%floatcast = bitcast <2 x i64> %and to <2 x double>
%max_is_x = fcmp oge <2 x double> %floatcast, %arg2
- %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
+ %max = select nsz <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
ret <2 x double> %max
}
@@ -96,7 +96,7 @@ define <4 x double> @ExeDepsFix_broadcastsd256(<4 x double> %arg, <4 x double> %
%and = and <4 x i64> %bitcast, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
%floatcast = bitcast <4 x i64> %and to <4 x double>
%max_is_x = fcmp oge <4 x double> %floatcast, %arg2
- %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
+ %max = select nsz <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
ret <4 x double> %max
}
@@ -116,7 +116,7 @@ define <2 x double> @ExeDepsFix_broadcastsd_inreg(<2 x double> %arg, <2 x double
%and = and <2 x i64> %bitcast, %mask
%floatcast = bitcast <2 x i64> %and to <2 x double>
%max_is_x = fcmp oge <2 x double> %floatcast, %arg2
- %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
+ %max = select nsz <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
ret <2 x double> %max
}
@@ -134,6 +134,6 @@ define <4 x double> @ExeDepsFix_broadcastsd256_inreg(<4 x double> %arg, <4 x dou
%and = and <4 x i64> %bitcast, %mask
%floatcast = bitcast <4 x i64> %and to <4 x double>
%max_is_x = fcmp oge <4 x double> %floatcast, %arg2
- %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
+ %max = select nsz <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
ret <4 x double> %max
}
diff --git a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
index dedfe22240201..2bbff1fecfc95 100644
--- a/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
+++ b/llvm/test/CodeGen/X86/fminimum-fmaximum.ll
@@ -100,7 +100,7 @@ define float @test_fmaximum(float %x, float %y) nounwind {
ret float %1
}
-define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" {
; SSE2-LABEL: test_fmaximum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -120,7 +120,7 @@ define <4 x float> @test_fmaximum_scalarize(<4 x float> %x, <4 x float> %y) "no-
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
@@ -387,7 +387,7 @@ define double @test_fmaximum_zero2(double %x, double %y) {
ret double %1
}
-define float @test_fmaximum_nsz(float %x, float %y) "no-signed-zeros-fp-math"="true" nounwind {
+define float @test_fmaximum_nsz(float %x, float %y) nounwind {
; SSE2-LABEL: test_fmaximum_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm2
@@ -431,7 +431,7 @@ define float @test_fmaximum_nsz(float %x, float %y) "no-signed-zeros-fp-math"="t
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
- %1 = tail call float @llvm.maximum.f32(float %x, float %y)
+ %1 = tail call nsz float @llvm.maximum.f32(float %x, float %y)
ret float %1
}
@@ -603,7 +603,7 @@ define float @test_fminimum(float %x, float %y) nounwind {
ret float %1
}
-define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "no-nans-fp-math"="true" {
; SSE2-LABEL: test_fminimum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: minpd %xmm1, %xmm0
@@ -623,7 +623,7 @@ define <2 x double> @test_fminimum_scalarize(<2 x double> %x, <2 x double> %y) "
; X86: # %bb.0:
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
+ %r = call nsz <2 x double> @llvm.minimum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %r
}
@@ -1068,7 +1068,7 @@ define <2 x double> @test_fminimum_vector(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
-define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" {
; SSE2-LABEL: test_fmaximum_vector:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -1088,7 +1088,7 @@ define <4 x float> @test_fmaximum_vector(<4 x float> %x, <4 x float> %y) "no-nan
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nsz <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
diff --git a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
index 96be1c7fc1f34..3980ffc9a60fd 100644
--- a/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
+++ b/llvm/test/CodeGen/X86/fminimumnum-fmaximumnum.ll
@@ -102,7 +102,7 @@ define float @test_fmaximumnum(float %x, float %y) nounwind {
ret float %1
}
-define <4 x float> @test_fmaximumnum_scalarize(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximumnum_scalarize(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" {
; SSE2-LABEL: test_fmaximumnum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -122,7 +122,7 @@ define <4 x float> @test_fmaximumnum_scalarize(<4 x float> %x, <4 x float> %y) "
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nsz <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
@@ -349,7 +349,7 @@ define double @test_fmaximumnum_zero2(double %x, double %y) {
ret double %1
}
-define float @test_fmaximumnum_nsz(float %x, float %y) "no-signed-zeros-fp-math"="true" nounwind {
+define float @test_fmaximumnum_nsz(float %x, float %y) nounwind {
; SSE2-LABEL: test_fmaximumnum_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
@@ -394,7 +394,7 @@ define float @test_fmaximumnum_nsz(float %x, float %y) "no-signed-zeros-fp-math"
; X86-NEXT: flds (%esp)
; X86-NEXT: popl %eax
; X86-NEXT: retl
- %1 = tail call float @llvm.maximumnum.f32(float %x, float %y)
+ %1 = tail call nsz float @llvm.maximumnum.f32(float %x, float %y)
ret float %1
}
@@ -549,7 +549,7 @@ define float @test_fminimumnum(float %x, float %y) nounwind {
ret float %1
}
-define <2 x double> @test_fminimumnum_scalarize(<2 x double> %x, <2 x double> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <2 x double> @test_fminimumnum_scalarize(<2 x double> %x, <2 x double> %y) "no-nans-fp-math"="true" {
; SSE2-LABEL: test_fminimumnum_scalarize:
; SSE2: # %bb.0:
; SSE2-NEXT: minpd %xmm1, %xmm0
@@ -569,7 +569,7 @@ define <2 x double> @test_fminimumnum_scalarize(<2 x double> %x, <2 x double> %y
; X86: # %bb.0:
; X86-NEXT: vminpd %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y)
+ %r = call nsz <2 x double> @llvm.minimumnum.v2f64(<2 x double> %x, <2 x double> %y)
ret <2 x double> %r
}
@@ -959,7 +959,7 @@ define <2 x double> @test_fminimumnum_vector(<2 x double> %x, <2 x double> %y) {
ret <2 x double> %r
}
-define <4 x float> @test_fmaximumnum_vector(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" {
+define <4 x float> @test_fmaximumnum_vector(<4 x float> %x, <4 x float> %y) "no-nans-fp-math"="true" {
; SSE2-LABEL: test_fmaximumnum_vector:
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
@@ -979,7 +979,7 @@ define <4 x float> @test_fmaximumnum_vector(<4 x float> %x, <4 x float> %y) "no-
; X86: # %bb.0:
; X86-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; X86-NEXT: retl
- %r = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
+ %r = call nsz <4 x float> @llvm.maximumnum.v4f32(<4 x float> %x, <4 x float> %y)
ret <4 x float> %r
}
diff --git a/llvm/test/CodeGen/X86/machine-combiner.ll b/llvm/test/CodeGen/X86/machine-combiner.ll
index d7e2ebd2cccfd..7aaeaa828bbd9 100644
--- a/llvm/test/CodeGen/X86/machine-combiner.ll
+++ b/llvm/test/CodeGen/X86/machine-combiner.ll
@@ -1,13 +1,13 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-no-nans-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-no-nans-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-no-nans-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512
; Incremental updates of the instruction depths should be enough for this test
; case.
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=sse -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
-; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -enable-no-signed-zeros-fp-math -mattr=avx512vl -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -mattr=sse -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -mattr=avx -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
+; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -enable-no-nans-fp-math -mattr=avx512vl -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
@@ -610,9 +610,9 @@ define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%cmp1 = fcmp olt float %x2, %t0
- %sel1 = select i1 %cmp1, float %x2, float %t0
+ %sel1 = select nsz i1 %cmp1, float %x2, float %t0
%cmp2 = fcmp olt float %x3, %sel1
- %sel2 = select i1 %cmp2, float %x3, float %sel1
+ %sel2 = select nsz i1 %cmp2, float %x3, float %sel1
ret float %sel2
}
@@ -634,9 +634,9 @@ define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3
; AVX-NEXT: retq
%t0 = fdiv float %x0, %x1
%cmp1 = fcmp ogt float %x2, %t0
- %sel1 = select i1 %cmp1, float %x2, float %t0
+ %sel1 = select nsz i1 %cmp1, float %x2, float %t0
%cmp2 = fcmp ogt float %x3, %sel1
- %sel2 = select i1 %cmp2, float %x3, float %sel1
+ %sel2 = select nsz i1 %cmp2, float %x3, float %sel1
ret float %sel2
}
@@ -658,9 +658,9 @@ define double @reassociate_mins_double(double %x0, double %x1, double %x2, doubl
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%cmp1 = fcmp olt double %x2, %t0
- %sel1 = select i1 %cmp1, double %x2, double %t0
+ %sel1 = select nsz i1 %cmp1, double %x2, double %t0
%cmp2 = fcmp olt double %x3, %sel1
- %sel2 = select i1 %cmp2, double %x3, double %sel1
+ %sel2 = select nsz i1 %cmp2, double %x3, double %sel1
ret double %sel2
}
@@ -682,9 +682,9 @@ define double @reassociate_maxs_double(double %x0, double %x1, double %x2, doubl
; AVX-NEXT: retq
%t0 = fdiv double %x0, %x1
%cmp1 = fcmp ogt double %x2, %t0
- %sel1 = select i1 %cmp1, double %x2, double %t0
+ %sel1 = select nsz i1 %cmp1, double %x2, double %t0
%cmp2 = fcmp ogt double %x3, %sel1
- %sel2 = select i1 %cmp2, double %x3, double %sel1
+ %sel2 = select nsz i1 %cmp2, double %x3, double %sel1
ret double %sel2
}
@@ -706,9 +706,9 @@ define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4
; AVX-NEXT: retq
%t0 = fadd <4 x float> %x0, %x1
%cmp1 = fcmp olt <4 x float> %x2, %t0
- %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
+ %sel1 = select nsz <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
%cmp2 = fcmp olt <4 x float> %x3, %sel1
- %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
+ %sel2 = select nsz <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
ret <4 x float> %sel2
}
@@ -730,9 +730,9 @@ define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4
; AVX-NEXT: retq
%t0 = fadd <4 x float> %x0, %x1
%cmp1 = fcmp ogt <4 x float> %x2, %t0
- %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
+ %sel1 = select nsz <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
%cmp2 = fcmp ogt <4 x float> %x3, %sel1
- %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
+ %sel2 = select nsz <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
ret <4 x float> %sel2
}
@@ -754,9 +754,9 @@ define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1,
; AVX-NEXT: retq
%t0 = fadd <2 x double> %x0, %x1
%cmp1 = fcmp olt <2 x double> %x2, %t0
- %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
+ %sel1 = select nsz <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
%cmp2 = fcmp olt <2 x double> %x3, %sel1
- %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
+ %sel2 = select nsz <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
ret <2 x double> %sel2
}
@@ -778,9 +778,9 @@ define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1,
; AVX-NEXT: retq
%t0 = fadd <2 x double> %x0, %x1
%cmp1 = fcmp ogt <2 x double> %x2, %t0
- %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
+ %sel1 = select nsz <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
%cmp2 = fcmp ogt <2 x double> %x3, %sel1
- %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
+ %sel2 = select nsz <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
ret <2 x double> %sel2
}
@@ -805,9 +805,9 @@ define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8
; AVX-NEXT: retq
%t0 = fadd <8 x float> %x0, %x1
%cmp1 = fcmp olt <8 x float> %x2, %t0
- %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
+ %sel1 = select nsz <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
%cmp2 = fcmp olt <8 x float> %x3, %sel1
- %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
+ %sel2 = select nsz <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
ret <8 x float> %sel2
}
@@ -832,9 +832,9 @@ define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8
; AVX-NEXT: retq
%t0 = fadd <8 x float> %x0, %x1
%cmp1 = fcmp ogt <8 x float> %x2, %t0
- %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
+ %sel1 = select nsz <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
%cmp2 = fcmp ogt <8 x float> %x3, %sel1
- %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
+ %sel2 = select nsz <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
ret <8 x float> %sel2
}
@@ -859,9 +859,9 @@ define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1,
; AVX-NEXT: retq
%t0 = fadd <4 x double> %x0, %x1
%cmp1 = fcmp olt <4 x double> %x2, %t0
- %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
+ %sel1 = select nsz <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
%cmp2 = fcmp olt <4 x double> %x3, %sel1
- %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
+ %sel2 = select nsz <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
ret <4 x double> %sel2
}
@@ -886,9 +886,9 @@ define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1,
; AVX-NEXT: retq
%t0 = fadd <4 x double> %x0, %x1
%cmp1 = fcmp ogt <4 x double> %x2, %t0
- %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
+ %sel1 = select nsz <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
%cmp2 = fcmp ogt <4 x double> %x3, %sel1
- %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
+ %sel2 = select nsz <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
ret <4 x double> %sel2
}
@@ -929,9 +929,9 @@ define <16 x float> @reassociate_mins_v16f32(<16 x float> %x0, <16 x float> %x1,
; AVX512-NEXT: retq
%t0 = fadd <16 x float> %x0, %x1
%cmp1 = fcmp olt <16 x float> %x2, %t0
- %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
+ %sel1 = select nsz <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
%cmp2 = fcmp olt <16 x float> %x3, %sel1
- %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
+ %sel2 = select nsz <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
ret <16 x float> %sel2
}
@@ -972,9 +972,9 @@ define <16 x float> @reassociate_maxs_v16f32(<16 x float> %x0, <16 x float> %x1,
; AVX512-NEXT: retq
%t0 = fadd <16 x float> %x0, %x1
%cmp1 = fcmp ogt <16 x float> %x2, %t0
- %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
+ %sel1 = select nsz <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
%cmp2 = fcmp ogt <16 x float> %x3, %sel1
- %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
+ %sel2 = select nsz <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
ret <16 x float> %sel2
}
@@ -1015,9 +1015,9 @@ define <8 x double> @reassociate_mins_v8f64(<8 x double> %x0, <8 x double> %x1,
; AVX512-NEXT: retq
%t0 = fadd <8 x double> %x0, %x1
%cmp1 = fcmp olt <8 x double> %x2, %t0
- %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
+ %sel1 = select nsz <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
%cmp2 = fcmp olt <8 x double> %x3, %sel1
- %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
+ %sel2 = select nsz <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
ret <8 x double> %sel2
}
@@ -1058,9 +1058,9 @@ define <8 x double> @reassociate_maxs_v8f64(<8 x double> %x0, <8 x double> %x1,
; AVX512-NEXT: retq
%t0 = fadd <8 x double> %x0, %x1
%cmp1 = fcmp ogt <8 x double> %x2, %t0
- %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
+ %sel1 = select nsz <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
%cmp2 = fcmp ogt <8 x double> %x3, %sel1
- %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
+ %sel2 = select nsz <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
ret <8 x double> %sel2
}
diff --git a/llvm/test/CodeGen/X86/sse-minmax-unsafe.ll b/llvm/test/CodeGen/X86/sse-minmax-unsafe.ll
new file mode 100644
index 0000000000000..b807ae6adf9f1
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sse-minmax-unsafe.ll
@@ -0,0 +1,972 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-nans-fp-math | FileCheck %s
+
+; With the no-NaNs relaxation in effect and nsz on the selects, all of
+; these patterns can be matched as SSE min or max. Some of them can be
+; matched provided that the operands are swapped; none of them should
+; need a comparison and a conditional branch.
+
+; The naming convention is {o,u}{gt,lt,ge,le}{,_x,_y}{,_inverse}
+; _x: use 0.0 instead of %y
+; _y: use -0.0 instead of %y
+; _inverse : swap the arms of the select.
+
+define double @ogt(double %x, double %y) {
+; CHECK-LABEL: ogt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ogt double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @olt(double %x, double %y) {
+; CHECK-LABEL: olt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp olt double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @ogt_inverse(double %x, double %y) {
+; CHECK-LABEL: ogt_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ogt double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @olt_inverse(double %x, double %y) {
+; CHECK-LABEL: olt_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp olt double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @oge(double %x, double %y) {
+;
+; RELAX-LABEL: oge:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: oge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp oge double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @ole(double %x, double %y) {
+;
+; RELAX-LABEL: ole:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ole:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ole double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @oge_inverse(double %x, double %y) {
+;
+; RELAX-LABEL: oge_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: oge_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp oge double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @ole_inverse(double %x, double %y) {
+;
+; RELAX-LABEL: ole_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ole_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ole double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @ogt_x(double %x) {
+; CHECK-LABEL: ogt_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @olt_x(double %x) {
+; CHECK-LABEL: olt_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @ogt_inverse_x(double %x) {
+; CHECK-LABEL: ogt_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ogt double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @olt_inverse_x(double %x) {
+; CHECK-LABEL: olt_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp olt double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @oge_x(double %x) {
+;
+; RELAX-LABEL: oge_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: oge_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @ole_x(double %x) {
+;
+; RELAX-LABEL: ole_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ole_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @oge_inverse_x(double %x) {
+;
+; RELAX-LABEL: oge_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: oge_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp oge double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ole_inverse_x(double %x) {
+;
+; RELAX-LABEL: ole_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ole_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ole double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ugt(double %x, double %y) {
+;
+; RELAX-LABEL: ugt:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ugt:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ugt double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @ult(double %x, double %y) {
+;
+; RELAX-LABEL: ult:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ult:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ult double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @ugt_inverse(double %x, double %y) {
+;
+; RELAX-LABEL: ugt_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ugt_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ugt double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @ult_inverse(double %x, double %y) {
+;
+; RELAX-LABEL: ult_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ult_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ult double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @uge(double %x, double %y) {
+;
+; RELAX-LABEL: uge:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: uge:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp uge double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @ule(double %x, double %y) {
+;
+; RELAX-LABEL: ule:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ule:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ule double %x, %y
+ %d = select nsz i1 %c, double %x, double %y
+ ret double %d
+}
+
+define double @uge_inverse(double %x, double %y) {
+;
+; RELAX-LABEL: uge_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: uge_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp uge double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @ule_inverse(double %x, double %y) {
+;
+; RELAX-LABEL: ule_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ule_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ule double %x, %y
+ %d = select nsz i1 %c, double %y, double %x
+ ret double %d
+}
+
+define double @ugt_x(double %x) {
+;
+; RELAX-LABEL: ugt_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ugt_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @ult_x(double %x) {
+;
+; RELAX-LABEL: ult_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ult_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @ugt_inverse_x(double %x) {
+;
+; RELAX-LABEL: ugt_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ugt_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ugt double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ult_inverse_x(double %x) {
+;
+; RELAX-LABEL: ult_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ult_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ult double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @uge_x(double %x) {
+;
+; RELAX-LABEL: uge_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: uge_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @ule_x(double %x) {
+;
+; RELAX-LABEL: ule_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ule_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select nsz i1 %c, double %x, double 0.000000e+00
+ ret double %d
+}
+
+define double @uge_inverse_x(double %x) {
+;
+; RELAX-LABEL: uge_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: uge_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: minsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp uge double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ule_inverse_x(double %x) {
+;
+; RELAX-LABEL: ule_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ule_inverse_x:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xorpd %xmm1, %xmm1
+; CHECK-NEXT: maxsd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ule double %x, 0.000000e+00
+ %d = select nsz i1 %c, double 0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ogt_y(double %x) {
+; CHECK-LABEL: ogt_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ogt double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @olt_y(double %x) {
+; CHECK-LABEL: olt_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp olt double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @ogt_inverse_y(double %x) {
+; CHECK-LABEL: ogt_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ogt double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+define double @olt_inverse_y(double %x) {
+; CHECK-LABEL: olt_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp olt double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+define double @oge_y(double %x) {
+;
+; RELAX-LABEL: oge_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: oge_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp oge double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @ole_y(double %x) {
+;
+; RELAX-LABEL: ole_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ole_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ole double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @oge_inverse_y(double %x) {
+;
+; RELAX-LABEL: oge_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: oge_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp oge double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ole_inverse_y(double %x) {
+;
+; RELAX-LABEL: ole_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ole_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ole double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ugt_y(double %x) {
+;
+; RELAX-LABEL: ugt_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ugt_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ugt double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @ult_y(double %x) {
+;
+; RELAX-LABEL: ult_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ult_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ult double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @ugt_inverse_y(double %x) {
+;
+; RELAX-LABEL: ugt_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ugt_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ugt double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ult_inverse_y(double %x) {
+;
+; RELAX-LABEL: ult_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ult_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ult double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+define double @uge_y(double %x) {
+;
+; RELAX-LABEL: uge_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: uge_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp uge double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @ule_y(double %x) {
+;
+; RELAX-LABEL: ule_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ule_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ule double %x, -0.000000e+00
+ %d = select nsz i1 %c, double %x, double -0.000000e+00
+ ret double %d
+}
+
+define double @uge_inverse_y(double %x) {
+;
+; RELAX-LABEL: uge_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: uge_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp uge double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+define double @ule_inverse_y(double %x) {
+;
+; RELAX-LABEL: ule_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: ule_inverse_y:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %c = fcmp ule double %x, -0.000000e+00
+ %d = select nsz i1 %c, double -0.000000e+00, double %x
+ ret double %d
+}
+
+; Test a few more misc. cases.
+
+define double @clampTo3k_a(double %x) {
+; CHECK-LABEL: clampTo3k_a:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp ogt double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define double @clampTo3k_b(double %x) {
+;
+; RELAX-LABEL: clampTo3k_b:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: clampTo3k_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp uge double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define double @clampTo3k_c(double %x) {
+; CHECK-LABEL: clampTo3k_c:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp olt double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define double @clampTo3k_d(double %x) {
+;
+; RELAX-LABEL: clampTo3k_d:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: clampTo3k_d:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp ule double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define double @clampTo3k_e(double %x) {
+; CHECK-LABEL: clampTo3k_e:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp olt double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define double @clampTo3k_f(double %x) {
+;
+; RELAX-LABEL: clampTo3k_f:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: clampTo3k_f:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp ule double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define double @clampTo3k_g(double %x) {
+; CHECK-LABEL: clampTo3k_g:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp ogt double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define double @clampTo3k_h(double %x) {
+;
+; RELAX-LABEL: clampTo3k_h:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: clampTo3k_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: retq
+ %t0 = fcmp uge double %x, 3.000000e+03
+ %y = select nsz i1 %t0, double 3.000000e+03, double %x
+ ret double %y
+}
+
+define <2 x double> @test_maxpd(<2 x double> %x, <2 x double> %y) {
+;
+; RELAX-LABEL: test_maxpd:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxpd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_maxpd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxpd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %max_is_x = fcmp oge <2 x double> %x, %y
+ %max = select nsz <2 x i1> %max_is_x, <2 x double> %x, <2 x double> %y
+ ret <2 x double> %max
+}
+
+define <2 x double> @test_minpd(<2 x double> %x, <2 x double> %y) {
+;
+; RELAX-LABEL: test_minpd:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minpd %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_minpd:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minpd %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %min_is_x = fcmp ole <2 x double> %x, %y
+ %min = select nsz <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
+ ret <2 x double> %min
+}
+
+define <4 x float> @test_maxps(<4 x float> %x, <4 x float> %y) {
+;
+; RELAX-LABEL: test_maxps:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxps %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_maxps:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %max_is_x = fcmp oge <4 x float> %x, %y
+ %max = select nsz <4 x i1> %max_is_x, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %max
+}
+
+define <4 x float> @test_minps(<4 x float> %x, <4 x float> %y) {
+;
+; RELAX-LABEL: test_minps:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minps %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_minps:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %min_is_x = fcmp ole <4 x float> %x, %y
+ %min = select nsz <4 x i1> %min_is_x, <4 x float> %x, <4 x float> %y
+ ret <4 x float> %min
+}
+
+define <2 x float> @test_maxps_illegal_v2f32(<2 x float> %x, <2 x float> %y) {
+;
+; RELAX-LABEL: test_maxps_illegal_v2f32:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxps %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_maxps_illegal_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %max_is_x = fcmp oge <2 x float> %x, %y
+ %max = select nsz <2 x i1> %max_is_x, <2 x float> %x, <2 x float> %y
+ ret <2 x float> %max
+}
+
+define <2 x float> @test_minps_illegal_v2f32(<2 x float> %x, <2 x float> %y) {
+;
+; RELAX-LABEL: test_minps_illegal_v2f32:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minps %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_minps_illegal_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %min_is_x = fcmp ole <2 x float> %x, %y
+ %min = select nsz <2 x i1> %min_is_x, <2 x float> %x, <2 x float> %y
+ ret <2 x float> %min
+}
+
+define <3 x float> @test_maxps_illegal_v3f32(<3 x float> %x, <3 x float> %y) {
+;
+; RELAX-LABEL: test_maxps_illegal_v3f32:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxps %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_maxps_illegal_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: maxps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %max_is_x = fcmp oge <3 x float> %x, %y
+ %max = select nsz <3 x i1> %max_is_x, <3 x float> %x, <3 x float> %y
+ ret <3 x float> %max
+}
+
+define <3 x float> @test_minps_illegal_v3f32(<3 x float> %x, <3 x float> %y) {
+;
+; RELAX-LABEL: test_minps_illegal_v3f32:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minps %xmm1, %xmm0
+; RELAX-NEXT: retq
+; CHECK-LABEL: test_minps_illegal_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: minps %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %min_is_x = fcmp ole <3 x float> %x, %y
+ %min = select nsz <3 x i1> %min_is_x, <3 x float> %x, <3 x float> %y
+ ret <3 x float> %min
+}
+
+; OSS-Fuzz #13838
+; https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=13838
+define float @ossfuzz13838(float %x) {
+;
+; CHECK-LABEL: ossfuzz13838:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: movss {{.*#+}} xmm0 = [2.55E+2,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: retq
+bb:
+ %cmp2 = fcmp fast olt float %x, 2.550000e+02
+ %B1 = urem i1 %cmp2, %cmp2
+ %min = select nsz i1 %B1, float %x, float 2.550000e+02
+ %B = frem float %min, 0x47EFFFFFE0000000
+ %cmp1 = fcmp fast olt float %B, 1.000000e+00
+ %r = select nsz i1 %cmp1, float 1.000000e+00, float %min
+ ret float %r
+}
diff --git a/llvm/test/CodeGen/X86/sse-minmax.ll b/llvm/test/CodeGen/X86/sse-minmax.ll
index 7904b21a3b1fa..0baabe347902d 100644
--- a/llvm/test/CodeGen/X86/sse-minmax.ll
+++ b/llvm/test/CodeGen/X86/sse-minmax.ll
@@ -1,7 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=STRICT
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-signed-zeros-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=UNSAFE
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-nans-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=RELAX --check-prefix=FINITE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 | FileCheck %s --check-prefix=ALL --check-prefix=STRICT
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse4.2 -enable-no-nans-fp-math | FileCheck %s --check-prefix=ALL --check-prefix=RELAX
; Some of these patterns can be matched as SSE min or max. Some of
; them can be matched provided that the operands are swapped.
@@ -34,44 +33,22 @@ define double @olt(double %x, double %y) {
}
define double @ogt_inverse(double %x, double %y) {
-; STRICT-LABEL: ogt_inverse:
-; STRICT: # %bb.0:
-; STRICT-NEXT: minsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: ogt_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ogt_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: ogt_inverse:
+; ALL: # %bb.0:
+; ALL-NEXT: minsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%c = fcmp ogt double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
}
define double @olt_inverse(double %x, double %y) {
-; STRICT-LABEL: olt_inverse:
-; STRICT: # %bb.0:
-; STRICT-NEXT: maxsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: olt_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: olt_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: olt_inverse:
+; ALL: # %bb.0:
+; ALL-NEXT: maxsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%c = fcmp olt double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
@@ -124,16 +101,11 @@ define double @oge_inverse(double %x, double %y) {
; STRICT-NEXT: movapd %xmm2, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: oge_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: oge_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: oge_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp oge double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
@@ -148,16 +120,11 @@ define double @ole_inverse(double %x, double %y) {
; STRICT-NEXT: movapd %xmm2, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ole_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ole_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ole_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ole double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
@@ -186,50 +153,24 @@ define double @olt_x(double %x) {
}
define double @ogt_inverse_x(double %x) {
-; STRICT-LABEL: ogt_inverse_x:
-; STRICT: # %bb.0:
-; STRICT-NEXT: xorpd %xmm1, %xmm1
-; STRICT-NEXT: minsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: ogt_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ogt_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: ogt_inverse_x:
+; ALL: # %bb.0:
+; ALL-NEXT: xorpd %xmm1, %xmm1
+; ALL-NEXT: minsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%c = fcmp ogt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
}
define double @olt_inverse_x(double %x) {
-; STRICT-LABEL: olt_inverse_x:
-; STRICT: # %bb.0:
-; STRICT-NEXT: xorpd %xmm1, %xmm1
-; STRICT-NEXT: maxsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: olt_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: olt_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: olt_inverse_x:
+; ALL: # %bb.0:
+; ALL-NEXT: xorpd %xmm1, %xmm1
+; ALL-NEXT: maxsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%c = fcmp olt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -281,18 +222,12 @@ define double @oge_inverse_x(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: oge_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: oge_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: oge_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp oge double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -308,18 +243,12 @@ define double @ole_inverse_x(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ole_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ole_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ole_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ole double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -371,16 +300,11 @@ define double @ugt_inverse(double %x, double %y) {
; STRICT-NEXT: movapd %xmm2, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ugt_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ugt_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ugt_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ugt double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
@@ -396,16 +320,11 @@ define double @ult_inverse(double %x, double %y) {
; STRICT-NEXT: movapd %xmm2, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ult_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ult_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ult_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ult double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
@@ -449,16 +368,11 @@ define double @uge_inverse(double %x, double %y) {
; STRICT-NEXT: minsd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: uge_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: uge_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: uge_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp uge double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
@@ -470,16 +384,11 @@ define double @ule_inverse(double %x, double %y) {
; STRICT-NEXT: maxsd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ule_inverse:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ule_inverse:
-; FINITE: # %bb.0:
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ule_inverse:
+; RELAX: # %bb.0:
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ule double %x, %y
%d = select i1 %c, double %y, double %x
ret double %d
@@ -532,18 +441,12 @@ define double @ugt_inverse_x(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ugt_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ugt_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ugt_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ugt double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -558,18 +461,12 @@ define double @ult_inverse_x(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ult_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ult_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ult_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ult double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -618,18 +515,12 @@ define double @uge_inverse_x(double %x) {
; STRICT-NEXT: minsd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: uge_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: minsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: uge_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: uge_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp uge double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -642,18 +533,12 @@ define double @ule_inverse_x(double %x) {
; STRICT-NEXT: maxsd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ule_inverse_x:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: xorpd %xmm1, %xmm1
-; UNSAFE-NEXT: maxsd %xmm1, %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ule_inverse_x:
-; FINITE: # %bb.0:
-; FINITE-NEXT: xorpd %xmm1, %xmm1
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ule_inverse_x:
+; RELAX: # %bb.0:
+; RELAX-NEXT: xorpd %xmm1, %xmm1
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ule double %x, 0.000000e+00
%d = select i1 %c, double 0.000000e+00, double %x
ret double %d
@@ -680,48 +565,24 @@ define double @olt_y(double %x) {
}
define double @ogt_inverse_y(double %x) {
-; STRICT-LABEL: ogt_inverse_y:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; STRICT-NEXT: minsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: ogt_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ogt_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: ogt_inverse_y:
+; ALL: # %bb.0:
+; ALL-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; ALL-NEXT: minsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%c = fcmp ogt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
}
define double @olt_inverse_y(double %x) {
-; STRICT-LABEL: olt_inverse_y:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; STRICT-NEXT: maxsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: olt_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: olt_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: olt_inverse_y:
+; ALL: # %bb.0:
+; ALL-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; ALL-NEXT: maxsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%c = fcmp olt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
@@ -776,17 +637,12 @@ define double @oge_inverse_y(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: oge_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: oge_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: oge_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp oge double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
@@ -801,17 +657,12 @@ define double @ole_inverse_y(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ole_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ole_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ole_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ole double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
@@ -865,17 +716,12 @@ define double @ugt_inverse_y(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ugt_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ugt_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ugt_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ugt double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
@@ -891,17 +737,12 @@ define double @ult_inverse_y(double %x) {
; STRICT-NEXT: movapd %xmm1, %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ult_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ult_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ult_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ult double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
@@ -947,17 +788,12 @@ define double @uge_inverse_y(double %x) {
; STRICT-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: uge_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: uge_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: uge_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp uge double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
@@ -969,17 +805,12 @@ define double @ule_inverse_y(double %x) {
; STRICT-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: ule_inverse_y:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: ule_inverse_y:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: ule_inverse_y:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [-0.0E+0,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%c = fcmp ule double %x, -0.000000e+00
%d = select i1 %c, double -0.000000e+00, double %x
ret double %d
@@ -988,24 +819,12 @@ define double @ule_inverse_y(double %x) {
; Test a few more misc. cases.
define double @clampTo3k_a(double %x) {
-; STRICT-LABEL: clampTo3k_a:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; STRICT-NEXT: minsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: clampTo3k_a:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_a:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: clampTo3k_a:
+; ALL: # %bb.0:
+; ALL-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; ALL-NEXT: minsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%t0 = fcmp ogt double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
@@ -1017,41 +836,24 @@ define double @clampTo3k_b(double %x) {
; STRICT-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: clampTo3k_b:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_b:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: clampTo3k_b:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%t0 = fcmp uge double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
}
define double @clampTo3k_c(double %x) {
-; STRICT-LABEL: clampTo3k_c:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; STRICT-NEXT: maxsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: clampTo3k_c:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_c:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: clampTo3k_c:
+; ALL: # %bb.0:
+; ALL-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; ALL-NEXT: maxsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%t0 = fcmp olt double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
@@ -1063,41 +865,24 @@ define double @clampTo3k_d(double %x) {
; STRICT-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: clampTo3k_d:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_d:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: clampTo3k_d:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%t0 = fcmp ule double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
}
define double @clampTo3k_e(double %x) {
-; STRICT-LABEL: clampTo3k_e:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; STRICT-NEXT: maxsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: clampTo3k_e:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_e:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: clampTo3k_e:
+; ALL: # %bb.0:
+; ALL-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; ALL-NEXT: maxsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%t0 = fcmp olt double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
@@ -1109,41 +894,24 @@ define double @clampTo3k_f(double %x) {
; STRICT-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: clampTo3k_f:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: maxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_f:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: maxsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: clampTo3k_f:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: maxsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%t0 = fcmp ule double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
}
define double @clampTo3k_g(double %x) {
-; STRICT-LABEL: clampTo3k_g:
-; STRICT: # %bb.0:
-; STRICT-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; STRICT-NEXT: minsd %xmm0, %xmm1
-; STRICT-NEXT: movapd %xmm1, %xmm0
-; STRICT-NEXT: retq
-;
-; UNSAFE-LABEL: clampTo3k_g:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_g:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; ALL-LABEL: clampTo3k_g:
+; ALL: # %bb.0:
+; ALL-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; ALL-NEXT: minsd %xmm0, %xmm1
+; ALL-NEXT: movapd %xmm1, %xmm0
+; ALL-NEXT: retq
%t0 = fcmp ogt double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
@@ -1155,17 +923,12 @@ define double @clampTo3k_h(double %x) {
; STRICT-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; STRICT-NEXT: retq
;
-; UNSAFE-LABEL: clampTo3k_h:
-; UNSAFE: # %bb.0:
-; UNSAFE-NEXT: minsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; UNSAFE-NEXT: retq
-;
-; FINITE-LABEL: clampTo3k_h:
-; FINITE: # %bb.0:
-; FINITE-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
-; FINITE-NEXT: minsd %xmm0, %xmm1
-; FINITE-NEXT: movapd %xmm1, %xmm0
-; FINITE-NEXT: retq
+; RELAX-LABEL: clampTo3k_h:
+; RELAX: # %bb.0:
+; RELAX-NEXT: movsd {{.*#+}} xmm1 = [3.0E+3,0.0E+0]
+; RELAX-NEXT: minsd %xmm0, %xmm1
+; RELAX-NEXT: movapd %xmm1, %xmm0
+; RELAX-NEXT: retq
%t0 = fcmp uge double %x, 3.000000e+03
%y = select i1 %t0, double 3.000000e+03, double %x
ret double %y
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
index 43743d54c4ccc..b90b011a2286f 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512.ll
@@ -671,7 +671,7 @@ define <8 x double> @stack_fold_maxpd_zmm_commutable_k_commuted(<8 x double> %a0
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
+ %2 = call nsz <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
%3 = bitcast i8 %mask to <8 x i1>
%4 = load <8 x double>, ptr %passthru
%5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
@@ -689,7 +689,7 @@ define <8 x double> @stack_fold_maxpd_zmm_commutable_kz(<8 x double> %a0, <8 x d
; CHECK-NEXT: vmaxpd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
+ %2 = call nsz <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
%3 = bitcast i8 %mask to <8 x i1>
%4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
ret <8 x double> %4
@@ -757,7 +757,7 @@ define <16 x float> @stack_fold_maxps_zmm_commutable_k_commuted(<16 x float> %a0
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
+ %2 = call nsz <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
%3 = bitcast i16 %mask to <16 x i1>
%4 = load <16 x float>, ptr %passthru
%5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
@@ -775,7 +775,7 @@ define <16 x float> @stack_fold_maxps_zmm_commutable_kz(<16 x float> %a0, <16 x
; CHECK-NEXT: vmaxps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
+ %2 = call nsz <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
%3 = bitcast i16 %mask to <16 x i1>
%4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
ret <16 x float> %4
@@ -843,7 +843,7 @@ define <8 x double> @stack_fold_minpd_zmm_commutable_k_commuted(<8 x double> %a0
; CHECK-NEXT: vmovapd %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
+ %2 = call nsz <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
%3 = bitcast i8 %mask to <8 x i1>
%4 = load <8 x double>, ptr %passthru
%5 = select <8 x i1> %3, <8 x double> %2, <8 x double> %4
@@ -861,7 +861,7 @@ define <8 x double> @stack_fold_minpd_zmm_commutable_kz(<8 x double> %a0, <8 x d
; CHECK-NEXT: vminpd {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
+ %2 = call nsz <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a1, <8 x double> %a0, i32 4)
%3 = bitcast i8 %mask to <8 x i1>
%4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
ret <8 x double> %4
@@ -929,7 +929,7 @@ define <16 x float> @stack_fold_minps_zmm_commutable_k_commuted(<16 x float> %a0
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
+ %2 = call nsz <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
%3 = bitcast i16 %mask to <16 x i1>
%4 = load <16 x float>, ptr %passthru
%5 = select <16 x i1> %3, <16 x float> %2, <16 x float> %4
@@ -947,7 +947,7 @@ define <16 x float> @stack_fold_minps_zmm_commutable_kz(<16 x float> %a0, <16 x
; CHECK-NEXT: vminps {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
+ %2 = call nsz <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a1, <16 x float> %a0, i32 4)
%3 = bitcast i16 %mask to <16 x i1>
%4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
ret <16 x float> %4
@@ -2058,4 +2058,4 @@ define <16 x float> @stack_fold_permilpsvar_zmm_maskz(<16 x float> %a0, <16 x i3
ret <16 x float> %4
}
-attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #1 = { "no-nans-fp-math"="true" }
diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
index b715df8f71eee..9db8c7562cb0c 100644
--- a/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
+++ b/llvm/test/CodeGen/X86/stack-folding-fp-avx512fp16.ll
@@ -632,7 +632,7 @@ define <32 x half> @stack_fold_maxph_zmm_commutable_commuted(<32 x half> %a0, <3
; CHECK-NEXT: vmaxph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
+ %2 = call nsz <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
ret <32 x half> %2
}
@@ -669,7 +669,7 @@ define <32 x half> @stack_fold_maxph_zmm_commutable_k_commuted(<32 x half> %a0,
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
+ %2 = call nsz <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
%3 = bitcast i32 %mask to <32 x i1>
%4 = load <32 x half>, ptr %passthru
%5 = select <32 x i1> %3, <32 x half> %2, <32 x half> %4
@@ -704,7 +704,7 @@ define <32 x half> @stack_fold_maxph_zmm_commutable_kz_commuted(<32 x half> %a0,
; CHECK-NEXT: vmaxph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
+ %2 = call nsz <32 x half> @llvm.x86.avx512fp16.max.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
%3 = bitcast i32 %mask to <32 x i1>
%4 = select <32 x i1> %3, <32 x half> %2, <32 x half> zeroinitializer
ret <32 x half> %4
@@ -768,7 +768,7 @@ define half @stack_fold_maxsh_commutable_commuted(half %a0, half %a1) #1 {
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp ogt half %a1, %a0
- %3 = select i1 %2, half %a1, half %a0
+ %3 = select nsz i1 %2, half %a1, half %a0
ret half %3
}
@@ -950,7 +950,7 @@ define <32 x half> @stack_fold_minph_zmm_commutable_commuted(<32 x half> %a0, <3
; CHECK-NEXT: vminph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
+ %2 = call nsz <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
ret <32 x half> %2
}
@@ -987,7 +987,7 @@ define <32 x half> @stack_fold_minph_zmm_commutable_k_commuted(<32 x half> %a0,
; CHECK-NEXT: vmovaps %zmm2, %zmm0
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
+ %2 = call nsz <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
%3 = bitcast i32 %mask to <32 x i1>
%4 = load <32 x half>, ptr %passthru
%5 = select <32 x i1> %3, <32 x half> %2, <32 x half> %4
@@ -1022,7 +1022,7 @@ define <32 x half> @stack_fold_minph_zmm_commutable_kz_commuted(<32 x half> %a0,
; CHECK-NEXT: vminph {{[-0-9]+}}(%r{{[sb]}}p), %zmm0, %zmm0 {%k1} {z} # 64-byte Folded Reload
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
- %2 = call <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
+ %2 = call nsz <32 x half> @llvm.x86.avx512fp16.min.ph.512(<32 x half> %a1, <32 x half> %a0, i32 4)
%3 = bitcast i32 %mask to <32 x i1>
%4 = select <32 x i1> %3, <32 x half> %2, <32 x half> zeroinitializer
ret <32 x half> %4
@@ -1086,7 +1086,7 @@ define half @stack_fold_minsh_commutable_commuted(half %a0, half %a1) #1 {
; CHECK-NEXT: retq
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{xmm16},~{xmm17},~{xmm18},~{xmm19},~{xmm20},~{xmm21},~{xmm22},~{xmm23},~{xmm24},~{xmm25},~{xmm26},~{xmm27},~{xmm28},~{xmm29},~{xmm30},~{xmm31},~{flags}"()
%2 = fcmp olt half %a1, %a0
- %3 = select i1 %2, half %a1, half %a0
+ %3 = select nsz i1 %2, half %a1, half %a0
ret half %3
}
@@ -2316,4 +2316,4 @@ define <4 x float> @stack_fold_fcmaddcsh_maskz(<4 x float> %a0, <4 x float> %a1,
}
declare <4 x float> @llvm.x86.avx512fp16.maskz.vfcmadd.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-attributes #1 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
+attributes #1 = { "no-nans-fp-math"="true" }
More information about the llvm-commits
mailing list