[llvm] [SDAG][X86] Add method to scalarize `STRICT_FSETCC` (PR #154486)

Abhishek Kaushik via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 20 00:34:20 PDT 2025


https://github.com/abhishek-kaushik22 updated https://github.com/llvm/llvm-project/pull/154486

>From e76f06213bca8fdba796f829eaf337c36a63f121 Mon Sep 17 00:00:00 2001
From: Abhishek Kaushik <abhishek.kaushik at intel.com>
Date: Wed, 20 Aug 2025 13:02:31 +0530
Subject: [PATCH] [SDAG][X86] Add method to scalarize `STRICT_FSETCC`

Fixes #154485
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |    1 +
 .../SelectionDAG/LegalizeVectorTypes.cpp      |   41 +
 llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll  | 1298 +++++++++++++++++
 3 files changed, 1340 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 33fa3012618b3..65fd863e55ac9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -909,6 +909,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
   SDValue ScalarizeVecOp_VSELECT(SDNode *N);
   SDValue ScalarizeVecOp_VSETCC(SDNode *N);
+  SDValue ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
   SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bc2dbfb4cbaae..a0fd855832a33 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -789,6 +789,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
   case ISD::SETCC:
     Res = ScalarizeVecOp_VSETCC(N);
     break;
+  case ISD::STRICT_FSETCC:
+  case ISD::STRICT_FSETCCS:
+    Res = ScalarizeVecOp_VSTRICT_FSETCC(N, OpNo);
+    break;
   case ISD::STORE:
     Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
     break;
@@ -985,6 +989,43 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_VSETCC(SDNode *N) {
   return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
 }
 
+// Similar to ScalarizeVecOp_VSETCC, with added logic to update chains.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VSTRICT_FSETCC(SDNode *N,
+                                                        unsigned OpNo) {
+  assert(OpNo == 1 && "Wrong operand for scalarization!");
+  assert(N->getValueType(0).isVector() &&
+         N->getOperand(1).getValueType().isVector() &&
+         "Operand types must be vectors");
+  assert(N->getValueType(0) == MVT::v1i1 && "Expected v1i1 type");
+
+  EVT VT = N->getValueType(0);
+  SDValue Ch = N->getOperand(0);
+  SDValue LHS = GetScalarizedVector(N->getOperand(1));
+  SDValue RHS = GetScalarizedVector(N->getOperand(2));
+  SDValue CC = N->getOperand(3);
+
+  EVT OpVT = N->getOperand(1).getValueType();
+  EVT NVT = VT.getVectorElementType();
+  SDLoc DL(N);
+  SDValue Res = DAG.getNode(N->getOpcode(), DL, {MVT::i1, MVT::Other},
+                            {Ch, LHS, RHS, CC});
+
+  // Legalize the chain result - switch anything that used the old chain to
+  // use the new one.
+  ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+
+  ISD::NodeType ExtendCode =
+      TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+
+  Res = DAG.getNode(ExtendCode, DL, NVT, Res);
+  Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Res);
+
+  // Do our own replacement and return SDValue() to tell the caller that we
+  // handled all replacements since caller can only handle a single result.
+  ReplaceValueWith(SDValue(N, 0), Res);
+  return SDValue();
+}
+
 /// If the value to store is a vector that needs to be scalarized, it must be
 /// <1 x ty>. Just store the element.
 SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
diff --git a/llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll b/llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll
new file mode 100644
index 0000000000000..f34890c053517
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp80-strict-vec-cmp.ll
@@ -0,0 +1,1298 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
+
+define <4 x i1> @test_oeq_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_oeq_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    andb %cl, %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    sete %dl
+; CHECK-NEXT:    andb %cl, %dl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    sete %sil
+; CHECK-NEXT:    andb %cl, %sil
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    kmovw %ecx, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"oeq", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ogt_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ogt_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ogt", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_oge_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_oge_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"oge", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_olt_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_olt_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"olt", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ole_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ole_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ole", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_one_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_one_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setne %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"one", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ord_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ord_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ord", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ueq_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ueq_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    sete %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ueq", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ugt_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ugt_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ugt", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_uge_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_uge_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"uge", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ult_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ult_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ult", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ule_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_ule_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ule", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_une_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_une_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    orb %cl, %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    setne %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    setne %sil
+; CHECK-NEXT:    orb %cl, %sil
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    kmovw %ecx, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"une", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_uno_q(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_uno_q:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fucompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"uno", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_oeq_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) {
+; CHECK-LABEL: test_oeq_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    andb %cl, %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    sete %dl
+; CHECK-NEXT:    andb %cl, %dl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    sete %sil
+; CHECK-NEXT:    andb %cl, %sil
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    andb %al, %cl
+; CHECK-NEXT:    kmovw %ecx, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"oeq", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ogt_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "ogt" compare, fpexcept.strict: expect four scalar fcompi + seta, packed into a k-mask.
+; CHECK-LABEL: test_ogt_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ogt", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_oge_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "oge" compare, fpexcept.strict: expect four scalar fcompi + setae, packed into a k-mask.
+; CHECK-LABEL: test_oge_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"oge", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_olt_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "olt" compare, fpexcept.strict: expect four scalar fcompi + seta (same setcc as "ogt"; presumably the operands are loaded in swapped order - confirm).
+; CHECK-LABEL: test_olt_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    seta %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    seta %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"olt", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ole_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "ole" compare, fpexcept.strict: expect four scalar fcompi + setae (same setcc as "oge"; presumably the operands are loaded in swapped order - confirm).
+; CHECK-LABEL: test_ole_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setae %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setae %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ole", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_one_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "one" compare, fpexcept.strict: expect four scalar fcompi + setne, packed into a k-mask.
+; CHECK-LABEL: test_one_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setne %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"one", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ord_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "ord" compare, fpexcept.strict: expect four scalar fcompi + setnp, packed into a k-mask.
+; CHECK-LABEL: test_ord_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setnp %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setnp %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ord", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ueq_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "ueq" compare, fpexcept.strict: expect four scalar fcompi + sete, packed into a k-mask.
+; CHECK-LABEL: test_ueq_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    sete %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    sete %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    sete %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ueq", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond ; NOTE(review): predicate was "ord" (copy-paste); CHECK lines hand-updated setnp -> sete - regenerate with update_llc_test_checks.py to confirm.
+}
+
+define <4 x i1> @test_ugt_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "ugt" compare, fpexcept.strict: expect four scalar fcompi + setb (same setcc as "ult"; presumably the operands are loaded in swapped order - confirm).
+; CHECK-LABEL: test_ugt_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ugt", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_uge_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "uge" compare, fpexcept.strict: expect four scalar fcompi + setbe (same setcc as "ule"; presumably the operands are loaded in swapped order - confirm).
+; CHECK-LABEL: test_uge_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"uge", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ult_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "ult" compare, fpexcept.strict: expect four scalar fcompi + setb, packed into a k-mask.
+; CHECK-LABEL: test_ult_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setb %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setb %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ult", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_ule_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "ule" compare, fpexcept.strict: expect four scalar fcompi + setbe, packed into a k-mask.
+; CHECK-LABEL: test_ule_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setbe %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setbe %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"ule", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_une_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "une" compare, fpexcept.strict: expect four scalar fcompi, each result formed as setp OR setne (orb), packed into a k-mask.
+; CHECK-LABEL: test_une_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    setne %al
+; CHECK-NEXT:    orb %cl, %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    setne %dl
+; CHECK-NEXT:    orb %cl, %dl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    setne %sil
+; CHECK-NEXT:    orb %cl, %sil
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    andl $1, %esi
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %esi, %k1
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    setne %cl
+; CHECK-NEXT:    orb %al, %cl
+; CHECK-NEXT:    kmovw %ecx, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"une", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+define <4 x i1> @test_uno_s(<4 x x86_fp80> %a, <4 x x86_fp80> %b) { ; Signaling (fcmps) "uno" compare, fpexcept.strict: expect four scalar fcompi + setp, packed into a k-mask.
+; CHECK-LABEL: test_uno_s:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %cl
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    fldt {{[0-9]+}}(%rsp)
+; CHECK-NEXT:    setp %dl
+; CHECK-NEXT:    andl $1, %edx
+; CHECK-NEXT:    fcompi %st(1), %st
+; CHECK-NEXT:    fstp %st(0)
+; CHECK-NEXT:    kmovw %eax, %k0
+; CHECK-NEXT:    kmovw %ecx, %k1
+; CHECK-NEXT:    kshiftlw $1, %k1, %k1
+; CHECK-NEXT:    korw %k1, %k0, %k0
+; CHECK-NEXT:    kshiftlw $2, %k0, %k0
+; CHECK-NEXT:    kmovw %edx, %k1
+; CHECK-NEXT:    setp %al
+; CHECK-NEXT:    kmovw %eax, %k2
+; CHECK-NEXT:    kshiftlw $1, %k2, %k2
+; CHECK-NEXT:    korw %k2, %k1, %k1
+; CHECK-NEXT:    kshiftlw $14, %k1, %k1
+; CHECK-NEXT:    kshiftrw $14, %k1, %k1
+; CHECK-NEXT:    korw %k0, %k1, %k1
+; CHECK-NEXT:    vpternlogd {{.*#+}} zmm0 {%k1} {z} = -1
+; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %cond = tail call <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80> %a, <4 x x86_fp80> %b, metadata !"uno", metadata !"fpexcept.strict")
+  ret <4 x i1> %cond
+}
+
+declare <4 x i1> @llvm.experimental.constrained.fcmp.v4f80(<4 x x86_fp80>, <4 x x86_fp80>, metadata, metadata) ; quiet constrained compare
+declare <4 x i1> @llvm.experimental.constrained.fcmps.v4f80(<4 x x86_fp80>, <4 x x86_fp80>, metadata, metadata) ; signaling constrained compare
+



More information about the llvm-commits mailing list