[clang] a10cec0 - [X86] Improve X86 cmpps/cmppd/cmpss/cmpsd intrinsics with strictfp

Craig Topper via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 29 15:52:51 PST 2020


Author: Craig Topper
Date: 2020-01-29T15:52:11-08:00
New Revision: a10cec02f79082a1da571e44e68800025b7e6554

URL: https://github.com/llvm/llvm-project/commit/a10cec02f79082a1da571e44e68800025b7e6554
DIFF: https://github.com/llvm/llvm-project/commit/a10cec02f79082a1da571e44e68800025b7e6554.diff

LOG: [X86] Improve X86 cmpps/cmppd/cmpss/cmpsd intrinsics with strictfp

The constrained fcmp intrinsics don't allow the TRUE/FALSE predicates.
Using them will assert. To work around this I'm emitting the old X86 specific intrinsics that were never removed from the backend when we switched to using fcmp in IR. We have no way to mark them as being strict, but that's true of all target specific intrinsics so doesn't seem like we need to solve that here.

I've also added support for selecting between signaling and quiet.

Still need to support SAE which will require using a target specific
intrinsic. Also need to fix masking to not use an AND instruction
after the compare.

Differential Revision: https://reviews.llvm.org/D72906

Added: 
    clang/test/CodeGen/avx-builtins-constrained-cmp.c
    clang/test/CodeGen/avx512f-builtins-constrained-cmp.c
    clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c
    clang/test/CodeGen/sse-builtins-constrained-cmp.c
    clang/test/CodeGen/sse2-builtins-constrained-cmp.c

Modified: 
    clang/lib/CodeGen/CGBuiltin.cpp

Removed: 
    


################################################################################
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index d6640e5d1bdc..e77aa791ca7a 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10434,8 +10434,13 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   // TODO: The builtins could be removed if the SSE header files used vector
   // extension comparisons directly (vector ordered/unordered may need
   // additional support via __builtin_isnan()).
-  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
-    Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred,
+                                      bool IsSignaling) {
+    Value *Cmp;
+    if (IsSignaling)
+      Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
+    else
+      Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
     llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
     llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
     Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
@@ -12238,28 +12243,28 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
   // packed comparison intrinsics
   case X86::BI__builtin_ia32_cmpeqps:
   case X86::BI__builtin_ia32_cmpeqpd:
-    return getVectorFCmpIR(CmpInst::FCMP_OEQ);
+    return getVectorFCmpIR(CmpInst::FCMP_OEQ, /*IsSignaling*/false);
   case X86::BI__builtin_ia32_cmpltps:
   case X86::BI__builtin_ia32_cmpltpd:
-    return getVectorFCmpIR(CmpInst::FCMP_OLT);
+    return getVectorFCmpIR(CmpInst::FCMP_OLT, /*IsSignaling*/true);
   case X86::BI__builtin_ia32_cmpleps:
   case X86::BI__builtin_ia32_cmplepd:
-    return getVectorFCmpIR(CmpInst::FCMP_OLE);
+    return getVectorFCmpIR(CmpInst::FCMP_OLE, /*IsSignaling*/true);
   case X86::BI__builtin_ia32_cmpunordps:
   case X86::BI__builtin_ia32_cmpunordpd:
-    return getVectorFCmpIR(CmpInst::FCMP_UNO);
+    return getVectorFCmpIR(CmpInst::FCMP_UNO, /*IsSignaling*/false);
   case X86::BI__builtin_ia32_cmpneqps:
   case X86::BI__builtin_ia32_cmpneqpd:
-    return getVectorFCmpIR(CmpInst::FCMP_UNE);
+    return getVectorFCmpIR(CmpInst::FCMP_UNE, /*IsSignaling*/false);
   case X86::BI__builtin_ia32_cmpnltps:
   case X86::BI__builtin_ia32_cmpnltpd:
-    return getVectorFCmpIR(CmpInst::FCMP_UGE);
+    return getVectorFCmpIR(CmpInst::FCMP_UGE, /*IsSignaling*/true);
   case X86::BI__builtin_ia32_cmpnleps:
   case X86::BI__builtin_ia32_cmpnlepd:
-    return getVectorFCmpIR(CmpInst::FCMP_UGT);
+    return getVectorFCmpIR(CmpInst::FCMP_UGT, /*IsSignaling*/true);
   case X86::BI__builtin_ia32_cmpordps:
   case X86::BI__builtin_ia32_cmpordpd:
-    return getVectorFCmpIR(CmpInst::FCMP_ORD);
+    return getVectorFCmpIR(CmpInst::FCMP_ORD, /*IsSignaling*/false);
   case X86::BI__builtin_ia32_cmpps:
   case X86::BI__builtin_ia32_cmpps256:
   case X86::BI__builtin_ia32_cmppd:
@@ -12284,42 +12289,87 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     // Ignoring requested signaling behaviour,
     // e.g. both _CMP_GT_OS & _CMP_GT_OQ are translated to FCMP_OGT.
     FCmpInst::Predicate Pred;
-    switch (CC) {
-    case 0x00: Pred = FCmpInst::FCMP_OEQ;   break;
-    case 0x01: Pred = FCmpInst::FCMP_OLT;   break;
-    case 0x02: Pred = FCmpInst::FCMP_OLE;   break;
-    case 0x03: Pred = FCmpInst::FCMP_UNO;   break;
-    case 0x04: Pred = FCmpInst::FCMP_UNE;   break;
-    case 0x05: Pred = FCmpInst::FCMP_UGE;   break;
-    case 0x06: Pred = FCmpInst::FCMP_UGT;   break;
-    case 0x07: Pred = FCmpInst::FCMP_ORD;   break;
-    case 0x08: Pred = FCmpInst::FCMP_UEQ;   break;
-    case 0x09: Pred = FCmpInst::FCMP_ULT;   break;
-    case 0x0a: Pred = FCmpInst::FCMP_ULE;   break;
-    case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
-    case 0x0c: Pred = FCmpInst::FCMP_ONE;   break;
-    case 0x0d: Pred = FCmpInst::FCMP_OGE;   break;
-    case 0x0e: Pred = FCmpInst::FCMP_OGT;   break;
-    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  break;
-    case 0x10: Pred = FCmpInst::FCMP_OEQ;   break;
-    case 0x11: Pred = FCmpInst::FCMP_OLT;   break;
-    case 0x12: Pred = FCmpInst::FCMP_OLE;   break;
-    case 0x13: Pred = FCmpInst::FCMP_UNO;   break;
-    case 0x14: Pred = FCmpInst::FCMP_UNE;   break;
-    case 0x15: Pred = FCmpInst::FCMP_UGE;   break;
-    case 0x16: Pred = FCmpInst::FCMP_UGT;   break;
-    case 0x17: Pred = FCmpInst::FCMP_ORD;   break;
-    case 0x18: Pred = FCmpInst::FCMP_UEQ;   break;
-    case 0x19: Pred = FCmpInst::FCMP_ULT;   break;
-    case 0x1a: Pred = FCmpInst::FCMP_ULE;   break;
-    case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
-    case 0x1c: Pred = FCmpInst::FCMP_ONE;   break;
-    case 0x1d: Pred = FCmpInst::FCMP_OGE;   break;
-    case 0x1e: Pred = FCmpInst::FCMP_OGT;   break;
-    case 0x1f: Pred = FCmpInst::FCMP_TRUE;  break;
+    bool IsSignaling;
+    // Predicates for 16-31 repeat the 0-15 predicates. Only the signaling
+    // behavior is inverted. We'll handle that after the switch.
+    switch (CC & 0xf) {
+    case 0x00: Pred = FCmpInst::FCMP_OEQ;   IsSignaling = false; break;
+    case 0x01: Pred = FCmpInst::FCMP_OLT;   IsSignaling = true;  break;
+    case 0x02: Pred = FCmpInst::FCMP_OLE;   IsSignaling = true;  break;
+    case 0x03: Pred = FCmpInst::FCMP_UNO;   IsSignaling = false; break;
+    case 0x04: Pred = FCmpInst::FCMP_UNE;   IsSignaling = false; break;
+    case 0x05: Pred = FCmpInst::FCMP_UGE;   IsSignaling = true;  break;
+    case 0x06: Pred = FCmpInst::FCMP_UGT;   IsSignaling = true;  break;
+    case 0x07: Pred = FCmpInst::FCMP_ORD;   IsSignaling = false; break;
+    case 0x08: Pred = FCmpInst::FCMP_UEQ;   IsSignaling = false; break;
+    case 0x09: Pred = FCmpInst::FCMP_ULT;   IsSignaling = true;  break;
+    case 0x0a: Pred = FCmpInst::FCMP_ULE;   IsSignaling = true;  break;
+    case 0x0b: Pred = FCmpInst::FCMP_FALSE; IsSignaling = false; break;
+    case 0x0c: Pred = FCmpInst::FCMP_ONE;   IsSignaling = false; break;
+    case 0x0d: Pred = FCmpInst::FCMP_OGE;   IsSignaling = true;  break;
+    case 0x0e: Pred = FCmpInst::FCMP_OGT;   IsSignaling = true;  break;
+    case 0x0f: Pred = FCmpInst::FCMP_TRUE;  IsSignaling = false; break;
     default: llvm_unreachable("Unhandled CC");
     }
 
+    // Invert the signaling behavior for 16-31.
+    if (CC & 0x10)
+      IsSignaling = !IsSignaling;
+
+    // If the predicate is true or false and we're using constrained intrinsics,
+    // we don't have a compare intrinsic we can use. Just use the legacy X86
+    // specific intrinsic.
+    if ((Pred == FCmpInst::FCMP_TRUE || Pred == FCmpInst::FCMP_FALSE) &&
+        Builder.getIsFPConstrained()) {
+
+      Intrinsic::ID IID;
+      switch (BuiltinID) {
+      default: llvm_unreachable("Unexpected builtin");
+      case X86::BI__builtin_ia32_cmpps:
+        IID = Intrinsic::x86_sse_cmp_ps;
+        break;
+      case X86::BI__builtin_ia32_cmpps256:
+        IID = Intrinsic::x86_avx_cmp_ps_256;
+        break;
+      case X86::BI__builtin_ia32_cmppd:
+        IID = Intrinsic::x86_sse2_cmp_pd;
+        break;
+      case X86::BI__builtin_ia32_cmppd256:
+        IID = Intrinsic::x86_avx_cmp_pd_256;
+        break;
+      case X86::BI__builtin_ia32_cmpps512_mask:
+        IID = Intrinsic::x86_avx512_cmp_ps_512;
+        break;
+      case X86::BI__builtin_ia32_cmppd512_mask:
+        IID = Intrinsic::x86_avx512_cmp_pd_512;
+        break;
+      case X86::BI__builtin_ia32_cmpps128_mask:
+        IID = Intrinsic::x86_avx512_cmp_ps_128;
+        break;
+      case X86::BI__builtin_ia32_cmpps256_mask:
+        IID = Intrinsic::x86_avx512_cmp_ps_256;
+        break;
+      case X86::BI__builtin_ia32_cmppd128_mask:
+        IID = Intrinsic::x86_avx512_cmp_pd_128;
+        break;
+      case X86::BI__builtin_ia32_cmppd256_mask:
+        IID = Intrinsic::x86_avx512_cmp_pd_256;
+        break;
+      }
+
+      Function *Intr = CGM.getIntrinsic(IID);
+      if (Intr->getReturnType()->getVectorElementType()->isIntegerTy(1)) {
+        unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+        Value *MaskIn = Ops[3];
+        Ops.erase(&Ops[3]);
+
+        Value *Cmp = Builder.CreateCall(Intr, Ops);
+        return EmitX86MaskedCompareResult(*this, Cmp, NumElts, MaskIn);
+      }
+
+      return Builder.CreateCall(Intr, Ops);
+    }
+
     // Builtins without the _mask suffix return a vector of integers
     // of the same width as the input vectors
     switch (BuiltinID) {
@@ -12329,12 +12379,17 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
     case X86::BI__builtin_ia32_cmpps256_mask:
     case X86::BI__builtin_ia32_cmppd128_mask:
     case X86::BI__builtin_ia32_cmppd256_mask: {
+      // FIXME: Support SAE.
       unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
-      Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
+      Value *Cmp;
+      if (IsSignaling)
+        Cmp = Builder.CreateFCmpS(Pred, Ops[0], Ops[1]);
+      else
+        Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
       return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
     }
     default:
-      return getVectorFCmpIR(Pred);
+      return getVectorFCmpIR(Pred, IsSignaling);
     }
   }
 

diff --git a/clang/test/CodeGen/avx-builtins-constrained-cmp.c b/clang/test/CodeGen/avx-builtins-constrained-cmp.c
new file mode 100644
index 000000000000..1d48204c4acf
--- /dev/null
+++ b/clang/test/CodeGen/avx-builtins-constrained-cmp.c
@@ -0,0 +1,772 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+
+
+#include <immintrin.h>
+
+__m256d test_mm256_cmp_pd_eq_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: @test_mm256_cmp_pd_eq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_EQ_OQ);
+}
+
+__m256d test_mm256_cmp_pd_lt_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_lt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_LT_OS);
+}
+
+__m256d test_mm256_cmp_pd_le_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_le_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_LE_OS);
+}
+
+__m256d test_mm256_cmp_pd_unord_q(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_unord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_UNORD_Q);
+}
+
+__m256d test_mm256_cmp_pd_neq_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_neq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NEQ_UQ);
+}
+
+__m256d test_mm256_cmp_pd_nlt_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_nlt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NLT_US);
+}
+
+__m256d test_mm256_cmp_pd_nle_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_nle_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NLE_US);
+}
+
+__m256d test_mm256_cmp_pd_ord_q(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_ord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_ORD_Q);
+}
+
+__m256d test_mm256_cmp_pd_eq_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_eq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_EQ_UQ);
+}
+
+__m256d test_mm256_cmp_pd_nge_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_nge_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NGE_US);
+}
+
+__m256d test_mm256_cmp_pd_ngt_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_ngt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NGT_US);
+}
+
+__m256d test_mm256_cmp_pd_false_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_false_oq
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 11)
+  return _mm256_cmp_pd(a, b, _CMP_FALSE_OQ);
+}
+
+__m256d test_mm256_cmp_pd_neq_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_neq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NEQ_OQ);
+}
+
+__m256d test_mm256_cmp_pd_ge_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_ge_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_GE_OS);
+}
+
+__m256d test_mm256_cmp_pd_gt_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_gt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_GT_OS);
+}
+
+__m256d test_mm256_cmp_pd_true_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_true_uq
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 15)
+  return _mm256_cmp_pd(a, b, _CMP_TRUE_UQ);
+}
+
+__m256d test_mm256_cmp_pd_eq_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_eq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_EQ_OS);
+}
+
+__m256d test_mm256_cmp_pd_lt_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_lt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_LT_OQ);
+}
+
+__m256d test_mm256_cmp_pd_le_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_le_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_LE_OQ);
+}
+
+__m256d test_mm256_cmp_pd_unord_s(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_unord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_UNORD_S);
+}
+
+__m256d test_mm256_cmp_pd_neq_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_neq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NEQ_US);
+}
+
+__m256d test_mm256_cmp_pd_nlt_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_nlt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NLT_UQ);
+}
+
+__m256d test_mm256_cmp_pd_nle_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_nle_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NLE_UQ);
+}
+
+__m256d test_mm256_cmp_pd_ord_s(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_ord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_ORD_S);
+}
+
+__m256d test_mm256_cmp_pd_eq_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_eq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_EQ_US);
+}
+
+__m256d test_mm256_cmp_pd_nge_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_nge_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NGE_UQ);
+}
+
+__m256d test_mm256_cmp_pd_ngt_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_ngt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NGT_UQ);
+}
+
+__m256d test_mm256_cmp_pd_false_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_false_os
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 27)
+  return _mm256_cmp_pd(a, b, _CMP_FALSE_OS);
+}
+
+__m256d test_mm256_cmp_pd_neq_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_neq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_NEQ_OS);
+}
+
+__m256d test_mm256_cmp_pd_ge_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_ge_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_GE_OQ);
+}
+
+__m256d test_mm256_cmp_pd_gt_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_gt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd(a, b, _CMP_GT_OQ);
+}
+
+__m256d test_mm256_cmp_pd_true_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_true_us
+  // CHECK: call <4 x double> @llvm.x86.avx.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 31)
+  return _mm256_cmp_pd(a, b, _CMP_TRUE_US);
+}
+
+__m256 test_mm256_cmp_ps_eq_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_eq_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_EQ_OQ);
+}
+
+__m256 test_mm256_cmp_ps_lt_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_lt_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_LT_OS);
+}
+
+__m256 test_mm256_cmp_ps_le_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_le_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_LE_OS);
+}
+
+__m256 test_mm256_cmp_ps_unord_q(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_unord_q
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_UNORD_Q);
+}
+
+__m256 test_mm256_cmp_ps_neq_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_neq_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NEQ_UQ);
+}
+
+__m256 test_mm256_cmp_ps_nlt_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_nlt_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NLT_US);
+}
+
+__m256 test_mm256_cmp_ps_nle_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_nle_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NLE_US);
+}
+
+__m256 test_mm256_cmp_ps_ord_q(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_ord_q
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_ORD_Q);
+}
+
+__m256 test_mm256_cmp_ps_eq_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_eq_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_EQ_UQ);
+}
+
+__m256 test_mm256_cmp_ps_nge_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_nge_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NGE_US);
+}
+
+__m256 test_mm256_cmp_ps_ngt_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_ngt_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NGT_US);
+}
+
+__m256 test_mm256_cmp_ps_false_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_false_oq
+  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 11)
+  return _mm256_cmp_ps(a, b, _CMP_FALSE_OQ);
+}
+
+__m256 test_mm256_cmp_ps_neq_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_neq_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NEQ_OQ);
+}
+
+__m256 test_mm256_cmp_ps_ge_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_ge_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_GE_OS);
+}
+
+__m256 test_mm256_cmp_ps_gt_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_gt_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_GT_OS);
+}
+
+__m256 test_mm256_cmp_ps_true_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_true_uq
+  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 15)
+  return _mm256_cmp_ps(a, b, _CMP_TRUE_UQ);
+}
+
+__m256 test_mm256_cmp_ps_eq_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_eq_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_EQ_OS);
+}
+
+__m256 test_mm256_cmp_ps_lt_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_lt_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_LT_OQ);
+}
+
+__m256 test_mm256_cmp_ps_le_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_le_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_LE_OQ);
+}
+
+__m256 test_mm256_cmp_ps_unord_s(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_unord_s
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_UNORD_S);
+}
+
+__m256 test_mm256_cmp_ps_neq_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_neq_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NEQ_US);
+}
+
+__m256 test_mm256_cmp_ps_nlt_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_nlt_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NLT_UQ);
+}
+
+__m256 test_mm256_cmp_ps_nle_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_nle_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NLE_UQ);
+}
+
+__m256 test_mm256_cmp_ps_ord_s(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_ord_s
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_ORD_S);
+}
+
+__m256 test_mm256_cmp_ps_eq_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_eq_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_EQ_US);
+}
+
+__m256 test_mm256_cmp_ps_nge_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_nge_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NGE_UQ);
+}
+
+__m256 test_mm256_cmp_ps_ngt_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_ngt_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NGT_UQ);
+}
+
+__m256 test_mm256_cmp_ps_false_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_false_os
+  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 27)
+  return _mm256_cmp_ps(a, b, _CMP_FALSE_OS);
+}
+
+__m256 test_mm256_cmp_ps_neq_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_neq_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_NEQ_OS);
+}
+
+__m256 test_mm256_cmp_ps_ge_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_ge_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_GE_OQ);
+}
+
+__m256 test_mm256_cmp_ps_gt_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_gt_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps(a, b, _CMP_GT_OQ);
+}
+
+__m256 test_mm256_cmp_ps_true_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_true_us
+  // CHECK: call <8 x float> @llvm.x86.avx.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 31)
+  return _mm256_cmp_ps(a, b, _CMP_TRUE_US);
+}
+
+__m128d test_mm_cmp_pd_eq_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: @test_mm_cmp_pd_eq_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_EQ_OQ);
+}
+
+__m128d test_mm_cmp_pd_lt_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_lt_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_LT_OS);
+}
+
+__m128d test_mm_cmp_pd_le_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_le_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_LE_OS);
+}
+
+__m128d test_mm_cmp_pd_unord_q(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_unord_q
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_UNORD_Q);
+}
+
+__m128d test_mm_cmp_pd_neq_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_neq_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NEQ_UQ);
+}
+
+__m128d test_mm_cmp_pd_nlt_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_nlt_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NLT_US);
+}
+
+__m128d test_mm_cmp_pd_nle_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_nle_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NLE_US);
+}
+
+__m128d test_mm_cmp_pd_ord_q(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_ord_q
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_ORD_Q);
+}
+
+__m128d test_mm_cmp_pd_eq_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_eq_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_EQ_UQ);
+}
+
+__m128d test_mm_cmp_pd_nge_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_nge_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NGE_US);
+}
+
+__m128d test_mm_cmp_pd_ngt_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_ngt_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NGT_US);
+}
+
+__m128d test_mm_cmp_pd_false_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_false_oq
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 11)
+  return _mm_cmp_pd(a, b, _CMP_FALSE_OQ);
+}
+
+__m128d test_mm_cmp_pd_neq_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_neq_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NEQ_OQ);
+}
+
+__m128d test_mm_cmp_pd_ge_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_ge_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_GE_OS);
+}
+
+__m128d test_mm_cmp_pd_gt_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_gt_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_GT_OS);
+}
+
+__m128d test_mm_cmp_pd_true_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_true_uq
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 15)
+  return _mm_cmp_pd(a, b, _CMP_TRUE_UQ);
+}
+
+__m128d test_mm_cmp_pd_eq_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_eq_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_EQ_OS);
+}
+
+__m128d test_mm_cmp_pd_lt_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_lt_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_LT_OQ);
+}
+
+__m128d test_mm_cmp_pd_le_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_le_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_LE_OQ);
+}
+
+__m128d test_mm_cmp_pd_unord_s(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_unord_s
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_UNORD_S);
+}
+
+__m128d test_mm_cmp_pd_neq_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_neq_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NEQ_US);
+}
+
+__m128d test_mm_cmp_pd_nlt_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_nlt_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NLT_UQ);
+}
+
+__m128d test_mm_cmp_pd_nle_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_nle_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NLE_UQ);
+}
+
+__m128d test_mm_cmp_pd_ord_s(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_ord_s
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_ORD_S);
+}
+
+__m128d test_mm_cmp_pd_eq_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_eq_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_EQ_US);
+}
+
+__m128d test_mm_cmp_pd_nge_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_nge_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NGE_UQ);
+}
+
+__m128d test_mm_cmp_pd_ngt_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_ngt_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NGT_UQ);
+}
+
+__m128d test_mm_cmp_pd_false_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_false_os
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 27)
+  return _mm_cmp_pd(a, b, _CMP_FALSE_OS);
+}
+
+__m128d test_mm_cmp_pd_neq_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_neq_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_NEQ_OS);
+}
+
+__m128d test_mm_cmp_pd_ge_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_ge_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_GE_OQ);
+}
+
+__m128d test_mm_cmp_pd_gt_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_gt_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd(a, b, _CMP_GT_OQ);
+}
+
+__m128d test_mm_cmp_pd_true_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_true_us
+  // CHECK: call <2 x double> @llvm.x86.sse2.cmp.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i8 31)
+  return _mm_cmp_pd(a, b, _CMP_TRUE_US);
+}
+
+__m128 test_mm_cmp_ps_eq_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: @test_mm_cmp_ps_eq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_EQ_OQ);
+}
+
+__m128 test_mm_cmp_ps_lt_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_lt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_LT_OS);
+}
+
+__m128 test_mm_cmp_ps_le_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_le_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_LE_OS);
+}
+
+__m128 test_mm_cmp_ps_unord_q(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_unord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_UNORD_Q);
+}
+
+__m128 test_mm_cmp_ps_neq_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_neq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NEQ_UQ);
+}
+
+__m128 test_mm_cmp_ps_nlt_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_nlt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NLT_US);
+}
+
+__m128 test_mm_cmp_ps_nle_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_nle_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NLE_US);
+}
+
+__m128 test_mm_cmp_ps_ord_q(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_ord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_ORD_Q);
+}
+
+__m128 test_mm_cmp_ps_eq_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_eq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_EQ_UQ);
+}
+
+__m128 test_mm_cmp_ps_nge_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_nge_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NGE_US);
+}
+
+__m128 test_mm_cmp_ps_ngt_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_ngt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NGT_US);
+}
+
+__m128 test_mm_cmp_ps_false_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_false_oq
+  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 11)
+  return _mm_cmp_ps(a, b, _CMP_FALSE_OQ);
+}
+
+__m128 test_mm_cmp_ps_neq_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_neq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NEQ_OQ);
+}
+
+__m128 test_mm_cmp_ps_ge_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_ge_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_GE_OS);
+}
+
+__m128 test_mm_cmp_ps_gt_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_gt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_GT_OS);
+}
+
+__m128 test_mm_cmp_ps_true_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_true_uq
+  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 15)
+  return _mm_cmp_ps(a, b, _CMP_TRUE_UQ);
+}
+
+__m128 test_mm_cmp_ps_eq_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_eq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_EQ_OS);
+}
+
+__m128 test_mm_cmp_ps_lt_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_lt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_LT_OQ);
+}
+
+__m128 test_mm_cmp_ps_le_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_le_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_LE_OQ);
+}
+
+__m128 test_mm_cmp_ps_unord_s(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_unord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_UNORD_S);
+}
+
+__m128 test_mm_cmp_ps_neq_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_neq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NEQ_US);
+}
+
+__m128 test_mm_cmp_ps_nlt_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_nlt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NLT_UQ);
+}
+
+__m128 test_mm_cmp_ps_nle_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_nle_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NLE_UQ);
+}
+
+__m128 test_mm_cmp_ps_ord_s(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_ord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_ORD_S);
+}
+
+__m128 test_mm_cmp_ps_eq_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_eq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_EQ_US);
+}
+
+__m128 test_mm_cmp_ps_nge_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_nge_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NGE_UQ);
+}
+
+__m128 test_mm_cmp_ps_ngt_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_ngt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NGT_UQ);
+}
+
+__m128 test_mm_cmp_ps_false_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_false_os
+  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 27)
+  return _mm_cmp_ps(a, b, _CMP_FALSE_OS);
+}
+
+__m128 test_mm_cmp_ps_neq_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_neq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_NEQ_OS);
+}
+
+__m128 test_mm_cmp_ps_ge_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_ge_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_GE_OQ);
+}
+
+__m128 test_mm_cmp_ps_gt_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_gt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps(a, b, _CMP_GT_OQ);
+}
+
+__m128 test_mm_cmp_ps_true_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_true_us
+  // CHECK: call <4 x float> @llvm.x86.sse.cmp.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 31)
+  return _mm_cmp_ps(a, b, _CMP_TRUE_US);
+}

diff  --git a/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c b/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c
new file mode 100644
index 000000000000..0817d3be106c
--- /dev/null
+++ b/clang/test/CodeGen/avx512f-builtins-constrained-cmp.c
@@ -0,0 +1,861 @@
+// RUN: %clang_cc1 -fexperimental-new-pass-manager -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_cmp_round_ps_mask
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm512_cmp_round_ps_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_round_ps_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_eq_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_cmp_ps_mask_eq_oq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_lt_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_lt_os
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_le_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_le_os
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_unord_q(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_unord_q
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_neq_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_uq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_nlt_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_nlt_us
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_nle_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_nle_us
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_ord_q(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_ord_q
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_eq_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_uq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_nge_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_nge_us
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_ngt_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_ngt_us
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_false_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_false_oq
+  // CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 11, i32 4)
+  return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_neq_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_oq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_ge_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_ge_os
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_gt_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_gt_os
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_true_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_true_uq
+  // CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 15, i32 4)
+  return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_eq_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_os
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_lt_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_lt_oq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_le_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_le_oq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_unord_s(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_unord_s
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_neq_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_us
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_nlt_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_nlt_uq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_nle_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_nle_uq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_ord_s(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_ord_s
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_eq_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_eq_us
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_nge_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_nge_uq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_ngt_uq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_ngt_uq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_false_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_false_os
+  // CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 27, i32 4)
+  return _mm512_cmp_ps_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_neq_os(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_neq_os
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_ge_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_ge_oq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_gt_oq(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_gt_oq
+  // CHECK: call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm512_cmp_ps_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask16 test_mm512_cmp_ps_mask_true_us(__m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_cmp_ps_mask_true_us
+  // CHECK: call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 31, i32 4)
+  return _mm512_cmp_ps_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_eq_oq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: @test_mm512_mask_cmp_ps_mask_eq_oq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_lt_os(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_lt_os
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_le_os(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_le_os
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_unord_q(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_unord_q
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_neq_uq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_uq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_nlt_us(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nlt_us
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_nle_us(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nle_us
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_ord_q(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ord_q
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_eq_uq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_uq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_nge_us(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nge_us
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_ngt_us(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ngt_us
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_false_oq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_false_oq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 11, i32 4)
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_neq_oq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_oq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_ge_os(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ge_os
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_gt_os(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_gt_os
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_true_uq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_true_uq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 15, i32 4)
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_eq_os(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_os
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_lt_oq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_lt_oq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_le_oq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_le_oq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_unord_s(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_unord_s
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_neq_us(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_us
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_nlt_uq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nlt_uq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_nle_uq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nle_uq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_ord_s(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ord_s
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_eq_us(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_eq_us
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_nge_uq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_nge_uq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_ngt_uq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ngt_uq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_false_os(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_false_os
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 27, i32 4)
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_neq_os(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_neq_os
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmps.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_ge_oq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_ge_oq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_gt_oq(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_gt_oq
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.experimental.constrained.fcmp.v16f32(<16 x float> %{{.*}}, <16 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask16 test_mm512_mask_cmp_ps_mask_true_us(__mmask16 m, __m512 a, __m512 b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_ps_mask_true_us
+  // CHECK: [[CMP:%.*]] = call <16 x i1> @llvm.x86.avx512.cmp.ps.512(<16 x float> %{{.*}}, <16 x float> %{{.*}}, i32 31, i32 4)
+  // CHECK: and <16 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_round_pd_mask
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm512_cmp_round_pd_mask(a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_round_pd_mask(m, a, b, _CMP_EQ_OQ, _MM_FROUND_NO_EXC);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_eq_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_cmp_pd_mask_eq_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_lt_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_lt_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_le_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_le_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_unord_q(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_unord_q
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_neq_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_nlt_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_nlt_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_nle_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_nle_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_ord_q(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_ord_q
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_eq_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_nge_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_nge_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_ngt_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_ngt_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_false_oq
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 11, i32 4)
+  return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_neq_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_ge_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_ge_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_gt_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_gt_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_true_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_true_uq
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 15, i32 4)
+  return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_eq_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_lt_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_lt_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_le_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_le_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_unord_s(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_unord_s
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_neq_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_nlt_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_nlt_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_nle_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_nle_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_ord_s(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_ord_s
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_eq_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_eq_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_nge_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_nge_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_ngt_uq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_ngt_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_false_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_false_os
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 27, i32 4)
+  return _mm512_cmp_pd_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_neq_os(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_neq_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_ge_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_ge_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_gt_oq(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_gt_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm512_cmp_pd_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm512_cmp_pd_mask_true_us(__m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_cmp_pd_mask_true_us
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 31, i32 4)
+  return _mm512_cmp_pd_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: @test_mm512_mask_cmp_pd_mask_eq_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_lt_os(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_lt_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_le_os(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_le_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_unord_q(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_unord_q
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nlt_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_nle_us(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nle_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_ord_q(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ord_q
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_nge_us(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nge_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ngt_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_false_oq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_false_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 11, i32 4)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_ge_os(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ge_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_gt_os(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_gt_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_true_uq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_true_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 15, i32 4)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_eq_os(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_lt_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_le_oq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_le_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_unord_s(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_unord_s
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_neq_us(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nlt_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nle_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_ord_s(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ord_s
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_eq_us(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_eq_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_nge_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ngt_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_false_os(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_false_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 27, i32 4)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_neq_os(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_neq_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_ge_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_gt_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f64(<8 x double> %{{.*}}, <8 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm512_mask_cmp_pd_mask_true_us(__mmask8 m, __m512d a, __m512d b) {
+  // CHECK-LABEL: test_mm512_mask_cmp_pd_mask_true_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.pd.512(<8 x double> %{{.*}}, <8 x double> %{{.*}}, i32 31, i32 4)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm512_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US);
+}

diff  --git a/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c b/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c
new file mode 100644
index 000000000000..ae052dfffbab
--- /dev/null
+++ b/clang/test/CodeGen/avx512vl-builtins-constrained-cmp.c
@@ -0,0 +1,1667 @@
+// RUN: %clang_cc1 -ffreestanding %s -fexperimental-new-pass-manager -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512vl -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__mmask8 test_mm256_cmp_ps_mask_eq_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: @test_mm256_cmp_ps_mask_eq_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_lt_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_lt_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_le_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_le_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_unord_q(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_unord_q
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_neq_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_nlt_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_nlt_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_nle_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_nle_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_ord_q(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_ord_q
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_eq_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_nge_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_nge_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_ngt_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_ngt_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_false_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_false_oq
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11)
+  return _mm256_cmp_ps_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_neq_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_ge_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_ge_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_gt_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_gt_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_true_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_true_uq
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 15)
+  return _mm256_cmp_ps_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_eq_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_lt_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_lt_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_le_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_le_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_unord_s(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_unord_s
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_neq_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_nlt_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_nlt_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_nle_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_nle_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_ord_s(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_ord_s
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_eq_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_eq_us
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_nge_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_nge_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_ngt_uq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_ngt_uq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_false_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_false_os
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 27)
+  return _mm256_cmp_ps_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_neq_os(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_neq_os
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_ge_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_ge_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_gt_oq(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_gt_oq
+  // CHECK: call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_ps_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm256_cmp_ps_mask_true_us(__m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_cmp_ps_mask_true_us
+  // CHECK: call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 31)
+  return _mm256_cmp_ps_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_eq_oq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: @test_mm256_mask_cmp_ps_mask_eq_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_lt_os(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_lt_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_le_os(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_le_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_unord_q(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_unord_q
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_neq_uq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_nlt_us(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nlt_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_nle_us(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nle_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_ord_q(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ord_q
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_eq_uq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_nge_us(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nge_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_ngt_us(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ngt_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_false_oq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_false_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 11)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_neq_oq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_ge_os(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ge_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_gt_os(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_gt_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_true_uq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_true_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 15)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_eq_os(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_lt_oq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_lt_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_le_oq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_le_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_unord_s(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_unord_s
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_neq_us(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_nlt_uq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nlt_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_nle_uq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nle_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_ord_s(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ord_s
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_eq_us(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_eq_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_nge_uq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_nge_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_ngt_uq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ngt_uq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_false_os(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_false_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 27)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_neq_os(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_neq_os
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmps.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_ge_oq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_ge_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_gt_oq(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_gt_oq
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.experimental.constrained.fcmp.v8f32(<8 x float> %{{.*}}, <8 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_ps_mask_true_us(__mmask8 m, __m256 a, __m256 b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_ps_mask_true_us
+  // CHECK: [[CMP:%.*]] = call <8 x i1> @llvm.x86.avx512.cmp.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i32 31)
+  // CHECK: and <8 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_eq_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: @test_mm256_cmp_pd_mask_eq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_lt_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_lt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_le_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_le_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_unord_q(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_unord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_neq_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_nlt_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_nlt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_nle_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_nle_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_ord_q(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_ord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_eq_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_nge_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_nge_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_ngt_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_ngt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_false_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_false_oq
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11)
+  return _mm256_cmp_pd_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_neq_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_ge_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_ge_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_gt_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_gt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_true_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_true_uq
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 15)
+  return _mm256_cmp_pd_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_eq_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_lt_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_lt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_le_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_le_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_unord_s(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_unord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_neq_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_nlt_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_nlt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_nle_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_nle_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_ord_s(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_ord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_eq_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_eq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_nge_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_nge_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_ngt_uq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_ngt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_false_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_false_os
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 27)
+  return _mm256_cmp_pd_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_neq_os(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_neq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_ge_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_ge_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_gt_oq(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_gt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm256_cmp_pd_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm256_cmp_pd_mask_true_us(__m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_cmp_pd_mask_true_us
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 31)
+  return _mm256_cmp_pd_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: @test_mm256_mask_cmp_pd_mask_eq_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_lt_os(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_lt_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_le_os(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_le_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_unord_q(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_unord_q
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nlt_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_nle_us(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nle_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_ord_q(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ord_q
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_nge_us(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nge_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ngt_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_false_oq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_false_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 11)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_ge_os(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ge_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_gt_os(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_gt_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_true_uq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_true_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 15)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_eq_os(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_lt_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_le_oq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_le_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_unord_s(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_unord_s
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_neq_us(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nlt_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nle_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_ord_s(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ord_s
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_eq_us(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_eq_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_nge_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ngt_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_false_os(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_false_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 27)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_neq_os(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_neq_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_ge_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_gt_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x double> %{{.*}}, <4 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm256_mask_cmp_pd_mask_true_us(__mmask8 m, __m256d a, __m256d b) {
+  // CHECK-LABEL: test_mm256_mask_cmp_pd_mask_true_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i32 31)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm256_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_eq_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: @test_mm_cmp_ps_mask_eq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_lt_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_lt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_cmp_ps_mask_le_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_le_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_cmp_ps_mask_unord_q(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_unord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_cmp_ps_mask_neq_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_neq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_nlt_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_nlt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_nle_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_nle_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_ord_q(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_ord_q
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_cmp_ps_mask_eq_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_eq_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_nge_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_nge_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_ngt_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_ngt_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_false_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_false_oq
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 11)
+  return _mm_cmp_ps_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_neq_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_neq_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_ge_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_ge_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_cmp_ps_mask_gt_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_gt_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_cmp_ps_mask_true_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_true_uq
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 15)
+  return _mm_cmp_ps_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_eq_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_eq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_cmp_ps_mask_lt_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_lt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_le_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_le_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_unord_s(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_unord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_cmp_ps_mask_neq_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_neq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_nlt_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_nlt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_nle_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_nle_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_ord_s(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_ord_s
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_cmp_ps_mask_eq_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_eq_us
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_cmp_ps_mask_nge_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_nge_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_ngt_uq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_ngt_uq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_false_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_false_os
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 27)
+  return _mm_cmp_ps_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_cmp_ps_mask_neq_os(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_neq_os
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_cmp_ps_mask_ge_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_ge_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_gt_oq(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_gt_oq
+  // CHECK: call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_ps_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_cmp_ps_mask_true_us(__m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_cmp_ps_mask_true_us
+  // CHECK: call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 31)
+  return _mm_cmp_ps_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_eq_oq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: @test_mm_mask_cmp_ps_mask_eq_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_lt_os(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_lt_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_le_os(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_le_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_unord_q(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_unord_q
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_neq_uq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_nlt_us(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nlt_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_nle_us(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nle_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_ord_q(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ord_q
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_eq_uq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_nge_us(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nge_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_ngt_us(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ngt_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_false_oq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_false_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 11)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_neq_oq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_ge_os(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ge_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_gt_os(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_gt_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_true_uq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_true_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 15)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_eq_os(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_lt_oq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_lt_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_le_oq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_le_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_unord_s(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_unord_s
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_neq_us(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_nlt_uq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nlt_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_nle_uq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nle_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_ord_s(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ord_s
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_eq_us(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_eq_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_nge_uq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_nge_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_ngt_uq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ngt_uq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_false_os(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_false_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 27)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_neq_os(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_neq_os
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_ge_oq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_ge_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_gt_oq(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_gt_oq
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_ps_mask_true_us(__mmask8 m, __m128 a, __m128 b) {
+  // CHECK-LABEL: test_mm_mask_cmp_ps_mask_true_us
+  // CHECK: [[CMP:%.*]] = call <4 x i1> @llvm.x86.avx512.cmp.ps.128(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 31)
+  // CHECK: and <4 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_ps_mask(m, a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_eq_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: @test_mm_cmp_pd_mask_eq_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_lt_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_lt_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_cmp_pd_mask_le_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_le_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_cmp_pd_mask_unord_q(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_unord_q
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_cmp_pd_mask_neq_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_neq_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_nlt_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_nlt_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_nle_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_nle_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_ord_q(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_ord_q
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_cmp_pd_mask_eq_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_eq_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_nge_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_nge_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_ngt_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_ngt_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_false_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_false_oq
+  // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 11)
+  return _mm_cmp_pd_mask(a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_neq_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_neq_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_ge_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_ge_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_cmp_pd_mask_gt_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_gt_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_cmp_pd_mask_true_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_true_uq
+  // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 15)
+  return _mm_cmp_pd_mask(a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_eq_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_eq_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_cmp_pd_mask_lt_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_lt_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_le_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_le_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_unord_s(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_unord_s
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_cmp_pd_mask_neq_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_neq_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_nlt_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_nlt_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_nle_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_nle_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_ord_s(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_ord_s
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_cmp_pd_mask_eq_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_eq_us
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_cmp_pd_mask_nge_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_nge_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_ngt_uq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_ngt_uq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_false_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_false_os
+  // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 27)
+  return _mm_cmp_pd_mask(a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_cmp_pd_mask_neq_os(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_neq_os
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_cmp_pd_mask_ge_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_ge_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_gt_oq(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_gt_oq
+  // CHECK: call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  return _mm_cmp_pd_mask(a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_cmp_pd_mask_true_us(__m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_cmp_pd_mask_true_us
+  // CHECK: call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 31)
+  return _mm_cmp_pd_mask(a, b, _CMP_TRUE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_eq_oq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: @test_mm_mask_cmp_pd_mask_eq_oq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_lt_os(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_lt_os
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_le_os(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_le_os
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_unord_q(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_unord_q
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_neq_uq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_uq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_nlt_us(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nlt_us
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_nle_us(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nle_us
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_ord_q(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ord_q
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_ORD_Q);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_eq_uq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_uq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_nge_us(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nge_us
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGE_US);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_ngt_us(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ngt_us
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGT_US);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_false_oq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_false_oq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 11)
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_neq_oq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_oq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_ge_os(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ge_os
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_gt_os(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_gt_os
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GT_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_true_uq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_true_uq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 15)
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_eq_os(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_os
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_lt_oq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_lt_oq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_le_oq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_le_oq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_LE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_unord_s(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_unord_s
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_UNORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_neq_us(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_us
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_nlt_uq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nlt_uq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_nle_uq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nle_uq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NLE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_ord_s(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ord_s
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_ORD_S);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_eq_us(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_eq_us
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ueq", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_EQ_US);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_nge_uq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_nge_uq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ult", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGE_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_ngt_uq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ngt_uq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ule", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NGT_UQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_false_os(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_false_os
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 27)
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_FALSE_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_neq_os(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_neq_os
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"one", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_NEQ_OS);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_ge_oq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_ge_oq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oge", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GE_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_gt_oq(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_gt_oq
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ogt", metadata !"fpexcept.strict")
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_GT_OQ);
+}
+
+__mmask8 test_mm_mask_cmp_pd_mask_true_us(__mmask8 m, __m128d a, __m128d b) {
+  // CHECK-LABEL: test_mm_mask_cmp_pd_mask_true_us
+  // CHECK: [[CMP:%.*]] = call <2 x i1> @llvm.x86.avx512.cmp.pd.128(<2 x double> %{{.*}}, <2 x double> %{{.*}}, i32 31)
+  // CHECK: and <2 x i1> [[CMP]], {{.*}}
+  return _mm_mask_cmp_pd_mask(m, a, b, _CMP_TRUE_US);
+}

diff  --git a/clang/test/CodeGen/sse-builtins-constrained-cmp.c b/clang/test/CodeGen/sse-builtins-constrained-cmp.c
new file mode 100644
index 000000000000..15698b0b92fb
--- /dev/null
+++ b/clang/test/CodeGen/sse-builtins-constrained-cmp.c
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+
+
+#include <immintrin.h>
+
+__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpeq_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpeq_ps(__a, __b);
+}
+
+__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpge_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpge_ps(__a, __b);
+}
+
+__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpgt_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpgt_ps(__a, __b);
+}
+
+__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmple_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmple_ps(__a, __b);
+}
+
+__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmplt_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmplt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpneq_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpneq_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnge_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpnge_ps(__a, __b);
+}
+
+__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpngt_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpngt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnle_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpnle_ps(__a, __b);
+}
+
+__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpnlt_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpnlt_ps(__a, __b);
+}
+
+__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpord_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpord_ps(__a, __b);
+}
+
+__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
+  // CHECK-LABEL: @test_mm_cmpunord_ps
+  // CHECK:         [[CMP:%.*]] = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f32(<4 x float> %{{.*}}, <4 x float> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
+  // CHECK-NEXT:    ret <4 x float> [[BC]]
+  return _mm_cmpunord_ps(__a, __b);
+}

diff  --git a/clang/test/CodeGen/sse2-builtins-constrained-cmp.c b/clang/test/CodeGen/sse2-builtins-constrained-cmp.c
new file mode 100644
index 000000000000..95384a13605b
--- /dev/null
+++ b/clang/test/CodeGen/sse2-builtins-constrained-cmp.c
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -ffp-exception-behavior=strict -o - -Wall -Werror | FileCheck %s
+
+
+#include <immintrin.h>
+
+__m128d test_mm_cmpeq_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpeq_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"oeq", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpeq_pd(A, B);
+}
+
+__m128d test_mm_cmpge_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpge_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpge_pd(A, B);
+}
+
+__m128d test_mm_cmpgt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpgt_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpgt_pd(A, B);
+}
+
+__m128d test_mm_cmple_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmple_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ole", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmple_pd(A, B);
+}
+
+__m128d test_mm_cmplt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmplt_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"olt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmplt_pd(A, B);
+}
+
+__m128d test_mm_cmpneq_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpneq_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"une", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpneq_pd(A, B);
+}
+
+__m128d test_mm_cmpnge_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnge_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpnge_pd(A, B);
+}
+
+__m128d test_mm_cmpngt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpngt_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpngt_pd(A, B);
+}
+
+__m128d test_mm_cmpnle_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnle_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ugt", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpnle_pd(A, B);
+}
+
+__m128d test_mm_cmpnlt_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpnlt_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmps.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uge", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpnlt_pd(A, B);
+}
+
+__m128d test_mm_cmpord_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpord_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"ord", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpord_pd(A, B);
+}
+
+__m128d test_mm_cmpunord_pd(__m128d A, __m128d B) {
+  // CHECK-LABEL: test_mm_cmpunord_pd
+  // CHECK:         [[CMP:%.*]] = call <2 x i1> @llvm.experimental.constrained.fcmp.v2f64(<2 x double> %{{.*}}, <2 x double> %{{.*}}, metadata !"uno", metadata !"fpexcept.strict")
+  // CHECK-NEXT:    [[SEXT:%.*]] = sext <2 x i1> [[CMP]] to <2 x i64>
+  // CHECK-NEXT:    [[BC:%.*]] = bitcast <2 x i64> [[SEXT]] to <2 x double>
+  // CHECK-NEXT:    ret <2 x double> [[BC]]
+  return _mm_cmpunord_pd(A, B);
+}


        


More information about the cfe-commits mailing list