[clang] 0377416 - [CIR][X86] Add support for cmp builtins (#174318)

Wed Feb 11 17:58:07 PST 2026

Author: Zhihui Yang
Date: 2026-02-11T17:58:02-08:00
New Revision: 0377416e36784c86db0566550773851995f13edf

URL: https://github.com/llvm/llvm-project/commit/0377416e36784c86db0566550773851995f13edf
DIFF: https://github.com/llvm/llvm-project/commit/0377416e36784c86db0566550773851995f13edf.diff

LOG: [CIR][X86] Add support for cmp builtins (#174318)

Part of https://github.com/llvm/llvm-project/issues/167765
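Add ClangIR codegen support for the X86 integer compare-to-mask builtins
(e.g. `__builtin_ia32_ucmpq512_mask`). The constant-result predicates,
cc == 3 (all-zeros mask) and cc == 7 (all-ones mask), are now emitted
directly instead of reporting errorNYI.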

Added: 
    clang/test/CIR/CodeGenBuiltins/X86/cmp-builtins.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index a17c921b7ebb3..8e6d5d0a07a96 100644

--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -368,20 +368,22 @@ static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder,
 // TODO: The cgf parameter should be removed when all the NYI cases are
 // implemented.
 static std::optional<mlir::Value>
-emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder, unsigned cc,
-                     bool isSigned, ArrayRef<mlir::Value> ops,
-                     mlir::Location loc) {
+emitX86MaskedCompare(CIRGenBuilderTy &builder, unsigned cc, bool isSigned,
+                     ArrayRef<mlir::Value> ops, mlir::Location loc) {
   assert((ops.size() == 2 || ops.size() == 4) &&
          "Unexpected number of arguments");
   unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
   mlir::Value cmp;
-
   if (cc == 3) {
-    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
-    return {};
+    cmp = builder.getNullValue(
+        cir::VectorType::get(builder.getSIntNTy(1), numElts), loc);
   } else if (cc == 7) {
-    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare cc == 7");
-    return {};
+    cir::VectorType resultTy =
+        cir::VectorType::get(builder.getSIntNTy(1), numElts);
+    llvm::APInt allOnes = llvm::APInt::getAllOnes(1);
+    cmp = cir::VecSplatOp::create(
+        builder, loc, resultTy,
+        builder.getConstAPInt(loc, builder.getSIntNTy(1), allOnes));
   } else {
     cir::CmpOpKind pred;
     switch (cc) {
@@ -425,7 +427,7 @@ static std::optional<mlir::Value> emitX86ConvertToMask(CIRGenFunction &cgf,
                                                        mlir::Value in,
                                                        mlir::Location loc) {
   cir::ConstantOp zero = builder.getNullValue(in.getType(), loc);
-  return emitX86MaskedCompare(cgf, builder, 1, true, {in, zero}, loc);
+  return emitX86MaskedCompare(builder, 1, true, {in, zero}, loc);
 }
 
 static std::optional<mlir::Value> emitX86SExtMask(CIRGenBuilderTy &builder,
@@ -1912,11 +1914,11 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_ucmpd512_mask:
   case X86::BI__builtin_ia32_ucmpq128_mask:
   case X86::BI__builtin_ia32_ucmpq256_mask:
-  case X86::BI__builtin_ia32_ucmpq512_mask:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return mlir::Value{};
+  case X86::BI__builtin_ia32_ucmpq512_mask: {
+    int64_t cc = CIRGenFunction::getZExtIntValueFromConstOp(ops[2]) & 0x7;
+    return emitX86MaskedCompare(builder, cc, 1, ops,
+                                getLoc(expr->getExprLoc()));
+  }
   case X86::BI__builtin_ia32_vpcomb:
   case X86::BI__builtin_ia32_vpcomw:
   case X86::BI__builtin_ia32_vpcomd:

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/cmp-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/cmp-builtins.c
new file mode 100644
index 0000000000000..6e6eb392309e6
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/cmp-builtins.c
@@ -0,0 +1,720 @@
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw -Wno-implicit-function-declaration -fclangir -emit-cir -o %t.cir %s
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw -Wno-implicit-function-declaration -fclangir -emit-llvm -o %t.ll %s
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+
+// RUN: %clang_cc1 -x c -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -ffreestanding -triple=x86_64-unknown-linux -target-feature +avx512vl -target-feature +avx512bw  -emit-llvm -Wall -Werror %s -o - | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+__mmask16 test_mm_cmp_epi8_mask(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epi8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm_cmp_epi8_mask
+  // LLVM: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_cmp_epi8_mask
+  // OGCG: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 0);
+}
+
+__mmask16 test_mm_cmp_epi8_mask_imm3(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epi8_mask_imm3
+  // CIR: cir.const #cir.zero : !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm_cmp_epi8_mask_imm3
+  // LLVM: store i16 0, ptr %{{.*}}, align 2
+  // LLVM: load i16, ptr %{{.*}}, align 2
+  // LLVM: ret i16 %{{.*}}
+  // OGCG-LABEL: test_mm_cmp_epi8_mask_imm3
+  // OGCG: ret i16 0
+  return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 3);
+}
+
+__mmask16 test_mm_cmp_epi8_mask_imm7(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epi8_mask_imm7
+  // CIR: cir.const #cir.int<-1> : !cir.int<s, 1>
+  // CIR: cir.vec.splat {{%.*}} : !cir.int<s, 1>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm_cmp_epi8_mask_imm7
+  // LLVM: store i16 -1, ptr %{{.*}}
+  // LLVM: load i16, ptr %{{.*}}
+  // LLVM: ret i16 %{{.*}}
+  // OGCG-LABEL: test_mm_cmp_epi8_mask_imm7
+  // OGCG: ret i16 -1
+  return (__mmask16)_mm_cmp_epi8_mask(__a, __b, 7);
+}
+
+__mmask16 test_mm_mask_cmp_epi8_mask(__mmask16 __m, __m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_mask_cmp_epi8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm_mask_cmp_epi8_mask
+  // LLVM: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  // LLVM: and <16 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_mask_cmp_epi8_mask
+  // OGCG: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  // OGCG: and <16 x i1> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm_mask_cmp_epi8_mask(__m, __a, __b, 0);
+}
+
+__mmask32 test_mm256_cmp_epi8_mask(__m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_cmp_epi8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s8i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm256_cmp_epi8_mask
+  // LLVM: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_cmp_epi8_mask
+  // OGCG: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm256_cmp_epi8_mask(__a, __b, 0);
+}
+
+__mmask32 test_mm256_mask_cmp_epi8_mask(__mmask32 __m, __m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_mask_cmp_epi8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s8i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm256_mask_cmp_epi8_mask
+  // LLVM: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  // LLVM: and <32 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_mask_cmp_epi8_mask
+  // OGCG: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  // OGCG: and <32 x i1> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm256_mask_cmp_epi8_mask(__m, __a, __b, 0);
+}
+
+__mmask64 test_mm512_cmp_epi8_mask(__m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_cmp_epi8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<64 x !s8i>, !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+  // LLVM-LABEL: test_mm512_cmp_epi8_mask
+  // LLVM: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_cmp_epi8_mask
+  // OGCG: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  return (__mmask64)_mm512_cmp_epi8_mask(__a, __b, 0);
+}
+
+__mmask64 test_mm512_mask_cmp_epi8_mask(__mmask64 __m, __m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_mask_cmp_epi8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<64 x !s8i>, !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+  // LLVM-LABEL: test_mm512_mask_cmp_epi8_mask
+  // LLVM: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  // LLVM: and <64 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_mask_cmp_epi8_mask
+  // OGCG: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  // OGCG: and <64 x i1> %{{.*}}, %{{.*}}
+  return (__mmask64)_mm512_mask_cmp_epi8_mask(__m, __a, __b, 0);
+}
+
+__mmask8 test_mm_cmp_epi16_mask(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epi16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_cmp_epi16_mask
+  // LLVM: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_cmp_epi16_mask
+  // OGCG: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_epi16_mask(__a, __b, 0);
+}
+
+__mmask8 test_mm_mask_cmp_epi16_mask(__mmask8 __m, __m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_mask_cmp_epi16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_mask_cmp_epi16_mask
+  // LLVM: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  // LLVM: and <8 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_mask_cmp_epi16_mask
+  // OGCG: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  // OGCG: and <8 x i1> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_mask_cmp_epi16_mask(__m, __a, __b, 0);
+}
+
+__mmask16 test_mm256_cmp_epi16_mask(__m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_cmp_epi16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s16i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm256_cmp_epi16_mask
+  // LLVM: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_cmp_epi16_mask
+  // OGCG: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm256_cmp_epi16_mask(__a, __b, 0);
+}
+
+__mmask16 test_mm256_mask_cmp_epi16_mask(__mmask16 __m, __m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_mask_cmp_epi16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s16i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm256_mask_cmp_epi16_mask
+  // LLVM: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  // LLVM: and <16 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_mask_cmp_epi16_mask
+  // OGCG: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  // OGCG: and <16 x i1> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm256_mask_cmp_epi16_mask(__m, __a, __b, 0);
+}
+
+__mmask32 test_mm512_cmp_epi16_mask(__m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_cmp_epi16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s16i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm512_cmp_epi16_mask
+  // LLVM: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_cmp_epi16_mask
+  // OGCG: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm512_cmp_epi16_mask(__a, __b, 0);
+}
+
+__mmask32 test_mm512_mask_cmp_epi16_mask(__mmask32 __m, __m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_mask_cmp_epi16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s16i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm512_mask_cmp_epi16_mask
+  // LLVM: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  // LLVM: and <32 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_mask_cmp_epi16_mask
+  // OGCG: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  // OGCG: and <32 x i1> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm512_mask_cmp_epi16_mask(__m, __a, __b, 0);
+}
+
+__mmask8 test_mm_cmp_epi32_mask(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epi32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_cmp_epi32_mask
+  // LLVM: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm_cmp_epi32_mask
+  // OGCG: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm_cmp_epi32_mask(__a, __b, 0);
+}
+
+__mmask8 test_mm_mask_cmp_epi32_mask(__mmask8 __m, __m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_mask_cmp_epi32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_mask_cmp_epi32_mask
+  // LLVM: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // LLVM: bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: and <4 x i1> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm_mask_cmp_epi32_mask
+  // OGCG: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: and <4 x i1> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm_mask_cmp_epi32_mask(__m, __a, __b, 0);
+}
+
+__mmask8 test_mm256_cmp_epi32_mask(__m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_cmp_epi32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s32i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_cmp_epi32_mask
+  // LLVM: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_cmp_epi32_mask
+  // OGCG: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_epi32_mask(__a, __b, 0);
+}
+
+__mmask8 test_mm256_mask_cmp_epi32_mask(__mmask8 __m, __m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_mask_cmp_epi32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s32i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_mask_cmp_epi32_mask
+  // LLVM: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  // LLVM: and <8 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_mask_cmp_epi32_mask
+  // OGCG: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  // OGCG: and <8 x i1> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_mask_cmp_epi32_mask(__m, __a, __b, 0);
+}
+
+__mmask16 test_mm512_cmp_epi32_mask(__m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_cmp_epi32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s32i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm512_cmp_epi32_mask
+  // LLVM: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_cmp_epi32_mask
+  // OGCG: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm512_cmp_epi32_mask(__a, __b, 0);
+}
+
+__mmask16 test_mm512_mask_cmp_epi32_mask(__mmask16 __m, __m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_mask_cmp_epi32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s32i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm512_mask_cmp_epi32_mask
+  // LLVM: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // LLVM: and <16 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_mask_cmp_epi32_mask
+  // OGCG: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // OGCG: and <16 x i1> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm512_mask_cmp_epi32_mask(__m, __a, __b, 0);
+}
+
+__mmask8 test_mm_cmp_epi64_mask(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epi64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<2 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_cmp_epi64_mask
+  // LLVM: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  // OGCG-LABEL: test_mm_cmp_epi64_mask
+  // OGCG: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  return (__mmask8)_mm_cmp_epi64_mask(__a, __b, 0);
+}
+
+__mmask8 test_mm_mask_cmp_epi64_mask(__mmask8 __m, __m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_mask_cmp_epi64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<2 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_mask_cmp_epi64_mask
+  // LLVM: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // LLVM: and <2 x i1> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  // OGCG-LABEL: test_mm_mask_cmp_epi64_mask
+  // OGCG: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // OGCG: and <2 x i1> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  return (__mmask8)_mm_mask_cmp_epi64_mask(__m, __a, __b, 0);
+}
+
+__mmask8 test_mm256_cmp_epi64_mask(__m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_cmp_epi64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s64i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_cmp_epi64_mask
+  // LLVM: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm256_cmp_epi64_mask
+  // OGCG: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm256_cmp_epi64_mask(__a, __b, 0);
+}
+
+__mmask8 test_mm256_mask_cmp_epi64_mask(__mmask8 __m, __m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_mask_cmp_epi64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s64i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_mask_cmp_epi64_mask
+  // LLVM: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // LLVM: bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: and <4 x i1> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm256_mask_cmp_epi64_mask
+  // OGCG: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: and <4 x i1> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm256_mask_cmp_epi64_mask(__m, __a, __b, 0);
+}
+
+__mmask16 test_mm_cmp_epu8_mask(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epu8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm_cmp_epu8_mask
+  // LLVM: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_cmp_epu8_mask
+  // OGCG: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm_cmp_epu8_mask(__a, __b, 0);
+}
+
+__mmask16 test_mm_mask_cmp_epu8_mask(__mmask16 __m, __m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_mask_cmp_epu8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s8i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm_mask_cmp_epu8_mask
+  // LLVM: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  // LLVM: and <16 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_mask_cmp_epu8_mask
+  // OGCG: icmp eq <16 x i8> %{{.*}}, %{{.*}}
+  // OGCG: and <16 x i1> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm_mask_cmp_epu8_mask(__m, __a, __b, 0);
+}
+
+__mmask32 test_mm256_cmp_epu8_mask(__m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_cmp_epu8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s8i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm256_cmp_epu8_mask
+  // LLVM: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_cmp_epu8_mask
+  // OGCG: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm256_cmp_epu8_mask(__a, __b, 0);
+}
+
+__mmask32 test_mm256_mask_cmp_epu8_mask(__mmask32 __m, __m256i __a, __m256i __b) {
+  // CIR-LABEL: test_mm256_mask_cmp_epu8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s8i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm256_mask_cmp_epu8_mask
+  // LLVM: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  // LLVM: and <32 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_mask_cmp_epu8_mask
+  // OGCG: icmp eq <32 x i8> %{{.*}}, %{{.*}}
+  // OGCG: and <32 x i1> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm256_mask_cmp_epu8_mask(__m, __a, __b, 0);
+}
+
+__mmask64 test_mm512_cmp_epu8_mask(__m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_cmp_epu8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<64 x !s8i>, !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+  // LLVM-LABEL: test_mm512_cmp_epu8_mask
+  // LLVM: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_cmp_epu8_mask
+  // OGCG: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  return (__mmask64)_mm512_cmp_epu8_mask(__a, __b, 0);
+}
+
+__mmask64 test_mm512_mask_cmp_epu8_mask(__mmask64 __m, __m512i __a, __m512i __b) {
+  // CIR-LABEL: test_mm512_mask_cmp_epu8_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<64 x !s8i>, !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+  // LLVM-LABEL: test_mm512_mask_cmp_epu8_mask
+  // LLVM: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  // LLVM: and <64 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_mask_cmp_epu8_mask
+  // OGCG: icmp eq <64 x i8> %{{.*}}, %{{.*}}
+  // OGCG: and <64 x i1> %{{.*}}, %{{.*}}
+  return (__mmask64)_mm512_mask_cmp_epu8_mask(__m, __a, __b, 0);
+}
+
+__mmask8 test_mm_cmp_epu16_mask(__m128i __a, __m128i __b) {
+  // CIR-LABEL: test_mm_cmp_epu16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_cmp_epu16_mask
+  // LLVM: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_cmp_epu16_mask
+  // OGCG: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_cmp_epu16_mask(__a, __b, 0);
+}
+
+__mmask8 test_mm_mask_cmp_epu16_mask(__mmask8 __m, __m128i __a, __m128i __b) { // Masked epu16 compare, 8 lanes. Checks expect an eq compare on <8 x i16> then an and on <8 x i1>; the CIR checks also pin the !u8i <-> <8 x i1> bitcasts around the and.
+  // CIR-LABEL: test_mm_mask_cmp_epu16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s16i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_mask_cmp_epu16_mask
+  // LLVM: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  // LLVM: and <8 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm_mask_cmp_epu16_mask
+  // OGCG: icmp eq <8 x i16> %{{.*}}, %{{.*}}
+  // OGCG: and <8 x i1> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm_mask_cmp_epu16_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask16 test_mm256_cmp_epu16_mask(__m256i __a, __m256i __b) { // Unmasked epu16 compare, 16 lanes. Checks expect an eq compare on <16 x i16>; the CIR checks also pin a <16 x i1> -> !u16i bitcast after it.
+  // CIR-LABEL: test_mm256_cmp_epu16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s16i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm256_cmp_epu16_mask
+  // LLVM: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_cmp_epu16_mask
+  // OGCG: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm256_cmp_epu16_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask16 test_mm256_mask_cmp_epu16_mask(__mmask16 __m, __m256i __a, __m256i __b) { // Masked epu16 compare, 16 lanes. Checks expect an eq compare on <16 x i16> then an and on <16 x i1>; the CIR checks also pin the !u16i <-> <16 x i1> bitcasts around the and.
+  // CIR-LABEL: test_mm256_mask_cmp_epu16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s16i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm256_mask_cmp_epu16_mask
+  // LLVM: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  // LLVM: and <16 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_mask_cmp_epu16_mask
+  // OGCG: icmp eq <16 x i16> %{{.*}}, %{{.*}}
+  // OGCG: and <16 x i1> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm256_mask_cmp_epu16_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask32 test_mm512_cmp_epu16_mask(__m512i __a, __m512i __b) { // Unmasked epu16 compare, 32 lanes. Checks expect an eq compare on <32 x i16>; the CIR checks also pin a <32 x i1> -> !u32i bitcast after it.
+  // CIR-LABEL: test_mm512_cmp_epu16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s16i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm512_cmp_epu16_mask
+  // LLVM: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_cmp_epu16_mask
+  // OGCG: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm512_cmp_epu16_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask32 test_mm512_mask_cmp_epu16_mask(__mmask32 __m, __m512i __a, __m512i __b) { // Masked epu16 compare, 32 lanes. Checks expect an eq compare on <32 x i16> then an and on <32 x i1>; the CIR checks also pin the !u32i <-> <32 x i1> bitcasts around the and.
+  // CIR-LABEL: test_mm512_mask_cmp_epu16_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<32 x !s16i>, !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+  // LLVM-LABEL: test_mm512_mask_cmp_epu16_mask
+  // LLVM: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  // LLVM: and <32 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_mask_cmp_epu16_mask
+  // OGCG: icmp eq <32 x i16> %{{.*}}, %{{.*}}
+  // OGCG: and <32 x i1> %{{.*}}, %{{.*}}
+  return (__mmask32)_mm512_mask_cmp_epu16_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm_cmp_epu32_mask(__m128i __a, __m128i __b) { // Unmasked epu32 compare, 4 lanes. Checks expect an eq compare on <4 x i32>, then a shuffle with a zero vector (indices 0-7) that widens <4 x i1> to <8 x i1>; the CIR checks also pin the final bitcast to !u8i.
+  // CIR-LABEL: test_mm_cmp_epu32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_cmp_epu32_mask
+  // LLVM: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm_cmp_epu32_mask
+  // OGCG: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm_cmp_epu32_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm_mask_cmp_epu32_mask(__mmask8 __m, __m128i __a, __m128i __b) { // Masked epu32 compare, 4 lanes. Checks expect, in order: eq compare on <4 x i32>, 8-bit mask -> <8 x i1> bitcast, shuffle down to lanes 0-3, and on <4 x i1>, shuffle with a zero vector back up to <8 x i1>; the CIR checks also pin the final bitcast to !u8i.
+  // CIR-LABEL: test_mm_mask_cmp_epu32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s32i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_mask_cmp_epu32_mask
+  // LLVM: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // LLVM: bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: and <4 x i1> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm_mask_cmp_epu32_mask
+  // OGCG: icmp eq <4 x i32> %{{.*}}, %{{.*}}
+  // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: and <4 x i1> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm_mask_cmp_epu32_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm256_cmp_epu32_mask(__m256i __a, __m256i __b) { // Unmasked epu32 compare, 8 lanes. Checks expect an eq compare on <8 x i32>; the CIR checks also pin a <8 x i1> -> !u8i bitcast after it.
+  // CIR-LABEL: test_mm256_cmp_epu32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s32i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_cmp_epu32_mask
+  // LLVM: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_cmp_epu32_mask
+  // OGCG: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_cmp_epu32_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm256_mask_cmp_epu32_mask(__mmask8 __m, __m256i __a, __m256i __b) { // Masked epu32 compare, 8 lanes. Checks expect an eq compare on <8 x i32> then an and on <8 x i1>; the CIR checks also pin the !u8i <-> <8 x i1> bitcasts around the and.
+  // CIR-LABEL: test_mm256_mask_cmp_epu32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s32i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_mask_cmp_epu32_mask
+  // LLVM: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  // LLVM: and <8 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm256_mask_cmp_epu32_mask
+  // OGCG: icmp eq <8 x i32> %{{.*}}, %{{.*}}
+  // OGCG: and <8 x i1> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm256_mask_cmp_epu32_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask16 test_mm512_cmp_epu32_mask(__m512i __a, __m512i __b) { // Unmasked epu32 compare, 16 lanes. Checks expect an eq compare on <16 x i32>; the CIR checks also pin a <16 x i1> -> !u16i bitcast after it.
+  // CIR-LABEL: test_mm512_cmp_epu32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s32i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm512_cmp_epu32_mask
+  // LLVM: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_cmp_epu32_mask
+  // OGCG: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm512_cmp_epu32_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask16 test_mm512_mask_cmp_epu32_mask(__mmask16 __m, __m512i __a, __m512i __b) { // Masked epu32 compare, 16 lanes. Checks expect an eq compare on <16 x i32> then an and on <16 x i1>; the CIR checks also pin the !u16i <-> <16 x i1> bitcasts around the and.
+  // CIR-LABEL: test_mm512_mask_cmp_epu32_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<16 x !s32i>, !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<16 x !cir.int<s, 1>> -> !u16i
+  // LLVM-LABEL: test_mm512_mask_cmp_epu32_mask
+  // LLVM: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // LLVM: and <16 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_mask_cmp_epu32_mask
+  // OGCG: icmp eq <16 x i32> %{{.*}}, %{{.*}}
+  // OGCG: and <16 x i1> %{{.*}}, %{{.*}}
+  return (__mmask16)_mm512_mask_cmp_epu32_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm_cmp_epu64_mask(__m128i __a, __m128i __b) { // Unmasked epu64 compare, 2 lanes. Checks expect an eq compare on <2 x i64>, then a shuffle with a zero vector that widens <2 x i1> to <8 x i1> using indices 0, 1, 2, 3, 2, 3, 2, 3; the CIR checks also pin the final bitcast to !u8i.
+  // CIR-LABEL: test_mm_cmp_epu64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<2 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_cmp_epu64_mask
+  // LLVM: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  // OGCG-LABEL: test_mm_cmp_epu64_mask
+  // OGCG: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  return (__mmask8)_mm_cmp_epu64_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm_mask_cmp_epu64_mask(__mmask8 __m, __m128i __a, __m128i __b) { // Masked epu64 compare, 2 lanes. Checks expect, in order: eq compare on <2 x i64>, 8-bit mask -> <8 x i1> bitcast, shuffle down to lanes 0-1, and on <2 x i1>, shuffle with a zero vector back up to <8 x i1> (indices 0, 1, 2, 3, 2, 3, 2, 3); the CIR checks also pin the final bitcast to !u8i.
+  // CIR-LABEL: test_mm_mask_cmp_epu64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<2 x !s64i>, !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<2 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<2 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm_mask_cmp_epu64_mask
+  // LLVM: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // LLVM: bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // LLVM: and <2 x i1> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  // OGCG-LABEL: test_mm_mask_cmp_epu64_mask
+  // OGCG: icmp eq <2 x i64> %{{.*}}, %{{.*}}
+  // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x i32> <i32 0, i32 1>
+  // OGCG: and <2 x i1> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <2 x i1> %{{.*}}, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+  return (__mmask8)_mm_mask_cmp_epu64_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm256_cmp_epu64_mask(__m256i __a, __m256i __b) { // Unmasked epu64 compare, 4 lanes. Checks expect an eq compare on <4 x i64>, then a shuffle with a zero vector (indices 0-7) that widens <4 x i1> to <8 x i1>; the CIR checks also pin the final bitcast to !u8i.
+  // CIR-LABEL: test_mm256_cmp_epu64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s64i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_cmp_epu64_mask
+  // LLVM: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm256_cmp_epu64_mask
+  // OGCG: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm256_cmp_epu64_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm256_mask_cmp_epu64_mask(__mmask8 __m, __m256i __a, __m256i __b) { // Masked epu64 compare, 4 lanes. Checks expect, in order: eq compare on <4 x i64>, 8-bit mask -> <8 x i1> bitcast, shuffle down to lanes 0-3, and on <4 x i1>, shuffle with a zero vector back up to <8 x i1>; the CIR checks also pin the final bitcast to !u8i.
+  // CIR-LABEL: test_mm256_mask_cmp_epu64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<4 x !s64i>, !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i] : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.const #cir.zero : !cir.vector<4 x !cir.int<s, 1>>
+  // CIR: cir.vec.shuffle({{%.*}}, {{%.*}} : !cir.vector<4 x !cir.int<s, 1>>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm256_mask_cmp_epu64_mask
+  // LLVM: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // LLVM: bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: and <4 x i1> %{{.*}}, %{{.*}}
+  // LLVM: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  // OGCG-LABEL: test_mm256_mask_cmp_epu64_mask
+  // OGCG: icmp eq <4 x i64> %{{.*}}, %{{.*}}
+  // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: and <4 x i1> %{{.*}}, %{{.*}}
+  // OGCG: shufflevector <4 x i1> %{{.*}}, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  return (__mmask8)_mm256_mask_cmp_epu64_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm512_cmp_epu64_mask(__m512i __a, __m512i __b) { // Unmasked epu64 compare, 8 lanes. Checks expect an eq compare on <8 x i64>; the CIR checks also pin a <8 x i1> -> !u8i bitcast after it.
+  // CIR-LABEL: test_mm512_cmp_epu64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s64i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm512_cmp_epu64_mask
+  // LLVM: icmp eq <8 x i64> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_cmp_epu64_mask
+  // OGCG: icmp eq <8 x i64> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm512_cmp_epu64_mask(__a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}
+
+__mmask8 test_mm512_mask_cmp_epu64_mask(__mmask8 __m, __m512i __a, __m512i __b) { // Masked epu64 compare, 8 lanes. Checks expect an eq compare on <8 x i64> then an and on <8 x i1>; the CIR checks also pin the !u8i <-> <8 x i1> bitcasts around the and.
+  // CIR-LABEL: test_mm512_mask_cmp_epu64_mask
+  // CIR: cir.vec.cmp(eq, {{%.*}}, {{%.*}}) : !cir.vector<8 x !s64i>, !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.binop(and, {{%.*}}, {{%.*}}) : !cir.vector<8 x !cir.int<s, 1>>
+  // CIR: cir.cast bitcast {{%.*}} : !cir.vector<8 x !cir.int<s, 1>> -> !u8i
+  // LLVM-LABEL: test_mm512_mask_cmp_epu64_mask
+  // LLVM: icmp eq <8 x i64> %{{.*}}, %{{.*}}
+  // LLVM: and <8 x i1> %{{.*}}, %{{.*}}
+  // OGCG-LABEL: test_mm512_mask_cmp_epu64_mask
+  // OGCG: icmp eq <8 x i64> %{{.*}}, %{{.*}}
+  // OGCG: and <8 x i1> %{{.*}}, %{{.*}}
+  return (__mmask8)_mm512_mask_cmp_epu64_mask(__m, __a, __b, 0); // Immediate 0 is the predicate; the checks in all three groups expect an eq compare.
+}