[clang] c6fc6ad - [CIR][X86] Add support for `intersect` builtins (#172554)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 12 17:04:05 PST 2026
Author: Vishruth Thimmaiah
Date: 2026-01-12T17:04:01-08:00
New Revision: c6fc6adb7e32bf6fd77e3e73ad74752881e131fb
URL: https://github.com/llvm/llvm-project/commit/c6fc6adb7e32bf6fd77e3e73ad74752881e131fb
DIFF: https://github.com/llvm/llvm-project/commit/c6fc6adb7e32bf6fd77e3e73ad74752881e131fb.diff
LOG: [CIR][X86] Add support for `intersect` builtins (#172554)
Adds CIR codegen support for the
`__builtin_ia32_vp2intersect_d`/`__builtin_ia32_vp2intersect_q` x86
builtins.
Part of #167765
---------
Signed-off-by: vishruth-thimmaiah <vishruththimmaiah at gmail.com>
Added:
clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c
clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c
Modified:
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Removed:
################################################################################
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f88e57aca6a08..cc3af713bc8c2 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -264,15 +264,15 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc,
mlir::ValueRange{lhsVec, rhsVec});
}
-// TODO: The cgf parameter should be removed when all the NYI cases are
-// implemented.
-static std::optional<mlir::Value>
-emitX86MaskedCompareResult(CIRGenFunction &cgf, CIRGenBuilderTy &builder,
- mlir::Value cmp, unsigned numElts,
- mlir::Value maskIn, mlir::Location loc) {
+static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder,
+ mlir::Value cmp, unsigned numElts,
+ mlir::Value maskIn,
+ mlir::Location loc) {
if (maskIn) {
- cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
- return {};
+ auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(maskIn.getDefiningOp());
+ if (!c || !c.isAllOnesValue())
+ cmp = builder.createAnd(loc, cmp,
+ getMaskVecValue(builder, loc, maskIn, numElts));
}
if (numElts < 8) {
llvm::SmallVector<mlir::Attribute> indices;
@@ -340,7 +340,7 @@ emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder, unsigned cc,
if (ops.size() == 4)
maskIn = ops[3];
- return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc);
+ return emitX86MaskedCompareResult(builder, cmp, numElts, maskIn, loc);
}
// TODO: The cgf parameter should be removed when all the NYI cases are
@@ -1840,12 +1840,66 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_fpclasspd128_mask:
case X86::BI__builtin_ia32_fpclasspd256_mask:
case X86::BI__builtin_ia32_fpclasspd512_mask:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
case X86::BI__builtin_ia32_vp2intersect_q_512:
case X86::BI__builtin_ia32_vp2intersect_q_256:
case X86::BI__builtin_ia32_vp2intersect_q_128:
case X86::BI__builtin_ia32_vp2intersect_d_512:
case X86::BI__builtin_ia32_vp2intersect_d_256:
- case X86::BI__builtin_ia32_vp2intersect_d_128:
+ case X86::BI__builtin_ia32_vp2intersect_d_128: {
+ unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ StringRef intrinsicName;
+
+ switch (builtinID) {
+ default:
+ llvm_unreachable("Unexpected builtin");
+ case X86::BI__builtin_ia32_vp2intersect_q_512:
+ intrinsicName = "x86.avx512.vp2intersect.q.512";
+ break;
+ case X86::BI__builtin_ia32_vp2intersect_q_256:
+ intrinsicName = "x86.avx512.vp2intersect.q.256";
+ break;
+ case X86::BI__builtin_ia32_vp2intersect_q_128:
+ intrinsicName = "x86.avx512.vp2intersect.q.128";
+ break;
+ case X86::BI__builtin_ia32_vp2intersect_d_512:
+ intrinsicName = "x86.avx512.vp2intersect.d.512";
+ break;
+ case X86::BI__builtin_ia32_vp2intersect_d_256:
+ intrinsicName = "x86.avx512.vp2intersect.d.256";
+ break;
+ case X86::BI__builtin_ia32_vp2intersect_d_128:
+ intrinsicName = "x86.avx512.vp2intersect.d.128";
+ break;
+ }
+
+ auto resVector = cir::VectorType::get(builder.getBoolTy(), numElts);
+
+ cir::RecordType resRecord =
+ cir::RecordType::get(&getMLIRContext(), {resVector, resVector}, false,
+ false, cir::RecordType::RecordKind::Struct);
+
+ mlir::Value call =
+ emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), intrinsicName,
+ resRecord, mlir::ValueRange{ops[0], ops[1]});
+ mlir::Value result =
+ cir::ExtractMemberOp::create(builder, loc, resVector, call, 0);
+ result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc);
+ Address addr = Address(
+ ops[2], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8)));
+ builder.createStore(loc, result, addr);
+
+ result = cir::ExtractMemberOp::create(builder, loc, resVector, call, 1);
+ result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc);
+ addr = Address(ops[3],
+ clang::CharUnits::fromQuantity(std::max(1U, numElts / 8)));
+ builder.createStore(loc, result, addr);
+ return mlir::Value{};
+ }
case X86::BI__builtin_ia32_vpmultishiftqb128:
case X86::BI__builtin_ia32_vpmultishiftqb256:
case X86::BI__builtin_ia32_vpmultishiftqb512:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c
new file mode 100644
index 0000000000000..6882d2e91961e
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c
@@ -0,0 +1,161 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+// CIR: !rec_anon_struct = !cir.record<struct {!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.bool>}>
+// CIR: !rec_anon_struct1 = !cir.record<struct {!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.bool>}>
+// CIR: !rec_anon_struct2 = !cir.record<struct {!cir.vector<2 x !cir.bool>, !cir.vector<2 x !cir.bool>}>
+void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) {
+ // CIR-LABEL: mm256_2intersect_epi32
+ // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.256" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s32i>, !cir.vector<8 x !s32i>) -> !rec_anon_struct
+ // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+ // LLVM-LABEL: test_mm256_2intersect_epi32
+ // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0
+ // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+ // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1
+ // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+ // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+ // OGCG-LABEL: test_mm256_2intersect_epi32
+ // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0
+ // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+ // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1
+ // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+ // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+ _mm256_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) {
+ // CIR-LABEL: mm256_2intersect_epi64
+ // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.256" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s64i>, !cir.vector<4 x !s64i>) -> !rec_anon_struct1
+ // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool>
+ // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+ // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool>
+ // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+ // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+ // LLVM-LABEL: test_mm256_2intersect_epi64
+ // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+ // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+ // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+ // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+ // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+ // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+ // OGCG-LABEL: test_mm256_2intersect_epi64
+ // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+ // OGCG: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+ // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+ // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+ // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+ // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+ _mm256_2intersect_epi64(a, b, m0, m1);
+}
+
+void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) {
+ // CIR-LABEL: mm_2intersect_epi32
+ // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.128" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !rec_anon_struct1
+ // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool>
+ // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+ // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool>
+ // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool>
+ // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+ // LLVM-LABEL: test_mm_2intersect_epi32
+ // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+ // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+ // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+ // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+ // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+ // OGCG-LABEL: test_mm_2intersect_epi32
+ // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // OGCG: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0
+ // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+ // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1
+ // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+ // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+ _mm_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) {
+ // CIR-LABEL: mm_2intersect_epi64
+ // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.128" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct2
+ // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool>
+ // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool>
+ // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool>
+ // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool>
+ // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+ // LLVM-LABEL: test_mm_2intersect_epi64
+ // LLVM: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // LLVM: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0
+ // LLVM: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+ // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // LLVM: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1
+ // LLVM: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+ // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+ // OGCG-LABEL: test_mm_2intersect_epi64
+ // OGCG: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // OGCG: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0
+ // OGCG: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8
+ // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // OGCG: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1
+ // OGCG: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8
+ // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+ _mm_2intersect_epi64(a, b, m0, m1);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c
new file mode 100644
index 0000000000000..384477454c43e
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c
@@ -0,0 +1,77 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+
+// CIR: !rec_anon_struct = !cir.record<struct {!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.bool>}>
+// CIR: !rec_anon_struct1 = !cir.record<struct {!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.bool>}>
+void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, __mmask16 *m1) {
+ // CIR-LABEL: mm512_2intersect_epi32
+ // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.512" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>) -> !rec_anon_struct
+ // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> !cir.vector<16 x !cir.bool>
+ // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<16 x !cir.bool> -> !u16i
+ // CIR: cir.store align(2) %[[CAST1]], %{{.*}} : !u16i, !cir.ptr<!u16i>
+ // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> !cir.vector<16 x !cir.bool>
+ // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<16 x !cir.bool> -> !u16i
+ // CIR: cir.store align(2) %[[CAST2]], %{{.*}} : !u16i, !cir.ptr<!u16i>
+
+ // LLVM-LABEL: test_mm512_2intersect_epi32
+ // LLVM: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // LLVM: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0
+ // LLVM: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16
+ // LLVM: store i16 %[[CAST1]], ptr %{{.*}}, align 2
+ // LLVM: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 1
+ // LLVM: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16
+ // LLVM: store i16 %[[CAST2]], ptr %{{.*}}, align 2
+
+ // OGCG-LABEL: test_mm512_2intersect_epi32
+ // OGCG: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ // OGCG: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0
+ // OGCG: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16
+ // OGCG: store i16 %[[CAST1]], ptr %{{.*}}, align 2
+ // OGCG: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 1
+ // OGCG: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16
+ // OGCG: store i16 %[[CAST2]], ptr %{{.*}}, align 2
+ _mm512_2intersect_epi32(a, b, m0, m1);
+}
+
+void test_mm512_2intersect_epi64(__m512i a, __m512i b, __mmask8 *m0, __mmask8 *m1) {
+ // CIR-LABEL: mm512_2intersect_epi64
+ // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.512" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !rec_anon_struct1
+ // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+ // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool>
+ // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i
+ // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i>
+
+ // LLVM-LABEL: test_mm512_2intersect_epi64
+ // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+ // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0
+ // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+ // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 1
+ // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+ // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+
+ // OGCG-LABEL: test_mm512_2intersect_epi64
+ // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}})
+ // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0
+ // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8
+ // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1
+ // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 1
+ // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8
+ // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1
+ _mm512_2intersect_epi64(a, b, m0, m1);
+}
More information about the cfe-commits
mailing list