[clang] [CIR][X86] Implement lowering for AVX512 mask builtins (PR #169185)
via cfe-commits
cfe-commits at lists.llvm.org
Sat Nov 22 11:21:31 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-clangir
Author: AIT (GeneraluseAI)
<details>
<summary>Changes</summary>
Part of [#167765](https://github.com/llvm/llvm-project/issues/167765).

This patch adds CIR codegen support for the AVX512 mask builtins on X86 (kadd, kand, kandn, kor, kxor, knot, and kmov) at all supported mask widths. It also adds CIR/LLVM/OGCG tests for AVX512F, AVX512DQ, and AVX512BW to validate the lowering.
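For context, the builtins handled here sit behind the user-level mask intrinsics in `immintrin.h`. A minimal usage sketch (illustrative only; `combine_masks` is a hypothetical function mirroring what the new tests exercise, built with `-mavx512bw`):

```c
#include <immintrin.h>

// Each call maps onto a builtin handled in this patch: kand and knot take
// the vXi1 bitcast + binop/not path, while kadd lowers to a call to the
// @llvm.x86.avx512.kadd.d intrinsic.
__mmask32 combine_masks(__mmask32 a, __mmask32 b) {
  __mmask32 m = _kand_mask32(a, b);
  m = _knot_mask32(m);
  return _kadd_mask32(m, b);
}
```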
---
Patch is 35.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169185.diff
4 Files Affected:
- (modified) clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp (+90-3)
- (added) clang/test/CIR/CodeGen/X86/avx512bw-builtins.c (+308)
- (added) clang/test/CIR/CodeGen/X86/avx512dq-builtins.c (+213)
- (added) clang/test/CIR/CodeGen/X86/avx512f-builtins.c (+143)
``````````diff
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index a30c79a83751a..1a19fd204f42c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -68,6 +68,45 @@ static mlir::Value emitVectorFCmp(CIRGenBuilderTy &builder,
return bitCast;
}
+// Convert the mask from an integer type to a vector of i1.
+static mlir::Value getMaskVecValue(CIRGenFunction &cgf, const CallExpr *expr,
+                                   mlir::Value mask, unsigned numElems) {
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+
+  cir::VectorType maskTy =
+      cir::VectorType::get(builder.getSIntNTy(1),
+                           cast<cir::IntType>(mask.getType()).getWidth());
+  mlir::Value maskVec = builder.createBitcast(mask, maskTy);
+
+  // If we have fewer than 8 elements, the starting mask was an i8 and we
+  // need to extract down to the right number of elements.
+  if (numElems < 8) {
+    SmallVector<mlir::Attribute, 4> indices;
+    mlir::Type i32Ty = builder.getI32Type();
+    for (auto i : llvm::seq<unsigned>(0, numElems))
+      indices.push_back(cir::IntAttr::get(i32Ty, i));
+    maskVec = builder.createVecShuffle(cgf.getLoc(expr->getExprLoc()), maskVec,
+                                       maskVec, indices);
+  }
+  return maskVec;
+}
+
+static mlir::Value emitX86MaskLogic(CIRGenFunction &cgf, const CallExpr *expr,
+                                    cir::BinOpKind opc,
+                                    SmallVectorImpl<mlir::Value> &ops,
+                                    bool invertLHS = false) {
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+  unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
+  mlir::Value lhs = getMaskVecValue(cgf, expr, ops[0], numElts);
+  mlir::Value rhs = getMaskVecValue(cgf, expr, ops[1], numElts);
+
+  if (invertLHS)
+    lhs = builder.createNot(lhs);
+  return builder.createBitcast(
+      builder.createBinop(cgf.getLoc(expr->getExprLoc()), lhs, opc, rhs),
+      ops[0].getType());
+}
+
mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
const CallExpr *expr) {
if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -672,38 +711,86 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_ktestzsi:
case X86::BI__builtin_ia32_ktestcdi:
case X86::BI__builtin_ia32_ktestzdi:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
case X86::BI__builtin_ia32_kaddqi:
case X86::BI__builtin_ia32_kaddhi:
case X86::BI__builtin_ia32_kaddsi:
- case X86::BI__builtin_ia32_kadddi:
+  case X86::BI__builtin_ia32_kadddi: {
+    std::string intrinsicName;
+    switch (builtinID) {
+    default:
+      llvm_unreachable("Unsupported intrinsic!");
+    case X86::BI__builtin_ia32_kaddqi:
+      intrinsicName = "x86.avx512.kadd.b";
+      break;
+    case X86::BI__builtin_ia32_kaddhi:
+      intrinsicName = "x86.avx512.kadd.w";
+      break;
+    case X86::BI__builtin_ia32_kaddsi:
+      intrinsicName = "x86.avx512.kadd.d";
+      break;
+    case X86::BI__builtin_ia32_kadddi:
+      intrinsicName = "x86.avx512.kadd.q";
+      break;
+    }
+    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+    unsigned numElts = intTy.getWidth();
+    mlir::Value lhsVec = getMaskVecValue(*this, expr, ops[0], numElts);
+    mlir::Value rhsVec = getMaskVecValue(*this, expr, ops[1], numElts);
+    mlir::Type vecTy = lhsVec.getType();
+    mlir::Value resVec = emitIntrinsicCallOp(*this, expr, intrinsicName, vecTy,
+                                             mlir::ValueRange{lhsVec, rhsVec});
+    return builder.createBitcast(resVec, ops[0].getType());
+  }
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
case X86::BI__builtin_ia32_kanddi:
+    return emitX86MaskLogic(*this, expr, cir::BinOpKind::And, ops);
case X86::BI__builtin_ia32_kandnqi:
case X86::BI__builtin_ia32_kandnhi:
case X86::BI__builtin_ia32_kandnsi:
case X86::BI__builtin_ia32_kandndi:
+    return emitX86MaskLogic(*this, expr, cir::BinOpKind::And, ops, true);
case X86::BI__builtin_ia32_korqi:
case X86::BI__builtin_ia32_korhi:
case X86::BI__builtin_ia32_korsi:
case X86::BI__builtin_ia32_kordi:
+    return emitX86MaskLogic(*this, expr, cir::BinOpKind::Or, ops);
case X86::BI__builtin_ia32_kxnorqi:
case X86::BI__builtin_ia32_kxnorhi:
case X86::BI__builtin_ia32_kxnorsi:
case X86::BI__builtin_ia32_kxnordi:
+    return emitX86MaskLogic(*this, expr, cir::BinOpKind::Xor, ops, true);
case X86::BI__builtin_ia32_kxorqi:
case X86::BI__builtin_ia32_kxorhi:
case X86::BI__builtin_ia32_kxorsi:
case X86::BI__builtin_ia32_kxordi:
+    return emitX86MaskLogic(*this, expr, cir::BinOpKind::Xor, ops);
case X86::BI__builtin_ia32_knotqi:
case X86::BI__builtin_ia32_knothi:
case X86::BI__builtin_ia32_knotsi:
- case X86::BI__builtin_ia32_knotdi:
+  case X86::BI__builtin_ia32_knotdi: {
+    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+    unsigned numElts = intTy.getWidth();
+    mlir::Value maskVec = getMaskVecValue(*this, expr, ops[0], numElts);
+    return builder.createBitcast(builder.createNot(maskVec), ops[0].getType());
+  }
case X86::BI__builtin_ia32_kmovb:
case X86::BI__builtin_ia32_kmovw:
case X86::BI__builtin_ia32_kmovd:
- case X86::BI__builtin_ia32_kmovq:
+  case X86::BI__builtin_ia32_kmovq: {
+    // Bitcast to vXi1 type and then back to integer. This gets the mask
+    // register type into the IR, but it might be optimized out depending on
+    // what's around it.
+    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+    unsigned numElts = intTy.getWidth();
+    mlir::Value maskVec = getMaskVecValue(*this, expr, ops[0], numElts);
+    return builder.createBitcast(maskVec, ops[0].getType());
+  }
case X86::BI__builtin_ia32_kunpckdi:
case X86::BI__builtin_ia32_kunpcksi:
case X86::BI__builtin_ia32_kunpckhi:
diff --git a/clang/test/CIR/CodeGen/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGen/X86/avx512bw-builtins.c
new file mode 100644
index 0000000000000..0d5aa2918e922
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512bw-builtins.c
@@ -0,0 +1,308 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+__mmask32 test_kadd_mask32(__mmask32 A, __mmask32 B) {
+ // CIR-LABEL: _kadd_mask32
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.kadd.d"
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+
+ // LLVM-LABEL: _kadd_mask32
+ // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: [[RES:%.*]] = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> [[L]], <32 x i1> [[R]])
+ // LLVM: bitcast <32 x i1> [[RES]] to i32
+
+ // OGCG-LABEL: _kadd_mask32
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: call <32 x i1> @llvm.x86.avx512.kadd.d
+ // OGCG: bitcast <32 x i1> {{.*}} to i32
+ return _kadd_mask32(A, B);
+}
+
+__mmask64 test_kadd_mask64(__mmask64 A, __mmask64 B) {
+ // CIR-LABEL: _kadd_mask64
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.kadd.q"
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+
+ // LLVM-LABEL: _kadd_mask64
+ // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: [[RES:%.*]] = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> [[L]], <64 x i1> [[R]])
+ // LLVM: bitcast <64 x i1> [[RES]] to i64
+
+ // OGCG-LABEL: _kadd_mask64
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: call <64 x i1> @llvm.x86.avx512.kadd.q
+ // OGCG: bitcast <64 x i1> {{.*}} to i64
+ return _kadd_mask64(A, B);
+}
+
+__mmask32 test_kand_mask32(__mmask32 A, __mmask32 B) {
+ // CIR-LABEL: _kand_mask32
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+
+ // LLVM-LABEL: _kand_mask32
+ // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: [[RES:%.*]] = and <32 x i1> [[L]], [[R]]
+ // LLVM: bitcast <32 x i1> [[RES]] to i32
+
+ // OGCG-LABEL: _kand_mask32
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: and <32 x i1>
+ // OGCG: bitcast <32 x i1> {{.*}} to i32
+ return _kand_mask32(A, B);
+}
+
+__mmask64 test_kand_mask64(__mmask64 A, __mmask64 B) {
+ // CIR-LABEL: _kand_mask64
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+
+ // LLVM-LABEL: _kand_mask64
+ // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: [[RES:%.*]] = and <64 x i1> [[L]], [[R]]
+ // LLVM: bitcast <64 x i1> [[RES]] to i64
+
+ // OGCG-LABEL: _kand_mask64
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: and <64 x i1>
+ // OGCG: bitcast <64 x i1> {{.*}} to i64
+ return _kand_mask64(A, B);
+}
+
+__mmask32 test_kandn_mask32(__mmask32 A, __mmask32 B) {
+ // CIR-LABEL: _kandn_mask32
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.unary(not, {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+
+ // LLVM-LABEL: _kandn_mask32
+ // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: xor <32 x i1> [[L]], {{.*}}
+ // LLVM: and <32 x i1>
+ // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+ // OGCG-LABEL: _kandn_mask32
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: xor <32 x i1>
+ // OGCG: and <32 x i1>
+ // OGCG: bitcast <32 x i1> {{.*}} to i32
+ return _kandn_mask32(A, B);
+}
+
+__mmask64 test_kandn_mask64(__mmask64 A, __mmask64 B) {
+ // CIR-LABEL: _kandn_mask64
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.unary(not, {{.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+
+ // LLVM-LABEL: _kandn_mask64
+ // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: xor <64 x i1> [[L]], {{.*}}
+ // LLVM: and <64 x i1>
+ // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+ // OGCG-LABEL: _kandn_mask64
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: xor <64 x i1>
+ // OGCG: and <64 x i1>
+ // OGCG: bitcast <64 x i1> {{.*}} to i64
+ return _kandn_mask64(A, B);
+}
+
+__mmask32 test_kor_mask32(__mmask32 A, __mmask32 B) {
+ // CIR-LABEL: _kor_mask32
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.binop(or, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+
+ // LLVM-LABEL: _kor_mask32
+ // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: or <32 x i1> [[L]], [[R]]
+ // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+ // OGCG-LABEL: _kor_mask32
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: or <32 x i1>
+ // OGCG: bitcast <32 x i1> {{.*}} to i32
+ return _kor_mask32(A, B);
+}
+
+__mmask64 test_kor_mask64(__mmask64 A, __mmask64 B) {
+ // CIR-LABEL: _kor_mask64
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.binop(or, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+
+ // LLVM-LABEL: _kor_mask64
+ // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: or <64 x i1> [[L]], [[R]]
+ // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+ // OGCG-LABEL: _kor_mask64
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: or <64 x i1>
+ // OGCG: bitcast <64 x i1> {{.*}} to i64
+ return _kor_mask64(A, B);
+}
+
+__mmask32 test_kxor_mask32(__mmask32 A, __mmask32 B) {
+ // CIR-LABEL: _kxor_mask32
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+
+ // LLVM-LABEL: _kxor_mask32
+ // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: xor <32 x i1> [[L]], [[R]]
+ // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+ // OGCG-LABEL: _kxor_mask32
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: xor <32 x i1>
+ // OGCG: bitcast <32 x i1> {{.*}} to i32
+ return _kxor_mask32(A, B);
+}
+
+__mmask64 test_kxor_mask64(__mmask64 A, __mmask64 B) {
+ // CIR-LABEL: _kxor_mask64
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+
+ // LLVM-LABEL: _kxor_mask64
+ // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: xor <64 x i1> [[L]], [[R]]
+ // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+ // OGCG-LABEL: _kxor_mask64
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: xor <64 x i1>
+ // OGCG: bitcast <64 x i1> {{.*}} to i64
+ return _kxor_mask64(A, B);
+}
+
+__mmask32 test_knot_mask32(__mmask32 A) {
+ // CIR-LABEL: _knot_mask32
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.unary(not, {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+
+ // LLVM-LABEL: _knot_mask32
+ // LLVM: bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: xor <32 x i1>
+ // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+ // OGCG-LABEL: _knot_mask32
+ // OGCG: xor <32 x i1>
+
+ return _knot_mask32(A);
+}
+
+__mmask64 test_knot_mask64(__mmask64 A) {
+ // CIR-LABEL: _knot_mask64
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.unary(not, {{.*}}) : !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+
+ // LLVM-LABEL: _knot_mask64
+ // LLVM: bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: xor <64 x i1>
+ // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+ // OGCG-LABEL: _knot_mask64
+ // OGCG: xor <64 x i1>
+
+ return _knot_mask64(A);
+}
+
+// Multiple user-level mask helpers inline down to the same kmov builtins,
+// and CIR implements no special lowering for those helpers. Testing the
+// builtins (__builtin_ia32_kmov*) directly is therefore sufficient to cover
+// the CIR lowering behavior; testing each helper individually would exercise
+// no new CIR paths.
+
+__mmask32 test_kmov_d(__mmask32 A) {
+ // CIR-LABEL: test_kmov_d
+ // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<s, 1>> -> !u32i
+
+ // LLVM-LABEL: test_kmov_d
+ // LLVM: bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+ // OGCG-LABEL: test_kmov_d
+ // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: bitcast <32 x i1> {{.*}} to i32
+
+ return __builtin_ia32_kmovd(A);
+}
+
+__mmask64 test_kmov_q(__mmask64 A) {
+ // CIR-LABEL: test_kmov_q
+ // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<s, 1>> -> !u64i
+
+ // LLVM-LABEL: test_kmov_q
+ // LLVM: bitcast i64 %{{.*}} to <64 x i1>
+ // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+ // OGCG-LABEL: test_kmov_q
+ // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+ // OGCG: bitcast <64 x i1> {{.*}} to i64
+
+ return __builtin_ia32_kmovq(A);
+}
diff --git a/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c b/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c
new file mode 100644
index 0000000000000..21c255e28f3f3
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/avx512dq-builtins.c
@@ -0,0 +1,213 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck...
[truncated]
``````````
</details>
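One detail worth calling out: kandn and kxnor are both routed through emitX86MaskLogic with the LHS inverted first. That is sound because (~a) & b is exactly KANDN, and (~a) ^ b is equivalent to ~(a ^ b), i.e. KXNOR. A quick standalone sanity check of those identities (illustrative sketch, not part of the patch; build with `-mavx512bw`, running needs an AVX512BW-capable CPU):

```c
#include <assert.h>
#include <immintrin.h>

int main(void) {
  __mmask32 a = 0xF0F0A5A5u, b = 0x0FF0FF00u;
  // kandn: invert the LHS, then AND -- exactly (~a) & b.
  assert(_kandn_mask32(a, b) == (__mmask32)(~a & b));
  // kxnor: invert the LHS, then XOR -- (~a) ^ b folds to ~(a ^ b).
  assert(_kxnor_mask32(a, b) == (__mmask32)~(a ^ b));
  return 0;
}
```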
https://github.com/llvm/llvm-project/pull/169185