[clang] [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (PR #173802)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 2 11:57:30 PST 2026
https://github.com/DannyDaoBoYang updated https://github.com/llvm/llvm-project/pull/173802
>From df2937fa546f52af32f0db7d5d94ff5611b59f71 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <34634047+DannyDaoBoYang at users.noreply.github.com>
Date: Sun, 28 Dec 2025 17:00:49 -0500
Subject: [PATCH 1/2] Add support for pmovqd512_mask and pmovwb512_mask
Add CIR support for the pmovqd512_mask and pmovwb512_mask builtins. Also include a minor type fix in CIRGenCleanup.cpp (std::max was called with mismatched size_t / unsigned long arguments), which caused a compile error on Windows.
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 16 ++++++++++
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 5 +++-
clang/lib/CIR/CodeGen/CIRGenCleanup.cpp | 2 +-
.../X86/pmovqd-mask-builtins.c | 29 +++++++++++++++++++
4 files changed, 50 insertions(+), 2 deletions(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cc28941aaa079..481d06091f012 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -425,6 +425,22 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
+ /// Create a value truncation to a narrower type.
+ /// Returns the source if types already match. CIR casts do not
+ /// encode NUW/NSW; wrap semantics should be handled by callers.
+ /// Supports both scalar integers and vectors of integers.
+ mlir::Value createTrunc(mlir::Location loc, mlir::Value src,
+ mlir::Type newTy) {
+ auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
+ if (newTy == srcIntTy)
+ return src;
+ return createCast(loc, cir::CastKind::integral, src, newTy);
+ }
+
+ mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
+ return createTrunc(src.getLoc(), src, newTy);
+ }
+
mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
mlir::Value src, mlir::Type newTy) {
if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..b553327f676f5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1274,7 +1274,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
mask);
}
case X86::BI__builtin_ia32_pmovqd512_mask:
- case X86::BI__builtin_ia32_pmovwb512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask: {
+ mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
+ return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]);
+ }
case X86::BI__builtin_ia32_pblendw128:
case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
index 6c6cb402d1190..8d9ea7c6c22eb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -97,7 +97,7 @@ EHScopeStack::getInnermostActiveNormalCleanup() const {
char *EHScopeStack::allocate(size_t size) {
size = llvm::alignTo(size, ScopeStackAlignment);
if (!startOfBuffer) {
- unsigned capacity = llvm::PowerOf2Ceil(std::max(size, 1024ul));
+ unsigned capacity = llvm::PowerOf2Ceil(std::max<size_t>(size, 1024ul));
startOfBuffer = std::make_unique<char[]>(capacity);
startOfData = endOfBuffer = startOfBuffer.get() + capacity;
} else if (static_cast<size_t>(startOfData - startOfBuffer.get()) < size) {
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
new file mode 100644
index 0000000000000..b43d2dc8f050d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+#include <immintrin.h>
+
+__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) {
+ // CIR-LABEL: test_pmovqd_mask
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+ return __builtin_ia32_pmovqd512_mask(a, b, mask);
+}
+
+__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) {
+ // CIR-LABEL: test_pmovqd_maskz
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+ __m256i zero = _mm256_setzero_si256();
+ return __builtin_ia32_pmovqd512_mask(a, zero, mask);
+}
+
+__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
+ // CIR-LABEL: test_pmovwb_mask
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+ return __builtin_ia32_pmovwb512_mask(a, b, mask);
+}
\ No newline at end of file
>From b989427cede8fbbde25fddd0f47a334b3d88a6a0 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <34634047+DannyDaoBoYang at users.noreply.github.com>
Date: Thu, 1 Jan 2026 19:46:15 -0500
Subject: [PATCH 2/2] Add LLVM and OGCG checks to test, combine createTrunc
overloads, and format
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 8 +--
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ++-
.../X86/pmovqd-mask-builtins.c | 51 +++++++++++++++----
3 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 481d06091f012..b66b7171e0628 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -429,18 +429,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
/// Returns the source if types already match. CIR casts do not
/// encode NUW/NSW; wrap semantics should be handled by callers.
/// Supports both scalar integers and vectors of integers.
- mlir::Value createTrunc(mlir::Location loc, mlir::Value src,
- mlir::Type newTy) {
+ mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
+ mlir::Location loc = src.getLoc();
auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
if (newTy == srcIntTy)
return src;
return createCast(loc, cir::CastKind::integral, src, newTy);
}
- mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
- return createTrunc(src.getLoc(), src, newTy);
- }
-
mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
mlir::Value src, mlir::Type newTy) {
if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b553327f676f5..253fca321f742 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1275,8 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
}
case X86::BI__builtin_ia32_pmovqd512_mask:
case X86::BI__builtin_ia32_pmovwb512_mask: {
- mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
- return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]);
+ mlir::Value Res =
+ builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
+ return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res,
+ ops[1]);
}
case X86::BI__builtin_ia32_pblendw128:
case X86::BI__builtin_ia32_blendpd:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
index b43d2dc8f050d..797ecf67ea9ec 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
@@ -1,5 +1,9 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
#include <immintrin.h>
@@ -8,22 +12,49 @@ __m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) {
// CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
// CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
// CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+ // LLVM-LABEL: @test_pmovqd_mask
+ // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+ // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %[[CMP:.*]] = icmp ne <8 x i1> %[[MASK_VEC]], zeroinitializer
+ // LLVM: %[[SEL:.*]] = select <8 x i1> %[[CMP]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]]
+ // LLVM: %[[RETBC:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
+ // LLVM: store <4 x i64> %[[RETBC]],
+ // LLVM: %[[RET:.*]] = load <4 x i64>,
+ // LLVM: ret <4 x i64> %[[RET]]
+ // OGCG-LABEL: @test_pmovqd_mask
+ // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+ // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]]
+ // OGCG: %[[RET:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
+ // OGCG: ret <4 x i64> %[[RET]]
return __builtin_ia32_pmovqd512_mask(a, b, mask);
}
-__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) {
- // CIR-LABEL: test_pmovqd_maskz
- // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
- __m256i zero = _mm256_setzero_si256();
- return __builtin_ia32_pmovqd512_mask(a, zero, mask);
-}
-
__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
// CIR-LABEL: test_pmovwb_mask
// CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i>
// CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
// CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+ // LLVM-LABEL: @test_pmovwb_mask
+ // LLVM: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
+ // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: %[[CMP:.*]] = icmp ne <32 x i1> %[[MASK_VEC]], zeroinitializer
+ // LLVM: %[[SEL:.*]] = select <32 x i1> %[[CMP]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]]
+ // LLVM: %[[RETBC:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
+ // LLVM: store <4 x i64> %[[RETBC]],
+ // LLVM: %[[RET:.*]] = load <4 x i64>,
+ // LLVM: ret <4 x i64> %[[RET]]
+ // OGCG-LABEL: @test_pmovwb_mask
+ // OGCG: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
+ // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]]
+ // OGCG: %[[RET:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
+ // OGCG: ret <4 x i64> %[[RET]]
return __builtin_ia32_pmovwb512_mask(a, b, mask);
}
\ No newline at end of file
More information about the cfe-commits
mailing list