[clang] [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (PR #173802)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Jan 9 19:51:50 PST 2026
https://github.com/DannyDaoBoYang updated https://github.com/llvm/llvm-project/pull/173802
>From 39693720f08a5d77f42bb3e4a38dab1e5a0e2fb5 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <34634047+DannyDaoBoYang at users.noreply.github.com>
Date: Mon, 5 Jan 2026 20:55:51 -0500
Subject: [PATCH 1/3] Add support for pmovqd512_mask and pmovwb512_mask
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 16 ++++++++++
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 5 +++-
.../X86/pmovqd-mask-builtins.c | 29 +++++++++++++++++++
3 files changed, 49 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cc28941aaa079..481d06091f012 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -425,6 +425,22 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
+ /// Create an value truncation to a narrower type.
+ /// Returns the source if types already match. CIR casts do not
+ /// encode NUW/NSW; wrap semantics should be handled by callers.
+ /// Supports both scalar integers and vectors of integers.
+ mlir::Value createTrunc(mlir::Location loc, mlir::Value src,
+ mlir::Type newTy) {
+ auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
+ if (newTy == srcIntTy)
+ return src;
+ return createCast(loc, cir::CastKind::integral, src, newTy);
+ }
+
+ mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
+ return createTrunc(src.getLoc(), src, newTy);
+ }
+
mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
mlir::Value src, mlir::Type newTy) {
if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..b553327f676f5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1274,7 +1274,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
mask);
}
case X86::BI__builtin_ia32_pmovqd512_mask:
- case X86::BI__builtin_ia32_pmovwb512_mask:
+ case X86::BI__builtin_ia32_pmovwb512_mask: {
+ mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
+ return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]);
+ }
case X86::BI__builtin_ia32_pblendw128:
case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_blendps:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
new file mode 100644
index 0000000000000..b43d2dc8f050d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+#include <immintrin.h>
+
+__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) {
+ // CIR-LABEL: test_pmovqd_mask
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+ return __builtin_ia32_pmovqd512_mask(a, b, mask);
+}
+
+__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) {
+ // CIR-LABEL: test_pmovqd_maskz
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+ __m256i zero = _mm256_setzero_si256();
+ return __builtin_ia32_pmovqd512_mask(a, zero, mask);
+}
+
+__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
+ // CIR-LABEL: test_pmovwb_mask
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+ return __builtin_ia32_pmovwb512_mask(a, b, mask);
+}
\ No newline at end of file
>From 10bf2da1dc5166e507ab2344a7a44544bad243f3 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <34634047+DannyDaoBoYang at users.noreply.github.com>
Date: Thu, 1 Jan 2026 19:46:15 -0500
Subject: [PATCH 2/3] add LLVM and OGCG in test, Combine Trunc function calls,
format
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 8 +--
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ++-
.../X86/pmovqd-mask-builtins.c | 51 +++++++++++++++----
3 files changed, 47 insertions(+), 18 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 481d06091f012..b66b7171e0628 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -429,18 +429,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
/// Returns the source if types already match. CIR casts do not
/// encode NUW/NSW; wrap semantics should be handled by callers.
/// Supports both scalar integers and vectors of integers.
- mlir::Value createTrunc(mlir::Location loc, mlir::Value src,
- mlir::Type newTy) {
+ mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
+ mlir::Location loc = src.getLoc();
auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
if (newTy == srcIntTy)
return src;
return createCast(loc, cir::CastKind::integral, src, newTy);
}
- mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
- return createTrunc(src.getLoc(), src, newTy);
- }
-
mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
mlir::Value src, mlir::Type newTy) {
if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b553327f676f5..253fca321f742 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1275,8 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
}
case X86::BI__builtin_ia32_pmovqd512_mask:
case X86::BI__builtin_ia32_pmovwb512_mask: {
- mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
- return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]);
+ mlir::Value Res =
+ builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
+ return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res,
+ ops[1]);
}
case X86::BI__builtin_ia32_pblendw128:
case X86::BI__builtin_ia32_blendpd:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
index b43d2dc8f050d..797ecf67ea9ec 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
@@ -1,5 +1,9 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
#include <immintrin.h>
@@ -8,22 +12,49 @@ __m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) {
// CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
// CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
// CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+ // LLVM-LABEL: @test_pmovqd_mask
+ // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+ // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %[[CMP:.*]] = icmp ne <8 x i1> %[[MASK_VEC]], zeroinitializer
+ // LLVM: %[[SEL:.*]] = select <8 x i1> %[[CMP]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]]
+ // LLVM: %[[RETBC:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
+ // LLVM: store <4 x i64> %[[RETBC]],
+ // LLVM: %[[RET:.*]] = load <4 x i64>,
+ // LLVM: ret <4 x i64> %[[RET]]
+ // OGCG-LABEL: @test_pmovqd_mask
+ // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+ // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]]
+ // OGCG: %[[RET:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
+ // OGCG: ret <4 x i64> %[[RET]]
return __builtin_ia32_pmovqd512_mask(a, b, mask);
}
-__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) {
- // CIR-LABEL: test_pmovqd_maskz
- // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
- __m256i zero = _mm256_setzero_si256();
- return __builtin_ia32_pmovqd512_mask(a, zero, mask);
-}
-
__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
// CIR-LABEL: test_pmovwb_mask
// CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i>
// CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
// CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+ // LLVM-LABEL: @test_pmovwb_mask
+ // LLVM: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
+ // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: %[[CMP:.*]] = icmp ne <32 x i1> %[[MASK_VEC]], zeroinitializer
+ // LLVM: %[[SEL:.*]] = select <32 x i1> %[[CMP]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]]
+ // LLVM: %[[RETBC:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
+ // LLVM: store <4 x i64> %[[RETBC]],
+ // LLVM: %[[RET:.*]] = load <4 x i64>,
+ // LLVM: ret <4 x i64> %[[RET]]
+ // OGCG-LABEL: @test_pmovwb_mask
+ // OGCG: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
+ // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
+ // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]]
+ // OGCG: %[[RET:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
+ // OGCG: ret <4 x i64> %[[RET]]
return __builtin_ia32_pmovwb512_mask(a, b, mask);
}
\ No newline at end of file
>From 0fcc6a5ef4f717ebdbbea7d0b4923a66e79e2dc9 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <34634047+DannyDaoBoYang at users.noreply.github.com>
Date: Tue, 6 Jan 2026 01:42:03 -0500
Subject: [PATCH 3/3] Use createIntCast, move tests around, format, fix tests
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 12 ----
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
.../CodeGenBuiltins/X86/avx512bw-builtins.c | 63 +++++++++++++++++++
.../CodeGenBuiltins/X86/avx512f-builtins.c | 63 +++++++++++++++++++
.../X86/pmovqd-mask-builtins.c | 60 ------------------
5 files changed, 127 insertions(+), 73 deletions(-)
delete mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index b66b7171e0628..cc28941aaa079 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -425,18 +425,6 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
// Cast/Conversion Operators
//===--------------------------------------------------------------------===//
- /// Create an value truncation to a narrower type.
- /// Returns the source if types already match. CIR casts do not
- /// encode NUW/NSW; wrap semantics should be handled by callers.
- /// Supports both scalar integers and vectors of integers.
- mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
- mlir::Location loc = src.getLoc();
- auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
- if (newTy == srcIntTy)
- return src;
- return createCast(loc, cir::CastKind::integral, src, newTy);
- }
-
mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
mlir::Value src, mlir::Type newTy) {
if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 253fca321f742..429d2b2237b01 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1276,7 +1276,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_pmovqd512_mask:
case X86::BI__builtin_ia32_pmovwb512_mask: {
mlir::Value Res =
- builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
+ builder.createIntCast(ops[0], cast<cir::VectorType>(ops[1].getType()));
return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res,
ops[1]);
}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index ac740b354cb18..9693d637fc83c 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -803,4 +803,67 @@ __mmask32 test_mm512_movepi16_mask(__m512i __A) {
// OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
// OGCG: bitcast <32 x i1> [[CMP]] to i32
return _mm512_movepi16_mask(__A);
+}
+
+__m256i test_mm512_cvtepi16_epi8(__m512i __A) {
+ // CIR-LABEL: test_mm512_cvtepi16_epi8
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i>
+ // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<32 x !s8i> -> !cir.vector<4 x !s64i>
+ // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>
+ // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+
+ // LLVM-LABEL: test_mm512_cvtepi16_epi8
+ // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+ // LLVM: bitcast <32 x i8> %[[TRUNC]] to <4 x i64>
+
+ // OGCG-LABEL: test_mm512_cvtepi16_epi8
+ // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+ // OGCG: bitcast <32 x i8> %[[TRUNC]] to <4 x i64>
+ return _mm512_cvtepi16_epi8(__A);
+}
+
+__m256i test_mm512_mask_cvtepi16_epi8(__m256i __O, __mmask32 __M, __m512i __A) {
+ // CIR-LABEL: test_mm512_mask_cvtepi16_epi8
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+ // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<32 x !s8i> -> !cir.vector<4 x !s64i>
+ // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>
+ // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+
+ // LLVM-LABEL: test_mm512_mask_cvtepi16_epi8
+ // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %{{.*}}
+ // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+
+ // OGCG-LABEL: test_mm512_mask_cvtepi16_epi8
+ // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %{{.*}}
+ // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+ return _mm512_mask_cvtepi16_epi8(__O, __M, __A);
+}
+
+__m256i test_mm512_maskz_cvtepi16_epi8(__mmask32 __M, __m512i __A) {
+ // CIR-LABEL: test_mm512_maskz_cvtepi16_epi8
+ // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u32i, !cir.vector<8 x !s64i>) -> !cir.vector<4 x !s64i>
+ // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>
+ // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+
+ // LLVM-LABEL: test_mm512_maskz_cvtepi16_epi8
+ // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // LLVM: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> {{.*}}
+ // LLVM: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+
+ // OGCG-LABEL: test_mm512_maskz_cvtepi16_epi8
+ // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %{{.*}} to <32 x i8>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+ // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> {{.*}}
+ // OGCG: bitcast <32 x i8> %[[SEL]] to <4 x i64>
+ return _mm512_maskz_cvtepi16_epi8(__M, __A);
}
\ No newline at end of file
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index 9d5d5e67d6ad9..37a4baf6f0f9d 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -1056,3 +1056,66 @@ int test_mm512_kortestz(__mmask16 __A, __mmask16 __B) {
// OGCG: zext i1 %[[CMP]] to i32
return _mm512_kortestz(__A,__B);
}
+
+__m256i test_mm512_cvtepi64_epi32(__m512i __A) {
+ // CIR-LABEL: test_mm512_cvtepi64_epi32
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
+ // CIR: %[[RETBC:.*]] = cir.cast bitcast {{.*}} : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i>
+ // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>
+ // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+
+ // LLVM-LABEL: test_mm512_cvtepi64_epi32
+ // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // LLVM: bitcast <8 x i32> %[[TRUNC]] to <4 x i64>
+
+ // OGCG-LABEL: test_mm512_cvtepi64_epi32
+ // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // OGCG: bitcast <8 x i32> %[[TRUNC]] to <4 x i64>
+ return _mm512_cvtepi64_epi32(__A);
+}
+
+__m256i test_mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) {
+ // CIR-LABEL: test_mm512_mask_cvtepi64_epi32
+ // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
+ // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
+ // CIR: %[[TER:.*]] = cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+ // CIR: %[[RETBC:.*]] = cir.cast bitcast %[[TER]] : !cir.vector<8 x !s32i> -> !cir.vector<4 x !s64i>
+ // CIR: cir.store %[[RETBC]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>
+ // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+
+ // LLVM-LABEL: test_mm512_mask_cvtepi64_epi32
+ // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %{{.*}}
+ // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+
+ // OGCG-LABEL: test_mm512_mask_cvtepi64_epi32
+ // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %{{.*}}
+ // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+ return _mm512_mask_cvtepi64_epi32(__O, __M, __A);
+}
+
+__m256i test_mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A) {
+ // CIR-LABEL: test_mm512_maskz_cvtepi64_epi32
+ // CIR: %[[CALL:.*]] = cir.call {{.*}} : (!u8i, !cir.vector<8 x !s64i>) -> !cir.vector<4 x !s64i>
+ // CIR: cir.store %[[CALL]], %[[RETPTR:.*]] : !cir.vector<4 x !s64i>, !cir.ptr<!cir.vector<4 x !s64i>>
+ // CIR: %[[RETLOAD:.*]] = cir.load %[[RETPTR]] : !cir.ptr<!cir.vector<4 x !s64i>>, !cir.vector<4 x !s64i>
+ // CIR: cir.return %[[RETLOAD]] : !cir.vector<4 x !s64i>
+
+ // LLVM-LABEL: test_mm512_maskz_cvtepi64_epi32
+ // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> {{.*}}
+ // LLVM: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+
+ // OGCG-LABEL: test_mm512_maskz_cvtepi64_epi32
+ // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+ // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> {{.*}}
+ // OGCG: bitcast <8 x i32> %[[SEL]] to <4 x i64>
+ return _mm512_maskz_cvtepi64_epi32(__M, __A);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
deleted file mode 100644
index 797ecf67ea9ec..0000000000000
--- a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
+++ /dev/null
@@ -1,60 +0,0 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
-// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
-// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
-
-#include <immintrin.h>
-
-__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) {
- // CIR-LABEL: test_pmovqd_mask
- // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
- // LLVM-LABEL: @test_pmovqd_mask
- // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
- // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
- // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
- // LLVM: %[[CMP:.*]] = icmp ne <8 x i1> %[[MASK_VEC]], zeroinitializer
- // LLVM: %[[SEL:.*]] = select <8 x i1> %[[CMP]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]]
- // LLVM: %[[RETBC:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
- // LLVM: store <4 x i64> %[[RETBC]],
- // LLVM: %[[RET:.*]] = load <4 x i64>,
- // LLVM: ret <4 x i64> %[[RET]]
- // OGCG-LABEL: @test_pmovqd_mask
- // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
- // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
- // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
- // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]]
- // OGCG: %[[RET:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
- // OGCG: ret <4 x i64> %[[RET]]
- return __builtin_ia32_pmovqd512_mask(a, b, mask);
-}
-
-__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
- // CIR-LABEL: test_pmovwb_mask
- // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i>
- // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
- // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
- // LLVM-LABEL: @test_pmovwb_mask
- // LLVM: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
- // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
- // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
- // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
- // LLVM: %[[CMP:.*]] = icmp ne <32 x i1> %[[MASK_VEC]], zeroinitializer
- // LLVM: %[[SEL:.*]] = select <32 x i1> %[[CMP]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]]
- // LLVM: %[[RETBC:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
- // LLVM: store <4 x i64> %[[RETBC]],
- // LLVM: %[[RET:.*]] = load <4 x i64>,
- // LLVM: ret <4 x i64> %[[RET]]
- // OGCG-LABEL: @test_pmovwb_mask
- // OGCG: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
- // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
- // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
- // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
- // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]]
- // OGCG: %[[RET:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
- // OGCG: ret <4 x i64> %[[RET]]
- return __builtin_ia32_pmovwb512_mask(a, b, mask);
-}
\ No newline at end of file
More information about the cfe-commits mailing list