[clang] [CIR][X86] Implement convert_half builtins (PR #171615)
Priyanshu Kumar via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 15 20:26:13 PST 2025
https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/171615
>From 286dba2d5a5ca74c84c4e221192bf51cc6a731a3 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 12 Dec 2025 10:57:40 +0000
Subject: [PATCH 01/13] Resolve conflict
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 57 ++++++++++-
.../X86/avx512vlbf16-builtins.c | 98 +++++++++++++++++++
2 files changed, 154 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 75bf25b20f1af..bd5a188fe9453 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -20,6 +20,7 @@
#include "clang/Basic/TargetBuiltins.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
using namespace clang;
@@ -362,6 +363,24 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createMul(loc, lhs, rhs);
}
+static mlir::Value
+emitCIRX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, mlir::Location loc,
+ mlir::Type dstTy,
+ SmallVectorImpl<mlir::Value> &ops) {
+ auto src = ops[0];
+ auto passthru = ops[1];
+ auto mask = ops[2];
+
+ auto vecType = llvm::cast<mlir::VectorType>(src.getType());
+ auto numElts = vecType.getNumElements();
+ auto halfTy = mlir::VectorType::get({numElts}, builder.getF16Type());
+ auto srcF16 = builder.createBitcast(loc, src, halfTy);
+
+ auto res = builder.createFloatingCast(srcF16, dstTy);
+
+ return emitX86Select(builder, loc, mask, res, passthru);
+}
+
static mlir::Value emitX86vpcom(CIRGenBuilderTy &builder, mlir::Location loc,
llvm::SmallVector<mlir::Value> ops,
bool isSigned) {
@@ -1667,7 +1686,43 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ llvm::StringRef intrinsicName;
+ switch (builtinID) {
+ case X86::BI__builtin_ia32_vcvtph2ps_mask: {
+ return emitCIRX86CvtF16ToFloatExpr(builder, loc,
+ convertType(expr->getType()), ops);
+ }
+ case X86::BI__builtin_ia32_vcvtph2ps256_mask: {
+ return emitCIRX86CvtF16ToFloatExpr(builder, loc,
+ convertType(expr->getType()), ops);
+ }
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+ return emitCIRX86CvtF16ToFloatExpr(builder, loc,
+ convertType(expr->getType()), ops);
+ }
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+ intrinsicName = "x86.avx512bf16.mask.cvtneps2bf16.128";
+ break;
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: {
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
+ auto intrinsicResult = emitIntrinsicCallOp(
+ builder, loc, intrinsicName, convertType(expr->getType()), ops);
+ return emitX86Select(builder, loc, ops[2], intrinsicResult, ops[1]);
+ }
+ case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
+ auto intrinsicResult = emitIntrinsicCallOp(
+ builder, loc, intrinsicName, convertType(expr->getType()), ops);
+ return emitX86Select(builder, loc, ops[2], intrinsicResult, ops[1]);
+ }
+ default:
+ llvm_unreachable("Unexpected builtinID");
+ }
+ return emitIntrinsicCallOp(builder, loc, intrinsicName,
+ convertType(expr->getType()), ops);
+ }
case X86::BI__cpuid:
case X86::BI__cpuidex:
case X86::BI__emul:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
new file mode 100644
index 0000000000000..fee7d6ac8c9f5
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm %s -o - | FileCheck %s --check-prefix=OGCG
+
+// REQUIRES: avx512fp16
+// REQUIRES: avx512bf16
+
+#include <immintrin.h>
+
+// CIR-LABEL: test_mm512_mask_cvtneps_pbh
+// CIR: cir.call @_mm512_mask_cvtneps_pbh({{.*}}, {{.*}}, {{.*}})
+// LLVM-LABEL: test_mm512_mask_cvtneps_pbh
+// LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+// OGCG-LABEL: test_mm512_mask_cvtneps_pbh
+// OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+__m256bh test_mm512_mask_cvtneps_pbh(__m256bh src, __mmask16 k, __m512 a) {
+ return _mm512_mask_cvtneps_pbh(src, k, a);
+}
+
+// CIR-LABEL: test_mm512_maskz_cvtneps_pbh
+// CIR: cir.call @_mm512_maskz_cvtneps_pbh({{.*}}, {{.*}})
+// LLVM-LABEL: test_mm512_maskz_cvtneps_pbh
+// LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+// OGCG-LABEL: test_mm512_maskz_cvtneps_pbh
+// OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+__m256bh test_mm512_maskz_cvtneps_pbh(__mmask16 k, __m512 a) {
+ return _mm512_maskz_cvtneps_pbh(k, a);
+}
+
+// CIR-LABEL: test_mm256_mask_cvtneps_pbh
+// CIR: cir.call @_mm256_mask_cvtneps_pbh({{.*}}, {{.*}}, {{.*}})
+// LLVM-LABEL: test_mm256_mask_cvtneps_pbh
+// LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256
+// OGCG-LABEL: test_mm256_mask_cvtneps_pbh
+// OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256
+__m128bh test_mm256_mask_cvtneps_pbh(__m128bh src, __mmask8 k, __m256 a) {
+ return _mm256_mask_cvtneps_pbh(src, k, a);
+}
+
+// CIR-LABEL: test_mm256_maskz_cvtneps_pbh
+// CIR: cir.call @_mm256_maskz_cvtneps_pbh({{.*}}, {{.*}})
+// LLVM-LABEL: test_mm256_maskz_cvtneps_pbh
+// LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256
+// OGCG-LABEL: test_mm256_maskz_cvtneps_pbh
+// OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256
+__m128bh test_mm256_maskz_cvtneps_pbh(__mmask8 k, __m256 a) {
+ return _mm256_maskz_cvtneps_pbh(k, a);
+}
+
+// CIR-LABEL: test_mm_mask_cvtneps_pbh
+// CIR: cir.call @_mm_mask_cvtneps_pbh({{.*}}, {{.*}}, {{.*}})
+// LLVM-LABEL: test_mm_mask_cvtneps_pbh
+// LLVM: call <4 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.128
+// OGCG-LABEL: test_mm_mask_cvtneps_pbh
+// OGCG: call <4 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.128
+__m64bh test_mm_mask_cvtneps_pbh(__m64bh src, __mmask8 k, __m128 a) {
+ return _mm_mask_cvtneps_pbh(src, k, a);
+}
+
+// CIR-LABEL: test_mm_maskz_cvtneps_pbh
+// CIR: cir.call @_mm_maskz_cvtneps_pbh({{.*}}, {{.*}})
+// LLVM-LABEL: test_mm_maskz_cvtneps_pbh
+// LLVM: call <4 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.128
+// OGCG-LABEL: test_mm_maskz_cvtneps_pbh
+// OGCG: call <4 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.128
+__m64bh test_mm_maskz_cvtneps_pbh(__mmask8 k, __m128 a) {
+ return _mm_maskz_cvtneps_pbh(k, a);
+}
+
+// CIR-LABEL: test_mm512_cvtneps_pbh
+// CIR: cir.call @_mm512_cvtneps_pbh({{.*}})
+// LLVM-LABEL: test_mm512_cvtneps_pbh
+// LLVM: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+// OGCG-LABEL: test_mm512_cvtneps_pbh
+// OGCG: call <16 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.512
+__m256bh test_mm512_cvtneps_pbh(__m512 a) {
+ return _mm512_cvtneps_pbh(a);
+}
+
+// CIR-LABEL: test_mm256_cvtneps_pbh
+// CIR: cir.call @_mm256_cvtneps_pbh({{.*}})
+// LLVM-LABEL: test_mm256_cvtneps_pbh
+// LLVM: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256
+// OGCG-LABEL: test_mm256_cvtneps_pbh
+// OGCG: call <8 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.256
+__m128bh test_mm256_cvtneps_pbh(__m256 a) {
+ return _mm256_cvtneps_pbh(a);
+}
+
+// CIR-LABEL: test_mm_cvtneps_pbh
+// CIR: cir.call @_mm_cvtneps_pbh({{.*}})
+// LLVM-LABEL: test_mm_cvtneps_pbh
+// LLVM: call <4 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.128
+// OGCG-LABEL: test_mm_cvtneps_pbh
+// OGCG: call <4 x bfloat> @llvm.x86.avx512bf16.cvtneps2bf16.128
+__m64bh test_mm_cvtneps_pbh(__m128 a) {
+ return _mm_cvtneps_pbh(a);
+}
>From 2d8c601ea05f40cc630b8bc074d8c26aa5c5ad5f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 12 Dec 2025 17:53:43 +0000
Subject: [PATCH 02/13] Rename emitCIRX86CvtF16ToFloatExpr to
emitX86CvtF16ToFloatExpr
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 20 ++++++++++----------
1 file changed, 10 insertions(+), 10 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index bd5a188fe9453..0becf411049ff 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -363,10 +363,10 @@ static mlir::Value emitX86Muldq(CIRGenBuilderTy &builder, mlir::Location loc,
return builder.createMul(loc, lhs, rhs);
}
-static mlir::Value
-emitCIRX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder, mlir::Location loc,
- mlir::Type dstTy,
- SmallVectorImpl<mlir::Value> &ops) {
+static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
+ mlir::Location loc,
+ mlir::Type dstTy,
+ SmallVectorImpl<mlir::Value> &ops) {
auto src = ops[0];
auto passthru = ops[1];
auto mask = ops[2];
@@ -1691,16 +1691,16 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
llvm::StringRef intrinsicName;
switch (builtinID) {
case X86::BI__builtin_ia32_vcvtph2ps_mask: {
- return emitCIRX86CvtF16ToFloatExpr(builder, loc,
- convertType(expr->getType()), ops);
+ return emitX86CvtF16ToFloatExpr(builder, loc,
+ convertType(expr->getType()), ops);
}
case X86::BI__builtin_ia32_vcvtph2ps256_mask: {
- return emitCIRX86CvtF16ToFloatExpr(builder, loc,
- convertType(expr->getType()), ops);
+ return emitX86CvtF16ToFloatExpr(builder, loc,
+ convertType(expr->getType()), ops);
}
case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
- return emitCIRX86CvtF16ToFloatExpr(builder, loc,
- convertType(expr->getType()), ops);
+ return emitX86CvtF16ToFloatExpr(builder, loc,
+ convertType(expr->getType()), ops);
}
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
intrinsicName = "x86.avx512bf16.mask.cvtneps2bf16.128";
>From d9b5a4c8ea9d0ee7fa3fb54ff099cce49d5a401c Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 16 Dec 2025 00:47:05 +0530
Subject: [PATCH 03/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 7014f1fadba60..3af411250b782 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -371,9 +371,9 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
auto passthru = ops[1];
auto mask = ops[2];
- auto vecType = llvm::cast<mlir::VectorType>(src.getType());
- auto numElts = vecType.getNumElements();
- auto halfTy = mlir::VectorType::get({numElts}, builder.getF16Type());
+ auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
+ uint64_t numElems = vecTy.getSize();
+ auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems());
auto srcF16 = builder.createBitcast(loc, src, halfTy);
auto res = builder.createFloatingCast(srcF16, dstTy);
>From 7de6f2432483a6c555a05eca7bda21bf04f58025 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 16 Dec 2025 00:47:30 +0530
Subject: [PATCH 04/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 3af411250b782..c9858de5c24b8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -374,7 +374,7 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
uint64_t numElems = vecTy.getSize();
auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems());
- auto srcF16 = builder.createBitcast(loc, src, halfTy);
+ mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
auto res = builder.createFloatingCast(srcF16, dstTy);
>From e09c005855e78e0c473ffb2b74b6d3c00d02ea72 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 16 Dec 2025 00:48:01 +0530
Subject: [PATCH 05/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index c9858de5c24b8..2d02bde108dcb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -376,7 +376,7 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems());
mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
- auto res = builder.createFloatingCast(srcF16, dstTy);
+ mlir::Value res = builder.createFloatingCast(srcF16, dstTy);
return emitX86Select(builder, loc, mask, res, passthru);
}
>From 7c4d797ce8188c02ba95f939d632e5d0dc78e5f1 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 16 Dec 2025 00:51:24 +0530
Subject: [PATCH 06/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 15 +++++++--------
1 file changed, 7 insertions(+), 8 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 2d02bde108dcb..aaed050a8df86 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1731,15 +1731,14 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
intrinsicName = "x86.avx512bf16.mask.cvtneps2bf16.128";
break;
- case X86::BI__builtin_ia32_cvtneps2bf16_256_mask: {
- intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
- auto intrinsicResult = emitIntrinsicCallOp(
- builder, loc, intrinsicName, convertType(expr->getType()), ops);
- return emitX86Select(builder, loc, ops[2], intrinsicResult, ops[1]);
- }
+ case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
- intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
- auto intrinsicResult = emitIntrinsicCallOp(
+ StringRef intrinName;
+ if (builtinID == builtin_ia32_cvtneps2bf16_256_mask)
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
+ else
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
+ mlir::Value intrinsicResult = emitIntrinsicCallOp(
builder, loc, intrinsicName, convertType(expr->getType()), ops);
return emitX86Select(builder, loc, ops[2], intrinsicResult, ops[1]);
}
>From 6d9d1fae285e168d54a421bdf5a61f48ce3a1a2f Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 16 Dec 2025 01:03:14 +0530
Subject: [PATCH 07/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 13 +++----------
1 file changed, 3 insertions(+), 10 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index aaed050a8df86..60021d0d67390 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1716,18 +1716,11 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
mlir::Location loc = getLoc(expr->getExprLoc());
llvm::StringRef intrinsicName;
switch (builtinID) {
- case X86::BI__builtin_ia32_vcvtph2ps_mask: {
+ case X86::BI__builtin_ia32_vcvtph2ps_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps256_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask:
return emitX86CvtF16ToFloatExpr(builder, loc,
convertType(expr->getType()), ops);
- }
- case X86::BI__builtin_ia32_vcvtph2ps256_mask: {
- return emitX86CvtF16ToFloatExpr(builder, loc,
- convertType(expr->getType()), ops);
- }
- case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
- return emitX86CvtF16ToFloatExpr(builder, loc,
- convertType(expr->getType()), ops);
- }
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
intrinsicName = "x86.avx512bf16.mask.cvtneps2bf16.128";
break;
>From bc92ffe608edd2aaa9af0fb758e6869312f43f3e Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 15 Dec 2025 19:25:27 +0000
Subject: [PATCH 08/13] Add errorNYI
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 60021d0d67390..c7113bb3f6c9f 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1707,6 +1707,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_cmpnltsd:
case X86::BI__builtin_ia32_cmpnlesd:
case X86::BI__builtin_ia32_cmpordsd:
+ cgm.errorNYI(expr->getSourceRange(),
+ std::string("unimplemented X86 builtin call: ") +
+ getContext().BuiltinInfo.getName(builtinID));
+ return mlir::Value{};
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
case X86::BI__builtin_ia32_vcvtph2ps512_mask:
>From 64fd406a4e02631897115c294a8d7ea3325e902a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 15 Dec 2025 19:39:36 +0000
Subject: [PATCH 09/13] Update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index c7113bb3f6c9f..6ed64f98f7093 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -367,9 +367,9 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
mlir::Location loc,
mlir::Type dstTy,
SmallVectorImpl<mlir::Value> &ops) {
- auto src = ops[0];
- auto passthru = ops[1];
- auto mask = ops[2];
+ mlir::Value src = ops[0];
+ mlir::Value passthru = ops[1];
+ mlir::Value mask = ops[2];
auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
uint64_t numElems = vecTy.getSize();
>From c71c5ed4da99fec84fa99cd7685a706f028a0b3d Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 15 Dec 2025 20:49:54 +0000
Subject: [PATCH 10/13] Update CIRGenBuiltinX86.cpp.
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 50 ++++++++++------------
1 file changed, 22 insertions(+), 28 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 6ed64f98f7093..99d8410ace960 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1713,37 +1713,31 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
return mlir::Value{};
case X86::BI__builtin_ia32_vcvtph2ps_mask:
case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
+ case X86::BI__builtin_ia32_vcvtph2ps512_mask: {
+ return emitX86CvtF16ToFloatExpr(builder, loc, convertType(expr->getType()),
+ ops);
+ }
+ case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+ ops[2] = getMaskVecValue(builder, loc, ops[2], numElts);
+ return emitIntrinsicCallOp(builder, loc,
+ "x86.avx512bf16.mask.cvtneps2bf16.128",
+ convertType(expr->getType()), ops);
+ }
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- llvm::StringRef intrinsicName;
- switch (builtinID) {
- case X86::BI__builtin_ia32_vcvtph2ps_mask:
- case X86::BI__builtin_ia32_vcvtph2ps256_mask:
- case X86::BI__builtin_ia32_vcvtph2ps512_mask:
- return emitX86CvtF16ToFloatExpr(builder, loc,
- convertType(expr->getType()), ops);
- case X86::BI__builtin_ia32_cvtneps2bf16_128_mask:
- intrinsicName = "x86.avx512bf16.mask.cvtneps2bf16.128";
- break;
- case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
- case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
- StringRef intrinName;
- if (builtinID == builtin_ia32_cvtneps2bf16_256_mask)
- intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
- else
- intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
- mlir::Value intrinsicResult = emitIntrinsicCallOp(
- builder, loc, intrinsicName, convertType(expr->getType()), ops);
- return emitX86Select(builder, loc, ops[2], intrinsicResult, ops[1]);
- }
- default:
- llvm_unreachable("Unexpected builtinID");
+ ops[2] = getMaskVecValue(builder, loc, ops[2], numElts);
+
+ StringRef intrinsicName;
+ if (builtinID == X86::BI__builtin_ia32_cvtneps2bf16_256_mask) {
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.256";
+ } else {
+ intrinsicName = "x86.avx512bf16.cvtneps2bf16.512";
}
- return emitIntrinsicCallOp(builder, loc, intrinsicName,
- convertType(expr->getType()), ops);
+
+ mlir::Value intrinsicResult = emitIntrinsicCallOp(
+ builder, loc, intrinsicName, convertType(expr->getType()), ops);
+
+ return emitX86VectorSelect(builder, loc, ops[2], intrinsicResult, ops[1]);
}
case X86::BI__cpuid:
case X86::BI__cpuidex:
>From 7ac58ee63009c9a175613fa7a6af44f97d23f4d3 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 15 Dec 2025 21:20:25 +0000
Subject: [PATCH 11/13] Update CIRGenBuiltinX86.cpp.
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 99d8410ace960..bd65bd76902dd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -20,7 +20,6 @@
#include "clang/Basic/TargetBuiltins.h"
#include "clang/CIR/Dialect/IR/CIRTypes.h"
#include "clang/CIR/MissingFeatures.h"
-#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
using namespace clang;
@@ -367,13 +366,17 @@ static mlir::Value emitX86CvtF16ToFloatExpr(CIRGenBuilderTy &builder,
mlir::Location loc,
mlir::Type dstTy,
SmallVectorImpl<mlir::Value> &ops) {
+
mlir::Value src = ops[0];
mlir::Value passthru = ops[1];
mlir::Value mask = ops[2];
- auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
+ auto vecTy = mlir::cast<cir::VectorType>(src.getType());
uint64_t numElems = vecTy.getSize();
- auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems());
+
+ mask = getMaskVecValue(builder, loc, mask, numElems);
+
+ auto halfTy = cir::VectorType::get(builder.getF16Type(), numElems);
mlir::Value srcF16 = builder.createBitcast(loc, src, halfTy);
mlir::Value res = builder.createFloatingCast(srcF16, dstTy);
@@ -1737,7 +1740,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
mlir::Value intrinsicResult = emitIntrinsicCallOp(
builder, loc, intrinsicName, convertType(expr->getType()), ops);
- return emitX86VectorSelect(builder, loc, ops[2], intrinsicResult, ops[1]);
+ return emitX86Select(builder, loc, ops[2], intrinsicResult, ops[1]);
}
case X86::BI__cpuid:
case X86::BI__cpuidex:
>From 190703dd2862578b79dfb44f0bbfade07a8d12e9 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 15 Dec 2025 21:29:34 +0000
Subject: [PATCH 12/13] update test
---
clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
index fee7d6ac8c9f5..0948ec85a6766 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbf16-builtins.c
@@ -1,6 +1,6 @@
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-cir %s -o - | FileCheck %s --check-prefix=CIR
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
+// RUN: %clang_cc1 -fclangir -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm %s -o - | FileCheck %s --check-prefix=CIR
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm %s -o - | FileCheck %s --check-prefix=OGCG
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm %s -o - | FileCheck %s --check-prefix=LLVM
// REQUIRES: avx512fp16
// REQUIRES: avx512bf16
>From c71a4d3d098d0cadf22caa0cebe3a4d518b50b39 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 16 Dec 2025 04:25:40 +0000
Subject: [PATCH 13/13] update clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index bd65bd76902dd..d112ebc49e71c 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1721,6 +1721,8 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
ops);
}
case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
ops[2] = getMaskVecValue(builder, loc, ops[2], numElts);
return emitIntrinsicCallOp(builder, loc,
"x86.avx512bf16.mask.cvtneps2bf16.128",
@@ -1728,6 +1730,8 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
}
case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
ops[2] = getMaskVecValue(builder, loc, ops[2], numElts);
StringRef intrinsicName;
More information about the cfe-commits
mailing list