[clang] [CIR][X86] Implement handling for Select/Selectsh builtins in CIR (PR #174003)
Priyanshu Kumar via cfe-commits
cfe-commits at lists.llvm.org
Mon Jan 5 10:57:25 PST 2026
https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/174003
>From ee0486aa3fbd4a86722b1c5a503b56efa83c51d4 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 30 Dec 2025 16:08:45 +0000
Subject: [PATCH 1/8] Implement handling for Select/Selectsh builtins
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 32 ++
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 46 ++-
.../X86/avx512-select-builtins.c | 357 ++++++++++++++++++
3 files changed, 434 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cc28941aaa079..9b635258c96dc 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -487,6 +487,38 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
return createAddrSpaceCast(src.getLoc(), src, newTy);
}
+ //===--------------------------------------------------------------------===//
+ // Other Instructions
+ //===--------------------------------------------------------------------===//
+
+ mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
+ mlir::Value idx) {
+ auto vecTy = mlir::cast<cir::VectorType>(vec.getType());
+ mlir::Type eltTy = vecTy.getElementType();
+ auto op = cir::VecExtractOp::create(*this, loc, eltTy, vec, idx);
+ return op.getResult();
+ }
+
+ mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
+ uint64_t idx) {
+ auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
+ llvm::APInt(64, idx));
+ return createExtractElement(loc, vec, idxVal);
+ }
+
+ mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
+ mlir::Value newElt, mlir::Value idx) {
+ auto op = cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
+ return op.getResult();
+ }
+
+ mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
+ mlir::Value newElt, uint64_t idx) {
+ auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
+ llvm::APInt(64, idx));
+ return createInsertElement(loc, vec, newElt, idxVal);
+ }
+
//===--------------------------------------------------------------------===//
// Binary Operators
//===--------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..166aef7a32bd1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -184,6 +184,37 @@ static mlir::Value emitX86Select(CIRGenBuilderTy &builder, mlir::Location loc,
return cir::VecTernaryOp::create(builder, loc, mask, op0, op1);
}
+static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
+ mlir::Location loc, mlir::Value mask,
+ mlir::Value op0, mlir::Value op1) {
+
+ // If the mask is all ones just return first argument.
+ if (auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp()))
+ if (c.isAllOnesValue())
+ return op0;
+ // Cast to cir::IntType to safely get the width of a CIR integer
+ unsigned width = 0;
+ if (auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType())) {
+ width = intTy.getWidth();
+ } else {
+ // Fallback or handle unexpected type
+ width = 8;
+ }
+ mlir::Type maskTy = cir::VectorType::get(
+ cir::IntType::get(builder.getContext(), 1, false), width);
+
+ auto cirI64Ty = cir::IntType::get(builder.getContext(), 64, true);
+ mlir::Value idx0 = cir::ConstantOp::create(
+ builder, loc, cirI64Ty, cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
+
+ mask = builder.createBitcast(mask, maskTy);
+ mask = builder.createExtractElement(loc, mask, idx0);
+ auto boolTy = cir::BoolType::get(builder.getContext());
+ mask = cir::CastOp::create(builder, loc, boolTy, cir::CastKind::int_to_bool,
+ mask);
+ return builder.createSelect(loc, mask, op0, op1);
+}
+
static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
mlir::Location loc,
const std::string &intrinsicName,
@@ -1474,10 +1505,23 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_selectpd_128:
case X86::BI__builtin_ia32_selectpd_256:
case X86::BI__builtin_ia32_selectpd_512:
+ return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
+ ops[2]);
case X86::BI__builtin_ia32_selectsh_128:
case X86::BI__builtin_ia32_selectsbf_128:
case X86::BI__builtin_ia32_selectss_128:
- case X86::BI__builtin_ia32_selectsd_128:
+ case X86::BI__builtin_ia32_selectsd_128: {
+ mlir::Location loc = getLoc(expr->getExprLoc());
+ auto cirI64Ty =
+ cir::IntType::get(builder.getContext(), 64, /*isSigned=*/true);
+ mlir::Value idx0 = cir::ConstantOp::create(
+ builder, loc, cirI64Ty,
+ cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
+ mlir::Value a = builder.createExtractElement(loc, ops[1], idx0);
+ mlir::Value b = builder.createExtractElement(loc, ops[2], idx0);
+ a = emitX86ScalarSelect(builder, loc, ops[0], a, b);
+ return builder.createInsertElement(loc, ops[1], a, idx0);
+ }
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
case X86::BI__builtin_ia32_cmpb512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
new file mode 100644
index 0000000000000..7ad0bf2120ee5
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -0,0 +1,357 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+
+__m128i test_selectb_128(__mmask16 k, __m128i a, __m128i b) {
+ // CIR-LABEL: @test_selectb_128
+ // CIR: %[[MASK_BC:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_BC]], %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s8i>
+
+ // LLVM-LABEL: @test_selectb_128
+ // LLVM: select <16 x i1> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}
+
+ // OGCG-LABEL: @test_selectb_128
+ // OGCG: select <16 x i1> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}
+ return (__m128i)__builtin_ia32_selectb_128(k, (__v16qi)a, (__v16qi)b);
+}
+
+__m256i test_selectb_256(__mmask32 k, __m256i a, __m256i b) {
+ // CIR-LABEL: @test_selectb_256
+ // CIR: %[[MASK_BC:.+]] = cir.cast bitcast %{{.+}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_BC]], %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+
+ // LLVM-LABEL: @test_selectb_256
+ // LLVM: select <32 x i1> %{{.+}}, <32 x i8> %{{.+}}, <32 x i8> %{{.+}}
+
+ // OGCG-LABEL: @test_selectb_256
+ // OGCG: select <32 x i1> %{{.+}}, <32 x i8> %{{.+}}, <32 x i8> %{{.+}}
+ return (__m256i)__builtin_ia32_selectb_256(k, (__v32qi)a, (__v32qi)b);
+}
+
+__m512i test_selectb_512(__mmask64 k, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_selectb_512
+ // CIR: %[[MASK_BC:.+]] = cir.cast bitcast %{{.+}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+ // CIR: cir.vec.ternary(%[[MASK_BC]], %{{.+}}, %{{.+}}) : !cir.vector<64 x !cir.int<s, 1>>, !cir.vector<64 x !s8i>
+
+ // LLVM-LABEL: @test_selectb_512
+ // LLVM: select <64 x i1> %{{.+}}, <64 x i8> %{{.+}}, <64 x i8> %{{.+}}
+
+ // OGCG-LABEL: @test_selectb_512
+ // OGCG: select <64 x i1> %{{.+}}, <64 x i8> %{{.+}}, <64 x i8> %{{.+}}
+ return (__m512i)__builtin_ia32_selectb_512(k, (__v64qi)a, (__v64qi)b);
+}
+
+__m128i test_selectw_128(__mmask8 k, __m128i a, __m128i b) {
+ // CIR-LABEL: @test_selectw_128
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s16i>
+
+ // LLVM-LABEL: @test_selectw_128
+ // LLVM: select <8 x i1> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16> %{{.+}}
+
+ // OGCG-LABEL: @test_selectw_128
+ // OGCG: select <8 x i1> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16> %{{.+}}
+ return (__m128i)__builtin_ia32_selectw_128(k, (__v8hi)a, (__v8hi)b);
+}
+
+__m256i test_selectw_256(__mmask16 k, __m256i a, __m256i b) {
+ // CIR-LABEL: @test_selectw_256
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s16i>
+
+ // LLVM-LABEL: @test_selectw_256
+ // LLVM: select <16 x i1> %{{.+}}, <16 x i16> %{{.+}}, <16 x i16> %{{.+}}
+
+ // OGCG-LABEL: @test_selectw_256
+ // OGCG: select <16 x i1> %{{.+}}, <16 x i16> %{{.+}}, <16 x i16> %{{.+}}
+ return (__m256i)__builtin_ia32_selectw_256(k, (__v16hi)a, (__v16hi)b);
+}
+
+__m512i test_selectw_512(__mmask32 k, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_selectw_512
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+
+ // LLVM-LABEL: @test_selectw_512
+ // LLVM: select <32 x i1> %{{.+}}, <32 x i16> %{{.+}}, <32 x i16> %{{.+}}
+
+ // OGCG-LABEL: @test_selectw_512
+ // OGCG: select <32 x i1> %{{.+}}, <32 x i16> %{{.+}}, <32 x i16> %{{.+}}
+ return (__m512i)__builtin_ia32_selectw_512(k, (__v32hi)a, (__v32hi)b);
+}
+
+__m128i test_selectd_128(__mmask8 k, __m128i a, __m128i b) {
+ // CIR-LABEL: @test_selectd_128
+ // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+ // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !s32i>
+
+ // LLVM-LABEL: @test_selectd_128
+ // LLVM: shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ // LLVM: select <4 x i1> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32> %{{.+}}
+
+ // OGCG-LABEL: @test_selectd_128
+ // OGCG: select <4 x i1> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32> %{{.+}}
+ return (__m128i)__builtin_ia32_selectd_128(k, (__v4si)a, (__v4si)b);
+}
+
+__m256i test_selectd_256(__mmask8 k, __m256i a, __m256i b) {
+ // CIR-LABEL: @test_selectd_256
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+
+ // LLVM-LABEL: @test_selectd_256
+ // LLVM: select <8 x i1> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> %{{.+}}
+
+ // OGCG-LABEL: @test_selectd_256
+ // OGCG: select <8 x i1> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> %{{.+}}
+ return (__m256i)__builtin_ia32_selectd_256(k, (__v8si)a, (__v8si)b);
+}
+
+__m512i test_selectd_512(__mmask16 k, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_selectd_512
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+
+ // LLVM-LABEL: @test_selectd_512
+ // LLVM: select <16 x i1> %{{.+}}, <16 x i32> %{{.+}}, <16 x i32> %{{.+}}
+
+ // OGCG-LABEL: @test_selectd_512
+ // OGCG: select <16 x i1> %{{.+}}, <16 x i32> %{{.+}}, <16 x i32> %{{.+}}
+ return (__m512i)__builtin_ia32_selectd_512(k, (__v16si)a, (__v16si)b);
+}
+
+__m128i test_selectq_128(__mmask8 k, __m128i a, __m128i b) {
+ // CIR-LABEL: @test_selectq_128
+ // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i]
+ // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<2 x !cir.int<s, 1>>, !cir.vector<2 x !s64i>
+
+ // LLVM-LABEL: @test_selectq_128
+ // LLVM: select <2 x i1> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}
+
+ // OGCG-LABEL: @test_selectq_128
+ // OGCG: select <2 x i1> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}
+ return __builtin_ia32_selectq_128(k, a, b);
+}
+
+__m256i test_selectq_256(__mmask8 k, __m256i a, __m256i b) {
+ // CIR-LABEL: @test_selectq_256
+ // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+ // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !s64i>
+
+ // LLVM-LABEL: @test_selectq_256
+ // LLVM: select <4 x i1> %{{.+}}, <4 x i64> %{{.+}}, <4 x i64> %{{.+}}
+
+ // OGCG-LABEL: @test_selectq_256
+ // OGCG: select <4 x i1> %{{.+}}, <4 x i64> %{{.+}}, <4 x i64> %{{.+}}
+ return __builtin_ia32_selectq_256(k, a, b);
+}
+
+__m512i test_selectq_512(__mmask8 k, __m512i a, __m512i b) {
+ // CIR-LABEL: @test_selectq_512
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+
+ // LLVM-LABEL: @test_selectq_512
+ // LLVM: select <8 x i1> %{{.+}}, <8 x i64> %{{.+}}, <8 x i64> %{{.+}}
+
+ // OGCG-LABEL: @test_selectq_512
+ // OGCG: select <8 x i1> %{{.+}}, <8 x i64> %{{.+}}, <8 x i64> %{{.+}}
+ return __builtin_ia32_selectq_512(k, a, b);
+}
+
+__m128h test_selectph_128(__mmask8 k, __m128h a, __m128h b) {
+ // CIR-LABEL: @test_selectph_128
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.f16>
+
+ // LLVM-LABEL: @test_selectph_128
+ // LLVM: select <8 x i1> %{{.+}}, <8 x half> %{{.+}}, <8 x half> %{{.+}}
+
+ // OGCG-LABEL: @test_selectph_128
+ // OGCG: select <8 x i1> %{{.+}}, <8 x half> %{{.+}}, <8 x half> %{{.+}}
+ return __builtin_ia32_selectph_128(k, a, b);
+}
+
+__m256h test_selectph_256(__mmask16 k, __m256h a, __m256h b) {
+ // CIR-LABEL: @test_selectph_256
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.f16>
+
+ // LLVM-LABEL: @test_selectph_256
+ // LLVM: select <16 x i1> %{{.+}}, <16 x half> %{{.+}}, <16 x half> %{{.+}}
+
+ // OGCG-LABEL: @test_selectph_256
+ // OGCG: select <16 x i1> %{{.+}}, <16 x half> %{{.+}}, <16 x half> %{{.+}}
+ return __builtin_ia32_selectph_256(k, a, b);
+}
+
+__m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
+ // CIR-LABEL: @test_selectph_512
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !cir.f16>
+
+ // LLVM-LABEL: @test_selectph_512
+ // LLVM: select <32 x i1> %{{.+}}, <32 x half> %{{.+}}, <32 x half> %{{.+}}
+
+ // OGCG-LABEL: @test_selectph_512
+ // OGCG: select <32 x i1> %{{.+}}, <32 x half> %{{.+}}, <32 x half> %{{.+}}
+ return __builtin_ia32_selectph_512(k, a, b);
+}
+
+__m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
+ // CIR-LABEL: @test_selectsbf_128
+ // CIR: %[[COND:.+]] = cir.cast int_to_bool %{{.+}} : !cir.int<u, 1> -> !cir.bool
+ // CIR: cir.select if %[[COND]] then %{{.+}} else %{{.+}} : (!cir.bool, !cir.bf16, !cir.bf16) -> !cir.bf16
+
+ // LLVM-LABEL: @test_selectsbf_128
+ // LLVM: select i1 %{{.+}}, bfloat %{{.+}}, bfloat %{{.+}}
+
+ // OGCG-LABEL: @test_selectsbf_128
+ // OGCG: select i1 %{{.+}}, bfloat %{{.+}}, bfloat %{{.+}}
+ return __builtin_ia32_selectsbf_128(k, a, b);
+}
+
+__m256bh test_selectpbf_256(__mmask16 k, __m256bh a, __m256bh b) {
+ // CIR-LABEL: @test_selectpbf_256
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.bf16>
+
+ // LLVM-LABEL: @test_selectpbf_256
+ // LLVM: select <16 x i1> %{{.+}}, <16 x bfloat> %{{.+}}, <16 x bfloat> %{{.+}}
+
+ // OGCG-LABEL: @test_selectpbf_256
+ // OGCG: select <16 x i1> %{{.+}}, <16 x bfloat> %{{.+}}, <16 x bfloat> %{{.+}}
+ return __builtin_ia32_selectpbf_256(k, a, b);
+}
+
+__m512bh test_selectpbf_512(__mmask32 k, __m512bh a, __m512bh b) {
+ // CIR-LABEL: @test_selectpbf_512
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !cir.bf16>
+
+ // LLVM-LABEL: @test_selectpbf_512
+ // LLVM: select <32 x i1> %{{.+}}, <32 x bfloat> %{{.+}}, <32 x bfloat> %{{.+}}
+
+ // OGCG-LABEL: @test_selectpbf_512
+ // OGCG: select <32 x i1> %{{.+}}, <32 x bfloat> %{{.+}}, <32 x bfloat> %{{.+}}
+ return __builtin_ia32_selectpbf_512(k, a, b);
+}
+
+__m128 test_selectps_128(__mmask8 k, __m128 a, __m128 b) {
+ // CIR-LABEL: @test_selectps_128
+ // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+ // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+
+ // LLVM-LABEL: @test_selectps_128
+ // LLVM: select <4 x i1> %{{.+}}, <4 x float> %{{.+}}, <4 x float> %{{.+}}
+
+ // OGCG-LABEL: @test_selectps_128
+ // OGCG: select <4 x i1> %{{.+}}, <4 x float> %{{.+}}, <4 x float> %{{.+}}
+ return __builtin_ia32_selectps_128(k, a, b);
+}
+
+__m256 test_selectps_256(__mmask8 k, __m256 a, __m256 b) {
+ // CIR-LABEL: @test_selectps_256
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+
+ // LLVM-LABEL: @test_selectps_256
+ // LLVM: select <8 x i1> %{{.+}}, <8 x float> %{{.+}}, <8 x float> %{{.+}}
+
+ // OGCG-LABEL: @test_selectps_256
+ // OGCG: select <8 x i1> %{{.+}}, <8 x float> %{{.+}}, <8 x float> %{{.+}}
+ return __builtin_ia32_selectps_256(k, a, b);
+}
+
+__m512 test_selectps_512(__mmask16 k, __m512 a, __m512 b) {
+ // CIR-LABEL: @test_selectps_512
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+
+ // LLVM-LABEL: @test_selectps_512
+ // LLVM: select <16 x i1> %{{.+}}, <16 x float> %{{.+}}, <16 x float> %{{.+}}
+
+ // OGCG-LABEL: @test_selectps_512
+ // OGCG: select <16 x i1> %{{.+}}, <16 x float> %{{.+}}, <16 x float> %{{.+}}
+ return __builtin_ia32_selectps_512(k, a, b);
+}
+
+__m128d test_selectpd_128(__mmask8 k, __m128d a, __m128d b) {
+ // CIR-LABEL: @test_selectpd_128
+ // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i]
+ // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<2 x !cir.int<s, 1>>, !cir.vector<2 x !cir.double>
+
+ // LLVM-LABEL: @test_selectpd_128
+ // LLVM: select <2 x i1> %{{.+}}, <2 x double> %{{.+}}, <2 x double> %{{.+}}
+
+ // OGCG-LABEL: @test_selectpd_128
+ // OGCG: select <2 x i1> %{{.+}}, <2 x double> %{{.+}}, <2 x double> %{{.+}}
+ return __builtin_ia32_selectpd_128(k, a, b);
+}
+
+__m256d test_selectpd_256(__mmask8 k, __m256d a, __m256d b) {
+ // CIR-LABEL: @test_selectpd_256
+ // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+ // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.double>
+
+ // LLVM-LABEL: @test_selectpd_256
+ // LLVM: select <4 x i1> %{{.+}}, <4 x double> %{{.+}}, <4 x double> %{{.+}}
+
+ // OGCG-LABEL: @test_selectpd_256
+ // OGCG: select <4 x i1> %{{.+}}, <4 x double> %{{.+}}, <4 x double> %{{.+}}
+ return __builtin_ia32_selectpd_256(k, a, b);
+}
+
+__m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
+ // CIR-LABEL: @test_selectpd_512
+ // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.double>
+
+ // LLVM-LABEL: @test_selectpd_512
+ // LLVM: select <8 x i1> %{{.+}}, <8 x double> %{{.+}}, <8 x double> %{{.+}}
+
+ // OGCG-LABEL: @test_selectpd_512
+ // OGCG: select <8 x i1> %{{.+}}, <8 x double> %{{.+}}, <8 x double> %{{.+}}
+ return __builtin_ia32_selectpd_512(k, a, b);
+}
+
+// Scalar Selects
+
+__m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
+ // CIR-LABEL: @test_selectsh_128
+ // CIR: %[[I0:.+]] = cir.const #cir.int<0> : !s64i
+ // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+ // CIR: %[[EB:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+ // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: %[[COND:.+]] = cir.cast int_to_bool %[[BIT0]] : !cir.int<u, 1> -> !cir.bool
+ // CIR: %[[SEL:.+]] = cir.select if %[[COND]] then %[[EA]] else %[[EB]]
+ // CIR: cir.vec.insert %[[SEL]], %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+
+ // LLVM-LABEL: @test_selectsh_128
+ // LLVM: %[[E1:.+]] = extractelement <8 x half> %{{.+}}, i64 0
+ // LLVM: select i1 %{{.+}}, half %[[E1]], half %{{.+}}
+
+ // OGCG-LABEL: @test_selectsh_128
+ // OGCG: select i1 %{{.+}}, half %{{.+}}, half %{{.+}}
+ return __builtin_ia32_selectsh_128(k, a, b);
+}
+
+__m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
+ // CIR-LABEL: @test_selectss_128
+ // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<4 x !cir.float>
+ // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.float, !cir.float) -> !cir.float
+
+ // LLVM-LABEL: @test_selectss_128
+ // LLVM: select i1 %{{.+}}, float %{{.+}}, float %{{.+}}
+
+ // OGCG-LABEL: @test_selectss_128
+ // OGCG: select i1 %{{.+}}, float %{{.+}}, float %{{.+}}
+ return __builtin_ia32_selectss_128(k, a, b);
+}
+
+__m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
+ // CIR-LABEL: @test_selectsd_128
+ // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<2 x !cir.double>
+ // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.double, !cir.double) -> !cir.double
+
+ // LLVM-LABEL: @test_selectsd_128
+ // LLVM: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
+
+ // OGCG-LABEL: @test_selectsd_128
+ // OGCG: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
+ return __builtin_ia32_selectsd_128(k, a, b);
+}
\ No newline at end of file
>From 4d7721da3bf54f9a5307d8930a7852cb3cadc236 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 30 Dec 2025 16:17:01 +0000
Subject: [PATCH 2/8] Fix formatting
---
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 3 ++-
clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c | 2 +-
2 files changed, 3 insertions(+), 2 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 9b635258c96dc..9cfec71923b9f 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -508,7 +508,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
mlir::Value newElt, mlir::Value idx) {
- auto op = cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
+ auto op =
+ cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
return op.getResult();
}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
index 7ad0bf2120ee5..6620e91258870 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -354,4 +354,4 @@ __m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
// OGCG-LABEL: @test_selectsd_128
// OGCG: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
return __builtin_ia32_selectsd_128(k, a, b);
-}
\ No newline at end of file
+}
>From 4bbee5c420672c15ad6a47b95bff51caec736d0a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 2 Jan 2026 05:31:23 +0000
Subject: [PATCH 3/8] Update CIRGenBuiltinX86.cpp and test file
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 29 ++++---------
.../X86/avx512-select-builtins.c | 42 +++++++++----------
2 files changed, 30 insertions(+), 41 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 166aef7a32bd1..c9e9f6cd017a0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -26,6 +26,7 @@
#include "clang/CIR/MissingFeatures.h"
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
#include <string>
using namespace clang;
@@ -193,22 +194,15 @@ static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
if (c.isAllOnesValue())
return op0;
// Cast to cir::IntType to safely get the width of a CIR integer
- unsigned width = 0;
- if (auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType())) {
- width = intTy.getWidth();
- } else {
- // Fallback or handle unexpected type
- width = 8;
- }
+ auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType());
+ assert(intTy && "mask must be an integer type");
+ unsigned width = intTy.getWidth();
+
mlir::Type maskTy = cir::VectorType::get(
cir::IntType::get(builder.getContext(), 1, false), width);
- auto cirI64Ty = cir::IntType::get(builder.getContext(), 64, true);
- mlir::Value idx0 = cir::ConstantOp::create(
- builder, loc, cirI64Ty, cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
-
mask = builder.createBitcast(mask, maskTy);
- mask = builder.createExtractElement(loc, mask, idx0);
+ mask = builder.createExtractElement(loc, mask, (uint64_t)0);
auto boolTy = cir::BoolType::get(builder.getContext());
mask = cir::CastOp::create(builder, loc, boolTy, cir::CastKind::int_to_bool,
mask);
@@ -1512,15 +1506,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_selectss_128:
case X86::BI__builtin_ia32_selectsd_128: {
mlir::Location loc = getLoc(expr->getExprLoc());
- auto cirI64Ty =
- cir::IntType::get(builder.getContext(), 64, /*isSigned=*/true);
- mlir::Value idx0 = cir::ConstantOp::create(
- builder, loc, cirI64Ty,
- cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
- mlir::Value a = builder.createExtractElement(loc, ops[1], idx0);
- mlir::Value b = builder.createExtractElement(loc, ops[2], idx0);
+ mlir::Value a = builder.createExtractElement(loc, ops[1], (uint64_t)0);
+ mlir::Value b = builder.createExtractElement(loc, ops[2], (uint64_t)0);
a = emitX86ScalarSelect(builder, loc, ops[0], a, b);
- return builder.createInsertElement(loc, ops[1], a, idx0);
+ return builder.createInsertElement(loc, ops[1], a, (uint64_t)0);
}
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
index 6620e91258870..11286adaf6653 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -fclangir -emit-cir -o %t.cir
// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -fclangir -emit-llvm -o %t.ll
// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm -o %t.ll
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -emit-llvm -o %t.ll
// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
#include <immintrin.h>
@@ -131,7 +131,7 @@ __m128i test_selectq_128(__mmask8 k, __m128i a, __m128i b) {
// OGCG-LABEL: @test_selectq_128
// OGCG: select <2 x i1> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}
- return __builtin_ia32_selectq_128(k, a, b);
+ return __builtin_ia32_selectq_128(k, (__v2di)a, (__v2di)b);
}
__m256i test_selectq_256(__mmask8 k, __m256i a, __m256i b) {
@@ -144,7 +144,7 @@ __m256i test_selectq_256(__mmask8 k, __m256i a, __m256i b) {
// OGCG-LABEL: @test_selectq_256
// OGCG: select <4 x i1> %{{.+}}, <4 x i64> %{{.+}}, <4 x i64> %{{.+}}
- return __builtin_ia32_selectq_256(k, a, b);
+ return __builtin_ia32_selectq_256(k, (__v4di)a, (__v4di)b);
}
__m512i test_selectq_512(__mmask8 k, __m512i a, __m512i b) {
@@ -156,7 +156,7 @@ __m512i test_selectq_512(__mmask8 k, __m512i a, __m512i b) {
// OGCG-LABEL: @test_selectq_512
// OGCG: select <8 x i1> %{{.+}}, <8 x i64> %{{.+}}, <8 x i64> %{{.+}}
- return __builtin_ia32_selectq_512(k, a, b);
+ return __builtin_ia32_selectq_512(k, (__v8di)a, (__v8di)b);
}
__m128h test_selectph_128(__mmask8 k, __m128h a, __m128h b) {
@@ -168,7 +168,7 @@ __m128h test_selectph_128(__mmask8 k, __m128h a, __m128h b) {
// OGCG-LABEL: @test_selectph_128
// OGCG: select <8 x i1> %{{.+}}, <8 x half> %{{.+}}, <8 x half> %{{.+}}
- return __builtin_ia32_selectph_128(k, a, b);
+ return __builtin_ia32_selectph_128(k, (__v8hf)a, (__v8hf)b);
}
__m256h test_selectph_256(__mmask16 k, __m256h a, __m256h b) {
@@ -180,7 +180,7 @@ __m256h test_selectph_256(__mmask16 k, __m256h a, __m256h b) {
// OGCG-LABEL: @test_selectph_256
// OGCG: select <16 x i1> %{{.+}}, <16 x half> %{{.+}}, <16 x half> %{{.+}}
- return __builtin_ia32_selectph_256(k, a, b);
+ return __builtin_ia32_selectph_256(k, (__v16hf)a, (__v16hf)b);
}
__m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
@@ -192,7 +192,7 @@ __m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
// OGCG-LABEL: @test_selectph_512
// OGCG: select <32 x i1> %{{.+}}, <32 x half> %{{.+}}, <32 x half> %{{.+}}
- return __builtin_ia32_selectph_512(k, a, b);
+ return __builtin_ia32_selectph_512(k, (__v32hf)a, (__v32hf)b);
}
__m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
@@ -205,7 +205,7 @@ __m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
// OGCG-LABEL: @test_selectsbf_128
// OGCG: select i1 %{{.+}}, bfloat %{{.+}}, bfloat %{{.+}}
- return __builtin_ia32_selectsbf_128(k, a, b);
+ return __builtin_ia32_selectsbf_128(k, (__v8bf)a, (__v8bf)b);
}
__m256bh test_selectpbf_256(__mmask16 k, __m256bh a, __m256bh b) {
@@ -217,7 +217,7 @@ __m256bh test_selectpbf_256(__mmask16 k, __m256bh a, __m256bh b) {
// OGCG-LABEL: @test_selectpbf_256
// OGCG: select <16 x i1> %{{.+}}, <16 x bfloat> %{{.+}}, <16 x bfloat> %{{.+}}
- return __builtin_ia32_selectpbf_256(k, a, b);
+ return __builtin_ia32_selectpbf_256(k, (__v16bf)a, (__v16bf)b);
}
__m512bh test_selectpbf_512(__mmask32 k, __m512bh a, __m512bh b) {
@@ -229,7 +229,7 @@ __m512bh test_selectpbf_512(__mmask32 k, __m512bh a, __m512bh b) {
// OGCG-LABEL: @test_selectpbf_512
// OGCG: select <32 x i1> %{{.+}}, <32 x bfloat> %{{.+}}, <32 x bfloat> %{{.+}}
- return __builtin_ia32_selectpbf_512(k, a, b);
+ return __builtin_ia32_selectpbf_512(k, (__v32bf)a, (__v32bf)b);
}
__m128 test_selectps_128(__mmask8 k, __m128 a, __m128 b) {
@@ -242,7 +242,7 @@ __m128 test_selectps_128(__mmask8 k, __m128 a, __m128 b) {
// OGCG-LABEL: @test_selectps_128
// OGCG: select <4 x i1> %{{.+}}, <4 x float> %{{.+}}, <4 x float> %{{.+}}
- return __builtin_ia32_selectps_128(k, a, b);
+ return __builtin_ia32_selectps_128(k, (__v4sf)a, (__v4sf)b);
}
__m256 test_selectps_256(__mmask8 k, __m256 a, __m256 b) {
@@ -254,7 +254,7 @@ __m256 test_selectps_256(__mmask8 k, __m256 a, __m256 b) {
// OGCG-LABEL: @test_selectps_256
// OGCG: select <8 x i1> %{{.+}}, <8 x float> %{{.+}}, <8 x float> %{{.+}}
- return __builtin_ia32_selectps_256(k, a, b);
+ return __builtin_ia32_selectps_256(k, (__v8sf)a, (__v8sf)b);
}
__m512 test_selectps_512(__mmask16 k, __m512 a, __m512 b) {
@@ -266,7 +266,7 @@ __m512 test_selectps_512(__mmask16 k, __m512 a, __m512 b) {
// OGCG-LABEL: @test_selectps_512
// OGCG: select <16 x i1> %{{.+}}, <16 x float> %{{.+}}, <16 x float> %{{.+}}
- return __builtin_ia32_selectps_512(k, a, b);
+ return __builtin_ia32_selectps_512(k, (__v16sf)a, (__v16sf)b);
}
__m128d test_selectpd_128(__mmask8 k, __m128d a, __m128d b) {
@@ -279,7 +279,7 @@ __m128d test_selectpd_128(__mmask8 k, __m128d a, __m128d b) {
// OGCG-LABEL: @test_selectpd_128
// OGCG: select <2 x i1> %{{.+}}, <2 x double> %{{.+}}, <2 x double> %{{.+}}
- return __builtin_ia32_selectpd_128(k, a, b);
+ return __builtin_ia32_selectpd_128(k, (__v2df)a, (__v2df)b);
}
__m256d test_selectpd_256(__mmask8 k, __m256d a, __m256d b) {
@@ -292,7 +292,7 @@ __m256d test_selectpd_256(__mmask8 k, __m256d a, __m256d b) {
// OGCG-LABEL: @test_selectpd_256
// OGCG: select <4 x i1> %{{.+}}, <4 x double> %{{.+}}, <4 x double> %{{.+}}
- return __builtin_ia32_selectpd_256(k, a, b);
+ return __builtin_ia32_selectpd_256(k, (__v4df)a, (__v4df)b);
}
__m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
@@ -304,7 +304,7 @@ __m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
// OGCG-LABEL: @test_selectpd_512
// OGCG: select <8 x i1> %{{.+}}, <8 x double> %{{.+}}, <8 x double> %{{.+}}
- return __builtin_ia32_selectpd_512(k, a, b);
+ return __builtin_ia32_selectpd_512(k, (__v8df)a, (__v8df)b);
}
// Scalar Selects
@@ -325,7 +325,7 @@ __m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
// OGCG-LABEL: @test_selectsh_128
// OGCG: select i1 %{{.+}}, half %{{.+}}, half %{{.+}}
- return __builtin_ia32_selectsh_128(k, a, b);
+ return __builtin_ia32_selectsh_128(k, (__v8hf)a, (__v8hf)b);
}
__m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
@@ -339,7 +339,7 @@ __m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
// OGCG-LABEL: @test_selectss_128
// OGCG: select i1 %{{.+}}, float %{{.+}}, float %{{.+}}
- return __builtin_ia32_selectss_128(k, a, b);
+ return __builtin_ia32_selectss_128(k, (__v4sf)a, (__v4sf)b);
}
__m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
@@ -353,5 +353,5 @@ __m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
// OGCG-LABEL: @test_selectsd_128
// OGCG: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
- return __builtin_ia32_selectsd_128(k, a, b);
+ return __builtin_ia32_selectsd_128(k, (__v2df)a, (__v2df)b);
}
>From daadcc542c7d3980ff07adf6d31488eaa0665244 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 5 Jan 2026 11:24:43 +0000
Subject: [PATCH 4/8] Update CIRGenBuiltinX86.cpp
---
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 19 ++++++++++++-------
1 file changed, 12 insertions(+), 7 deletions(-)
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index c9e9f6cd017a0..b0f81de5d828d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -198,15 +198,20 @@ static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
assert(intTy && "mask must be an integer type");
unsigned width = intTy.getWidth();
- mlir::Type maskTy = cir::VectorType::get(
- cir::IntType::get(builder.getContext(), 1, false), width);
+ auto i1Ty = builder.getSIntNTy(1);
+ auto maskVecTy = cir::VectorType::get(i1Ty, width);
- mask = builder.createBitcast(mask, maskTy);
- mask = builder.createExtractElement(loc, mask, (uint64_t)0);
+ mlir::Value maskVec = builder.createBitcast(mask, maskVecTy);
+
+ // Extract bit 0 from the mask vector
+ mlir::Value bit0 = builder.createExtractElement(loc, maskVec, (uint64_t)0);
+
+ // Convert i1 to bool for select
auto boolTy = cir::BoolType::get(builder.getContext());
- mask = cir::CastOp::create(builder, loc, boolTy, cir::CastKind::int_to_bool,
- mask);
- return builder.createSelect(loc, mask, op0, op1);
+ mlir::Value cond = cir::CastOp::create(builder, loc, boolTy,
+ cir::CastKind::int_to_bool, bit0);
+
+ return builder.createSelect(loc, cond, op0, op1);
}
static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
>From ec7e649dc8b2d660798c136dfbdea6fe03ea8329 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 5 Jan 2026 17:03:48 +0000
Subject: [PATCH 5/8] Update
---
.../CIR/Dialect/Builder/CIRBaseBuilder.h | 6 +--
clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 38 +++++++++++++------
.../X86/avx512-select-builtins.c | 20 +++++-----
3 files changed, 38 insertions(+), 26 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 9cfec71923b9f..165b89a4a15b9 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -501,8 +501,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
uint64_t idx) {
- auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
- llvm::APInt(64, idx));
+ auto idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
return createExtractElement(loc, vec, idxVal);
}
@@ -515,8 +514,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
mlir::Value newElt, uint64_t idx) {
- auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
- llvm::APInt(64, idx));
+ auto idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
return createInsertElement(loc, vec, newElt, idxVal);
}
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b0f81de5d828d..50cc0297d28ed 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -193,25 +193,43 @@ static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
if (auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp()))
if (c.isAllOnesValue())
return op0;
- // Cast to cir::IntType to safely get the width of a CIR integer
+
+ // Extract the scalar values from the vector operands
+ auto vecTy0 = mlir::dyn_cast<cir::VectorType>(op0.getType());
+ auto vecTy1 = mlir::dyn_cast<cir::VectorType>(op1.getType());
+
+ mlir::Value scalar0 = op0;
+ mlir::Value scalar1 = op1;
+
+ if (vecTy0)
+ scalar0 = builder.createExtractElement(loc, op0, uint64_t(0));
+
+ if (vecTy1)
+ scalar1 = builder.createExtractElement(loc, op1, uint64_t(0));
+
+ // Get the mask as a vector of i1 and extract bit 0
auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType());
assert(intTy && "mask must be an integer type");
unsigned width = intTy.getWidth();
- auto i1Ty = builder.getSIntNTy(1);
+ auto i1Ty = builder.getUIntNTy(1);
auto maskVecTy = cir::VectorType::get(i1Ty, width);
-
mlir::Value maskVec = builder.createBitcast(mask, maskVecTy);
// Extract bit 0 from the mask vector
- mlir::Value bit0 = builder.createExtractElement(loc, maskVec, (uint64_t)0);
+ mlir::Value bit0 = builder.createExtractElement(loc, maskVec, uint64_t(0));
// Convert i1 to bool for select
auto boolTy = cir::BoolType::get(builder.getContext());
mlir::Value cond = cir::CastOp::create(builder, loc, boolTy,
cir::CastKind::int_to_bool, bit0);
- return builder.createSelect(loc, cond, op0, op1);
+ mlir::Value result = builder.createSelect(loc, cond, scalar0, scalar1);
+
+ if (vecTy0)
+ result = builder.createInsertElement(loc, op0, result, uint64_t(0));
+
+ return result;
}
static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
@@ -1509,13 +1527,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
case X86::BI__builtin_ia32_selectsh_128:
case X86::BI__builtin_ia32_selectsbf_128:
case X86::BI__builtin_ia32_selectss_128:
- case X86::BI__builtin_ia32_selectsd_128: {
- mlir::Location loc = getLoc(expr->getExprLoc());
- mlir::Value a = builder.createExtractElement(loc, ops[1], (uint64_t)0);
- mlir::Value b = builder.createExtractElement(loc, ops[2], (uint64_t)0);
- a = emitX86ScalarSelect(builder, loc, ops[0], a, b);
- return builder.createInsertElement(loc, ops[1], a, (uint64_t)0);
- }
+ case X86::BI__builtin_ia32_selectsd_128:
+ return emitX86ScalarSelect(builder, getLoc(expr->getExprLoc()), ops[0],
+ ops[1], ops[2]);
case X86::BI__builtin_ia32_cmpb128_mask:
case X86::BI__builtin_ia32_cmpb256_mask:
case X86::BI__builtin_ia32_cmpb512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
index 11286adaf6653..10d6e8baea4da 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -197,7 +197,8 @@ __m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
__m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
// CIR-LABEL: @test_selectsbf_128
- // CIR: %[[COND:.+]] = cir.cast int_to_bool %{{.+}} : !cir.int<u, 1> -> !cir.bool
+ // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: %[[COND:.+]] = cir.cast int_to_bool %[[BIT0]] : !cir.int<u, 1> -> !cir.bool
// CIR: cir.select if %[[COND]] then %{{.+}} else %{{.+}} : (!cir.bool, !cir.bf16, !cir.bf16) -> !cir.bf16
// LLVM-LABEL: @test_selectsbf_128
@@ -311,13 +312,12 @@ __m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
__m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
// CIR-LABEL: @test_selectsh_128
- // CIR: %[[I0:.+]] = cir.const #cir.int<0> : !s64i
- // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
- // CIR: %[[EB:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
- // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.f16>
+ // CIR: %[[EB:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.f16>
+ // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
// CIR: %[[COND:.+]] = cir.cast int_to_bool %[[BIT0]] : !cir.int<u, 1> -> !cir.bool
// CIR: %[[SEL:.+]] = cir.select if %[[COND]] then %[[EA]] else %[[EB]]
- // CIR: cir.vec.insert %[[SEL]], %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+ // CIR: cir.vec.insert %[[SEL]], %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.f16>
// LLVM-LABEL: @test_selectsh_128
// LLVM: %[[E1:.+]] = extractelement <8 x half> %{{.+}}, i64 0
@@ -330,8 +330,8 @@ __m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
__m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
// CIR-LABEL: @test_selectss_128
- // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<4 x !cir.float>
- // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !u64i] : !cir.vector<4 x !cir.float>
+ // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
// CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.float, !cir.float) -> !cir.float
// LLVM-LABEL: @test_selectss_128
@@ -344,8 +344,8 @@ __m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
__m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
// CIR-LABEL: @test_selectsd_128
- // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<2 x !cir.double>
- // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !u64i] : !cir.vector<2 x !cir.double>
+ // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
// CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.double, !cir.double) -> !cir.double
// LLVM-LABEL: @test_selectsd_128
>From a3fc7bd7fbea6f2682ab6ba59a33e4b353043807 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 6 Jan 2026 00:26:03 +0530
Subject: [PATCH 6/8] Update
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 165b89a4a15b9..d94df879f06e4 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -495,8 +495,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
mlir::Value idx) {
auto vecTy = mlir::cast<cir::VectorType>(vec.getType());
mlir::Type eltTy = vecTy.getElementType();
- auto op = cir::VecExtractOp::create(*this, loc, eltTy, vec, idx);
- return op.getResult();
+ return cir::VecExtractOp::create(*this, loc, eltTy, vec, idx);
}
mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
>From ee7a87e70a51e89c4cd1cca5860ca26f99504111 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 6 Jan 2026 00:26:52 +0530
Subject: [PATCH 7/8] Update
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index d94df879f06e4..cf5cc364b4f92 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -500,7 +500,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
uint64_t idx) {
- auto idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
+ mlir::Value idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
return createExtractElement(loc, vec, idxVal);
}
>From e165a1782fb1f827c60d046adade057c872d2af2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 6 Jan 2026 00:27:13 +0530
Subject: [PATCH 8/8] Update
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cf5cc364b4f92..40fbeec5f8b55 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -506,9 +506,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
mlir::Value newElt, mlir::Value idx) {
- auto op =
- cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
- return op.getResult();
+ return cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
}
mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
More information about the cfe-commits mailing list