[clang] [CIR][X86]Implement handling for Select/Selectsh builtins in CIR (PR #174003)

Mon Jan 5 10:57:25 PST 2026

https://github.com/Priyanshu3820 updated https://github.com/llvm/llvm-project/pull/174003

>From ee0486aa3fbd4a86722b1c5a503b56efa83c51d4 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 30 Dec 2025 16:08:45 +0000
Subject: [PATCH 1/8] Implement handling for Select/Selectsh builtins

---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      |  32 ++
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  46 ++-
 .../X86/avx512-select-builtins.c              | 357 ++++++++++++++++++
 3 files changed, 434 insertions(+), 1 deletion(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cc28941aaa079..9b635258c96dc 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -487,6 +487,38 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return createAddrSpaceCast(src.getLoc(), src, newTy);
   }
 
+  //===--------------------------------------------------------------------===//
+  // Other Instructions
+  //===--------------------------------------------------------------------===//
+
+  mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
+                                   mlir::Value idx) {
+    auto vecTy = mlir::cast<cir::VectorType>(vec.getType());
+    mlir::Type eltTy = vecTy.getElementType();
+    auto op = cir::VecExtractOp::create(*this, loc, eltTy, vec, idx);
+    return op.getResult();
+  }
+
+  mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
+                                   uint64_t idx) {
+    auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
+                                llvm::APInt(64, idx));
+    return createExtractElement(loc, vec, idxVal);
+  }
+
+  mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
+                                  mlir::Value newElt, mlir::Value idx) {
+    auto op = cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
+    return op.getResult();
+  }
+
+  mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
+                                  mlir::Value newElt, uint64_t idx) {
+    auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
+                                llvm::APInt(64, idx));
+    return createInsertElement(loc, vec, newElt, idxVal);
+  }
+
   //===--------------------------------------------------------------------===//
   // Binary Operators
   //===--------------------------------------------------------------------===//
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..166aef7a32bd1 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -184,6 +184,37 @@ static mlir::Value emitX86Select(CIRGenBuilderTy &builder, mlir::Location loc,
   return cir::VecTernaryOp::create(builder, loc, mask, op0, op1);
 }
 
+static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
+                                       mlir::Location loc, mlir::Value mask,
+                                       mlir::Value op0, mlir::Value op1) {
+
+  // If the mask is all ones just return first argument.
+  if (auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp()))
+    if (c.isAllOnesValue())
+      return op0;
+  // Cast to cir::IntType to safely get the width of a CIR integer
+  unsigned width = 0;
+  if (auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType())) {
+    width = intTy.getWidth();
+  } else {
+    // Fallback or handle unexpected type
+    width = 8;
+  }
+  mlir::Type maskTy = cir::VectorType::get(
+      cir::IntType::get(builder.getContext(), 1, false), width);
+
+  auto cirI64Ty = cir::IntType::get(builder.getContext(), 64, true);
+  mlir::Value idx0 = cir::ConstantOp::create(
+      builder, loc, cirI64Ty, cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
+
+  mask = builder.createBitcast(mask, maskTy);
+  mask = builder.createExtractElement(loc, mask, idx0);
+  auto boolTy = cir::BoolType::get(builder.getContext());
+  mask = cir::CastOp::create(builder, loc, boolTy, cir::CastKind::int_to_bool,
+                             mask);
+  return builder.createSelect(loc, mask, op0, op1);
+}
+
 static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
                                        mlir::Location loc,
                                        const std::string &intrinsicName,
@@ -1474,10 +1505,23 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_selectpd_128:
   case X86::BI__builtin_ia32_selectpd_256:
   case X86::BI__builtin_ia32_selectpd_512:
+    return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[0], ops[1],
+                         ops[2]);
   case X86::BI__builtin_ia32_selectsh_128:
   case X86::BI__builtin_ia32_selectsbf_128:
   case X86::BI__builtin_ia32_selectss_128:
-  case X86::BI__builtin_ia32_selectsd_128:
+  case X86::BI__builtin_ia32_selectsd_128: {
+    mlir::Location loc = getLoc(expr->getExprLoc());
+    auto cirI64Ty =
+        cir::IntType::get(builder.getContext(), 64, /*isSigned=*/true);
+    mlir::Value idx0 = cir::ConstantOp::create(
+        builder, loc, cirI64Ty,
+        cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
+    mlir::Value a = builder.createExtractElement(loc, ops[1], idx0);
+    mlir::Value b = builder.createExtractElement(loc, ops[2], idx0);
+    a = emitX86ScalarSelect(builder, loc, ops[0], a, b);
+    return builder.createInsertElement(loc, ops[1], a, idx0);
+  }
   case X86::BI__builtin_ia32_cmpb128_mask:
   case X86::BI__builtin_ia32_cmpb256_mask:
   case X86::BI__builtin_ia32_cmpb512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
new file mode 100644
index 0000000000000..7ad0bf2120ee5
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -0,0 +1,357 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+
+__m128i test_selectb_128(__mmask16 k, __m128i a, __m128i b) {
+  // CIR-LABEL: @test_selectb_128
+  // CIR: %[[MASK_BC:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_BC]], %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s8i>
+
+  // LLVM-LABEL: @test_selectb_128
+  // LLVM: select <16 x i1> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}
+
+  // OGCG-LABEL: @test_selectb_128
+  // OGCG: select <16 x i1> %{{.+}}, <16 x i8> %{{.+}}, <16 x i8> %{{.+}}
+  return (__m128i)__builtin_ia32_selectb_128(k, (__v16qi)a, (__v16qi)b);
+}
+
+__m256i test_selectb_256(__mmask32 k, __m256i a, __m256i b) {
+  // CIR-LABEL: @test_selectb_256
+  // CIR: %[[MASK_BC:.+]] = cir.cast bitcast %{{.+}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_BC]], %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+
+  // LLVM-LABEL: @test_selectb_256
+  // LLVM: select <32 x i1> %{{.+}}, <32 x i8> %{{.+}}, <32 x i8> %{{.+}}
+
+  // OGCG-LABEL: @test_selectb_256
+  // OGCG: select <32 x i1> %{{.+}}, <32 x i8> %{{.+}}, <32 x i8> %{{.+}}
+  return (__m256i)__builtin_ia32_selectb_256(k, (__v32qi)a, (__v32qi)b);
+}
+
+__m512i test_selectb_512(__mmask64 k, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_selectb_512
+  // CIR: %[[MASK_BC:.+]] = cir.cast bitcast %{{.+}} : !u64i -> !cir.vector<64 x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_BC]], %{{.+}}, %{{.+}}) : !cir.vector<64 x !cir.int<s, 1>>, !cir.vector<64 x !s8i>
+
+  // LLVM-LABEL: @test_selectb_512
+  // LLVM: select <64 x i1> %{{.+}}, <64 x i8> %{{.+}}, <64 x i8> %{{.+}}
+
+  // OGCG-LABEL: @test_selectb_512
+  // OGCG: select <64 x i1> %{{.+}}, <64 x i8> %{{.+}}, <64 x i8> %{{.+}}
+  return (__m512i)__builtin_ia32_selectb_512(k, (__v64qi)a, (__v64qi)b);
+}
+
+__m128i test_selectw_128(__mmask8 k, __m128i a, __m128i b) {
+  // CIR-LABEL: @test_selectw_128
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s16i>
+
+  // LLVM-LABEL: @test_selectw_128
+  // LLVM: select <8 x i1> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16> %{{.+}}
+
+  // OGCG-LABEL: @test_selectw_128
+  // OGCG: select <8 x i1> %{{.+}}, <8 x i16> %{{.+}}, <8 x i16> %{{.+}}
+  return (__m128i)__builtin_ia32_selectw_128(k, (__v8hi)a, (__v8hi)b);
+}
+
+__m256i test_selectw_256(__mmask16 k, __m256i a, __m256i b) {
+  // CIR-LABEL: @test_selectw_256
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s16i>
+
+  // LLVM-LABEL: @test_selectw_256
+  // LLVM: select <16 x i1> %{{.+}}, <16 x i16> %{{.+}}, <16 x i16> %{{.+}}
+
+  // OGCG-LABEL: @test_selectw_256
+  // OGCG: select <16 x i1> %{{.+}}, <16 x i16> %{{.+}}, <16 x i16> %{{.+}}
+  return (__m256i)__builtin_ia32_selectw_256(k, (__v16hi)a, (__v16hi)b);
+}
+
+__m512i test_selectw_512(__mmask32 k, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_selectw_512
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s16i>
+
+  // LLVM-LABEL: @test_selectw_512
+  // LLVM: select <32 x i1> %{{.+}}, <32 x i16> %{{.+}}, <32 x i16> %{{.+}}
+
+  // OGCG-LABEL: @test_selectw_512
+  // OGCG: select <32 x i1> %{{.+}}, <32 x i16> %{{.+}}, <32 x i16> %{{.+}}
+  return (__m512i)__builtin_ia32_selectw_512(k, (__v32hi)a, (__v32hi)b);
+}
+
+__m128i test_selectd_128(__mmask8 k, __m128i a, __m128i b) {
+  // CIR-LABEL: @test_selectd_128
+  // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+  // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !s32i>
+
+  // LLVM-LABEL: @test_selectd_128
+  // LLVM: shufflevector <8 x i1> %{{.+}}, <8 x i1> %{{.+}}, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: select <4 x i1> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32> %{{.+}}
+
+  // OGCG-LABEL: @test_selectd_128
+  // OGCG: select <4 x i1> %{{.+}}, <4 x i32> %{{.+}}, <4 x i32> %{{.+}}
+  return (__m128i)__builtin_ia32_selectd_128(k, (__v4si)a, (__v4si)b);
+}
+
+__m256i test_selectd_256(__mmask8 k, __m256i a, __m256i b) {
+  // CIR-LABEL: @test_selectd_256
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i>
+
+  // LLVM-LABEL: @test_selectd_256
+  // LLVM: select <8 x i1> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> %{{.+}}
+
+  // OGCG-LABEL: @test_selectd_256
+  // OGCG: select <8 x i1> %{{.+}}, <8 x i32> %{{.+}}, <8 x i32> %{{.+}}
+  return (__m256i)__builtin_ia32_selectd_256(k, (__v8si)a, (__v8si)b);
+}
+
+__m512i test_selectd_512(__mmask16 k, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_selectd_512
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !s32i>
+
+  // LLVM-LABEL: @test_selectd_512
+  // LLVM: select <16 x i1> %{{.+}}, <16 x i32> %{{.+}}, <16 x i32> %{{.+}}
+
+  // OGCG-LABEL: @test_selectd_512
+  // OGCG: select <16 x i1> %{{.+}}, <16 x i32> %{{.+}}, <16 x i32> %{{.+}}
+  return (__m512i)__builtin_ia32_selectd_512(k, (__v16si)a, (__v16si)b);
+}
+
+__m128i test_selectq_128(__mmask8 k, __m128i a, __m128i b) {
+  // CIR-LABEL: @test_selectq_128
+  // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i]
+  // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<2 x !cir.int<s, 1>>, !cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: @test_selectq_128
+  // LLVM: select <2 x i1> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}
+
+  // OGCG-LABEL: @test_selectq_128
+  // OGCG: select <2 x i1> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}
+  return __builtin_ia32_selectq_128(k, a, b);
+}
+
+__m256i test_selectq_256(__mmask8 k, __m256i a, __m256i b) {
+  // CIR-LABEL: @test_selectq_256
+  // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+  // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !s64i>
+
+  // LLVM-LABEL: @test_selectq_256
+  // LLVM: select <4 x i1> %{{.+}}, <4 x i64> %{{.+}}, <4 x i64> %{{.+}}
+
+  // OGCG-LABEL: @test_selectq_256
+  // OGCG: select <4 x i1> %{{.+}}, <4 x i64> %{{.+}}, <4 x i64> %{{.+}}
+  return __builtin_ia32_selectq_256(k, a, b);
+}
+
+__m512i test_selectq_512(__mmask8 k, __m512i a, __m512i b) {
+  // CIR-LABEL: @test_selectq_512
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s64i>
+
+  // LLVM-LABEL: @test_selectq_512
+  // LLVM: select <8 x i1> %{{.+}}, <8 x i64> %{{.+}}, <8 x i64> %{{.+}}
+
+  // OGCG-LABEL: @test_selectq_512
+  // OGCG: select <8 x i1> %{{.+}}, <8 x i64> %{{.+}}, <8 x i64> %{{.+}}
+  return __builtin_ia32_selectq_512(k, a, b);
+}
+
+__m128h test_selectph_128(__mmask8 k, __m128h a, __m128h b) {
+  // CIR-LABEL: @test_selectph_128
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.f16>
+
+  // LLVM-LABEL: @test_selectph_128
+  // LLVM: select <8 x i1> %{{.+}}, <8 x half> %{{.+}}, <8 x half> %{{.+}}
+
+  // OGCG-LABEL: @test_selectph_128
+  // OGCG: select <8 x i1> %{{.+}}, <8 x half> %{{.+}}, <8 x half> %{{.+}}
+  return __builtin_ia32_selectph_128(k, a, b);
+}
+
+__m256h test_selectph_256(__mmask16 k, __m256h a, __m256h b) {
+  // CIR-LABEL: @test_selectph_256
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.f16>
+
+  // LLVM-LABEL: @test_selectph_256
+  // LLVM: select <16 x i1> %{{.+}}, <16 x half> %{{.+}}, <16 x half> %{{.+}}
+
+  // OGCG-LABEL: @test_selectph_256
+  // OGCG: select <16 x i1> %{{.+}}, <16 x half> %{{.+}}, <16 x half> %{{.+}}
+  return __builtin_ia32_selectph_256(k, a, b);
+}
+
+__m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
+  // CIR-LABEL: @test_selectph_512
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !cir.f16>
+
+  // LLVM-LABEL: @test_selectph_512
+  // LLVM: select <32 x i1> %{{.+}}, <32 x half> %{{.+}}, <32 x half> %{{.+}}
+
+  // OGCG-LABEL: @test_selectph_512
+  // OGCG: select <32 x i1> %{{.+}}, <32 x half> %{{.+}}, <32 x half> %{{.+}}
+  return __builtin_ia32_selectph_512(k, a, b);
+}
+
+__m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
+  // CIR-LABEL: @test_selectsbf_128
+  // CIR: %[[COND:.+]] = cir.cast int_to_bool %{{.+}} : !cir.int<u, 1> -> !cir.bool
+  // CIR: cir.select if %[[COND]] then %{{.+}} else %{{.+}} : (!cir.bool, !cir.bf16, !cir.bf16) -> !cir.bf16
+
+  // LLVM-LABEL: @test_selectsbf_128
+  // LLVM: select i1 %{{.+}}, bfloat %{{.+}}, bfloat %{{.+}}
+
+  // OGCG-LABEL: @test_selectsbf_128
+  // OGCG: select i1 %{{.+}}, bfloat %{{.+}}, bfloat %{{.+}}
+  return __builtin_ia32_selectsbf_128(k, a, b);
+}
+
+__m256bh test_selectpbf_256(__mmask16 k, __m256bh a, __m256bh b) {
+  // CIR-LABEL: @test_selectpbf_256
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.bf16>
+
+  // LLVM-LABEL: @test_selectpbf_256
+  // LLVM: select <16 x i1> %{{.+}}, <16 x bfloat> %{{.+}}, <16 x bfloat> %{{.+}}
+
+  // OGCG-LABEL: @test_selectpbf_256
+  // OGCG: select <16 x i1> %{{.+}}, <16 x bfloat> %{{.+}}, <16 x bfloat> %{{.+}}
+  return __builtin_ia32_selectpbf_256(k, a, b);
+}
+
+__m512bh test_selectpbf_512(__mmask32 k, __m512bh a, __m512bh b) {
+  // CIR-LABEL: @test_selectpbf_512
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !cir.bf16>
+
+  // LLVM-LABEL: @test_selectpbf_512
+  // LLVM: select <32 x i1> %{{.+}}, <32 x bfloat> %{{.+}}, <32 x bfloat> %{{.+}}
+
+  // OGCG-LABEL: @test_selectpbf_512
+  // OGCG: select <32 x i1> %{{.+}}, <32 x bfloat> %{{.+}}, <32 x bfloat> %{{.+}}
+  return __builtin_ia32_selectpbf_512(k, a, b);
+}
+
+__m128 test_selectps_128(__mmask8 k, __m128 a, __m128 b) {
+  // CIR-LABEL: @test_selectps_128
+  // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+  // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.float>
+
+  // LLVM-LABEL: @test_selectps_128
+  // LLVM: select <4 x i1> %{{.+}}, <4 x float> %{{.+}}, <4 x float> %{{.+}}
+
+  // OGCG-LABEL: @test_selectps_128
+  // OGCG: select <4 x i1> %{{.+}}, <4 x float> %{{.+}}, <4 x float> %{{.+}}
+  return __builtin_ia32_selectps_128(k, a, b);
+}
+
+__m256 test_selectps_256(__mmask8 k, __m256 a, __m256 b) {
+  // CIR-LABEL: @test_selectps_256
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: @test_selectps_256
+  // LLVM: select <8 x i1> %{{.+}}, <8 x float> %{{.+}}, <8 x float> %{{.+}}
+
+  // OGCG-LABEL: @test_selectps_256
+  // OGCG: select <8 x i1> %{{.+}}, <8 x float> %{{.+}}, <8 x float> %{{.+}}
+  return __builtin_ia32_selectps_256(k, a, b);
+}
+
+__m512 test_selectps_512(__mmask16 k, __m512 a, __m512 b) {
+  // CIR-LABEL: @test_selectps_512
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<16 x !cir.int<s, 1>>, !cir.vector<16 x !cir.float>
+
+  // LLVM-LABEL: @test_selectps_512
+  // LLVM: select <16 x i1> %{{.+}}, <16 x float> %{{.+}}, <16 x float> %{{.+}}
+
+  // OGCG-LABEL: @test_selectps_512
+  // OGCG: select <16 x i1> %{{.+}}, <16 x float> %{{.+}}, <16 x float> %{{.+}}
+  return __builtin_ia32_selectps_512(k, a, b);
+}
+
+__m128d test_selectpd_128(__mmask8 k, __m128d a, __m128d b) {
+  // CIR-LABEL: @test_selectpd_128
+  // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i]
+  // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<2 x !cir.int<s, 1>>, !cir.vector<2 x !cir.double>
+
+  // LLVM-LABEL: @test_selectpd_128
+  // LLVM: select <2 x i1> %{{.+}}, <2 x double> %{{.+}}, <2 x double> %{{.+}}
+
+  // OGCG-LABEL: @test_selectpd_128
+  // OGCG: select <2 x i1> %{{.+}}, <2 x double> %{{.+}}, <2 x double> %{{.+}}
+  return __builtin_ia32_selectpd_128(k, a, b);
+}
+
+__m256d test_selectpd_256(__mmask8 k, __m256d a, __m256d b) {
+  // CIR-LABEL: @test_selectpd_256
+  // CIR: %[[M_SHUF:.+]] = cir.vec.shuffle(%{{.+}}, %{{.+}} : !cir.vector<8 x !cir.int<s, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i]
+  // CIR: cir.vec.ternary(%[[M_SHUF]], %{{.+}}, %{{.+}}) : !cir.vector<4 x !cir.int<s, 1>>, !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: @test_selectpd_256
+  // LLVM: select <4 x i1> %{{.+}}, <4 x double> %{{.+}}, <4 x double> %{{.+}}
+
+  // OGCG-LABEL: @test_selectpd_256
+  // OGCG: select <4 x i1> %{{.+}}, <4 x double> %{{.+}}, <4 x double> %{{.+}}
+  return __builtin_ia32_selectpd_256(k, a, b);
+}
+
+__m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
+  // CIR-LABEL: @test_selectpd_512
+  // CIR: cir.vec.ternary(%{{.+}}, %{{.+}}, %{{.+}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !cir.double>
+
+  // LLVM-LABEL: @test_selectpd_512
+  // LLVM: select <8 x i1> %{{.+}}, <8 x double> %{{.+}}, <8 x double> %{{.+}}
+
+  // OGCG-LABEL: @test_selectpd_512
+  // OGCG: select <8 x i1> %{{.+}}, <8 x double> %{{.+}}, <8 x double> %{{.+}}
+  return __builtin_ia32_selectpd_512(k, a, b);
+}
+
+// Scalar Selects 
+
+__m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
+  // CIR-LABEL: @test_selectsh_128
+  // CIR: %[[I0:.+]] = cir.const #cir.int<0> : !s64i
+  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+  // CIR: %[[EB:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[COND:.+]] = cir.cast int_to_bool %[[BIT0]] : !cir.int<u, 1> -> !cir.bool
+  // CIR: %[[SEL:.+]] = cir.select if %[[COND]] then %[[EA]] else %[[EB]]
+  // CIR: cir.vec.insert %[[SEL]], %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+
+  // LLVM-LABEL: @test_selectsh_128
+  // LLVM: %[[E1:.+]] = extractelement <8 x half> %{{.+}}, i64 0
+  // LLVM: select i1 %{{.+}}, half %[[E1]], half %{{.+}}
+
+  // OGCG-LABEL: @test_selectsh_128
+  // OGCG: select i1 %{{.+}}, half %{{.+}}, half %{{.+}}
+  return __builtin_ia32_selectsh_128(k, a, b);
+}
+
+__m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
+  // CIR-LABEL: @test_selectss_128
+  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<4 x !cir.float>
+  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.float, !cir.float) -> !cir.float
+
+  // LLVM-LABEL: @test_selectss_128
+  // LLVM: select i1 %{{.+}}, float %{{.+}}, float %{{.+}}
+
+  // OGCG-LABEL: @test_selectss_128
+  // OGCG: select i1 %{{.+}}, float %{{.+}}, float %{{.+}}
+  return __builtin_ia32_selectss_128(k, a, b);
+}
+
+__m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
+  // CIR-LABEL: @test_selectsd_128
+  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<2 x !cir.double>
+  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.double, !cir.double) -> !cir.double
+
+  // LLVM-LABEL: @test_selectsd_128
+  // LLVM: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
+
+  // OGCG-LABEL: @test_selectsd_128
+  // OGCG: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
+  return __builtin_ia32_selectsd_128(k, a, b);
+}
\ No newline at end of file

>From 4d7721da3bf54f9a5307d8930a7852cb3cadc236 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 30 Dec 2025 16:17:01 +0000
Subject: [PATCH 2/8] Fix formatting

---
 clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h    | 3 ++-
 clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 9b635258c96dc..9cfec71923b9f 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -508,7 +508,8 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
 
   mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
                                   mlir::Value newElt, mlir::Value idx) {
-    auto op = cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
+    auto op =
+        cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
     return op.getResult();
   }
 
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
index 7ad0bf2120ee5..6620e91258870 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -354,4 +354,4 @@ __m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
   // OGCG-LABEL: @test_selectsd_128
   // OGCG: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
   return __builtin_ia32_selectsd_128(k, a, b);
-}
\ No newline at end of file
+}

>From 4bbee5c420672c15ad6a47b95bff51caec736d0a Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Fri, 2 Jan 2026 05:31:23 +0000
Subject: [PATCH 3/8] Update CIRGenBuiltinX86.cpp and test file

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 29 ++++---------
 .../X86/avx512-select-builtins.c              | 42 +++++++++----------
 2 files changed, 30 insertions(+), 41 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 166aef7a32bd1..c9e9f6cd017a0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -26,6 +26,7 @@
 #include "clang/CIR/MissingFeatures.h"
 #include "llvm/ADT/Sequence.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <cstdint>
 #include <string>
 
 using namespace clang;
@@ -193,22 +194,15 @@ static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
     if (c.isAllOnesValue())
       return op0;
   // Cast to cir::IntType to safely get the width of a CIR integer
-  unsigned width = 0;
-  if (auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType())) {
-    width = intTy.getWidth();
-  } else {
-    // Fallback or handle unexpected type
-    width = 8;
-  }
+  auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType());
+  assert(intTy && "mask must be an integer type");
+  unsigned width = intTy.getWidth();
+
   mlir::Type maskTy = cir::VectorType::get(
       cir::IntType::get(builder.getContext(), 1, false), width);
 
-  auto cirI64Ty = cir::IntType::get(builder.getContext(), 64, true);
-  mlir::Value idx0 = cir::ConstantOp::create(
-      builder, loc, cirI64Ty, cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
-
   mask = builder.createBitcast(mask, maskTy);
-  mask = builder.createExtractElement(loc, mask, idx0);
+  mask = builder.createExtractElement(loc, mask, (uint64_t)0);
   auto boolTy = cir::BoolType::get(builder.getContext());
   mask = cir::CastOp::create(builder, loc, boolTy, cir::CastKind::int_to_bool,
                              mask);
@@ -1512,15 +1506,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_selectss_128:
   case X86::BI__builtin_ia32_selectsd_128: {
     mlir::Location loc = getLoc(expr->getExprLoc());
-    auto cirI64Ty =
-        cir::IntType::get(builder.getContext(), 64, /*isSigned=*/true);
-    mlir::Value idx0 = cir::ConstantOp::create(
-        builder, loc, cirI64Ty,
-        cir::IntAttr::get(cirI64Ty, llvm::APInt(64, 0)));
-    mlir::Value a = builder.createExtractElement(loc, ops[1], idx0);
-    mlir::Value b = builder.createExtractElement(loc, ops[2], idx0);
+    mlir::Value a = builder.createExtractElement(loc, ops[1], (uint64_t)0);
+    mlir::Value b = builder.createExtractElement(loc, ops[2], (uint64_t)0);
     a = emitX86ScalarSelect(builder, loc, ops[0], a, b);
-    return builder.createInsertElement(loc, ops[1], a, idx0);
+    return builder.createInsertElement(loc, ops[1], a, (uint64_t)0);
   }
   case X86::BI__builtin_ia32_cmpb128_mask:
   case X86::BI__builtin_ia32_cmpb256_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
index 6620e91258870..11286adaf6653 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -1,8 +1,8 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-cir -o %t.cir
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -fclangir -emit-cir -o %t.cir
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -fclangir -emit-llvm -o %t.ll
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -fclangir -emit-llvm -o %t.ll
 // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512vl -target-feature +avx512bw -target-feature +avx512dq -target-feature +avx512fp16 -target-feature +avx512bf16 -emit-llvm -o %t.ll
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx10.2 -emit-llvm -o %t.ll
 // RUN: FileCheck --check-prefixes=OGCG --input-file=%t.ll %s
 
 #include <immintrin.h>
@@ -131,7 +131,7 @@ __m128i test_selectq_128(__mmask8 k, __m128i a, __m128i b) {
 
   // OGCG-LABEL: @test_selectq_128
   // OGCG: select <2 x i1> %{{.+}}, <2 x i64> %{{.+}}, <2 x i64> %{{.+}}
-  return __builtin_ia32_selectq_128(k, a, b);
+  return __builtin_ia32_selectq_128(k, (__v2di)a, (__v2di)b);
 }
 
 __m256i test_selectq_256(__mmask8 k, __m256i a, __m256i b) {
@@ -144,7 +144,7 @@ __m256i test_selectq_256(__mmask8 k, __m256i a, __m256i b) {
 
   // OGCG-LABEL: @test_selectq_256
   // OGCG: select <4 x i1> %{{.+}}, <4 x i64> %{{.+}}, <4 x i64> %{{.+}}
-  return __builtin_ia32_selectq_256(k, a, b);
+  return __builtin_ia32_selectq_256(k, (__v4di)a, (__v4di)b);
 }
 
 __m512i test_selectq_512(__mmask8 k, __m512i a, __m512i b) {
@@ -156,7 +156,7 @@ __m512i test_selectq_512(__mmask8 k, __m512i a, __m512i b) {
 
   // OGCG-LABEL: @test_selectq_512
   // OGCG: select <8 x i1> %{{.+}}, <8 x i64> %{{.+}}, <8 x i64> %{{.+}}
-  return __builtin_ia32_selectq_512(k, a, b);
+  return __builtin_ia32_selectq_512(k, (__v8di)a, (__v8di)b);
 }
 
 __m128h test_selectph_128(__mmask8 k, __m128h a, __m128h b) {
@@ -168,7 +168,7 @@ __m128h test_selectph_128(__mmask8 k, __m128h a, __m128h b) {
 
   // OGCG-LABEL: @test_selectph_128
   // OGCG: select <8 x i1> %{{.+}}, <8 x half> %{{.+}}, <8 x half> %{{.+}}
-  return __builtin_ia32_selectph_128(k, a, b);
+  return __builtin_ia32_selectph_128(k, (__v8hf)a, (__v8hf)b);
 }
 
 __m256h test_selectph_256(__mmask16 k, __m256h a, __m256h b) {
@@ -180,7 +180,7 @@ __m256h test_selectph_256(__mmask16 k, __m256h a, __m256h b) {
 
   // OGCG-LABEL: @test_selectph_256
   // OGCG: select <16 x i1> %{{.+}}, <16 x half> %{{.+}}, <16 x half> %{{.+}}
-  return __builtin_ia32_selectph_256(k, a, b);
+  return __builtin_ia32_selectph_256(k, (__v16hf)a, (__v16hf)b);
 }
 
 __m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
@@ -192,7 +192,7 @@ __m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
 
   // OGCG-LABEL: @test_selectph_512
   // OGCG: select <32 x i1> %{{.+}}, <32 x half> %{{.+}}, <32 x half> %{{.+}}
-  return __builtin_ia32_selectph_512(k, a, b);
+  return __builtin_ia32_selectph_512(k, (__v32hf)a, (__v32hf)b);
 }
 
 __m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
@@ -205,7 +205,7 @@ __m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
 
   // OGCG-LABEL: @test_selectsbf_128
   // OGCG: select i1 %{{.+}}, bfloat %{{.+}}, bfloat %{{.+}}
-  return __builtin_ia32_selectsbf_128(k, a, b);
+  return __builtin_ia32_selectsbf_128(k, (__v8bf)a, (__v8bf)b);
 }
 
 __m256bh test_selectpbf_256(__mmask16 k, __m256bh a, __m256bh b) {
@@ -217,7 +217,7 @@ __m256bh test_selectpbf_256(__mmask16 k, __m256bh a, __m256bh b) {
 
   // OGCG-LABEL: @test_selectpbf_256
   // OGCG: select <16 x i1> %{{.+}}, <16 x bfloat> %{{.+}}, <16 x bfloat> %{{.+}}
-  return __builtin_ia32_selectpbf_256(k, a, b);
+  return __builtin_ia32_selectpbf_256(k, (__v16bf)a, (__v16bf)b);
 }
 
 __m512bh test_selectpbf_512(__mmask32 k, __m512bh a, __m512bh b) {
@@ -229,7 +229,7 @@ __m512bh test_selectpbf_512(__mmask32 k, __m512bh a, __m512bh b) {
 
   // OGCG-LABEL: @test_selectpbf_512
   // OGCG: select <32 x i1> %{{.+}}, <32 x bfloat> %{{.+}}, <32 x bfloat> %{{.+}}
-  return __builtin_ia32_selectpbf_512(k, a, b);
+  return __builtin_ia32_selectpbf_512(k, (__v32bf)a, (__v32bf)b);
 }
 
 __m128 test_selectps_128(__mmask8 k, __m128 a, __m128 b) {
@@ -242,7 +242,7 @@ __m128 test_selectps_128(__mmask8 k, __m128 a, __m128 b) {
 
   // OGCG-LABEL: @test_selectps_128
   // OGCG: select <4 x i1> %{{.+}}, <4 x float> %{{.+}}, <4 x float> %{{.+}}
-  return __builtin_ia32_selectps_128(k, a, b);
+  return __builtin_ia32_selectps_128(k, (__v4sf)a, (__v4sf)b);
 }
 
 __m256 test_selectps_256(__mmask8 k, __m256 a, __m256 b) {
@@ -254,7 +254,7 @@ __m256 test_selectps_256(__mmask8 k, __m256 a, __m256 b) {
 
   // OGCG-LABEL: @test_selectps_256
   // OGCG: select <8 x i1> %{{.+}}, <8 x float> %{{.+}}, <8 x float> %{{.+}}
-  return __builtin_ia32_selectps_256(k, a, b);
+  return __builtin_ia32_selectps_256(k, (__v8sf)a, (__v8sf)b);
 }
 
 __m512 test_selectps_512(__mmask16 k, __m512 a, __m512 b) {
@@ -266,7 +266,7 @@ __m512 test_selectps_512(__mmask16 k, __m512 a, __m512 b) {
 
   // OGCG-LABEL: @test_selectps_512
   // OGCG: select <16 x i1> %{{.+}}, <16 x float> %{{.+}}, <16 x float> %{{.+}}
-  return __builtin_ia32_selectps_512(k, a, b);
+  return __builtin_ia32_selectps_512(k, (__v16sf)a, (__v16sf)b);
 }
 
 __m128d test_selectpd_128(__mmask8 k, __m128d a, __m128d b) {
@@ -279,7 +279,7 @@ __m128d test_selectpd_128(__mmask8 k, __m128d a, __m128d b) {
 
   // OGCG-LABEL: @test_selectpd_128
   // OGCG: select <2 x i1> %{{.+}}, <2 x double> %{{.+}}, <2 x double> %{{.+}}
-  return __builtin_ia32_selectpd_128(k, a, b);
+  return __builtin_ia32_selectpd_128(k, (__v2df)a, (__v2df)b);
 }
 
 __m256d test_selectpd_256(__mmask8 k, __m256d a, __m256d b) {
@@ -292,7 +292,7 @@ __m256d test_selectpd_256(__mmask8 k, __m256d a, __m256d b) {
 
   // OGCG-LABEL: @test_selectpd_256
   // OGCG: select <4 x i1> %{{.+}}, <4 x double> %{{.+}}, <4 x double> %{{.+}}
-  return __builtin_ia32_selectpd_256(k, a, b);
+  return __builtin_ia32_selectpd_256(k, (__v4df)a, (__v4df)b);
 }
 
 __m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
@@ -304,7 +304,7 @@ __m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
 
   // OGCG-LABEL: @test_selectpd_512
   // OGCG: select <8 x i1> %{{.+}}, <8 x double> %{{.+}}, <8 x double> %{{.+}}
-  return __builtin_ia32_selectpd_512(k, a, b);
+  return __builtin_ia32_selectpd_512(k, (__v8df)a, (__v8df)b);
 }
 
 // Scalar Selects 
@@ -325,7 +325,7 @@ __m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
 
   // OGCG-LABEL: @test_selectsh_128
   // OGCG: select i1 %{{.+}}, half %{{.+}}, half %{{.+}}
-  return __builtin_ia32_selectsh_128(k, a, b);
+  return __builtin_ia32_selectsh_128(k, (__v8hf)a, (__v8hf)b);
 }
 
 __m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
@@ -339,7 +339,7 @@ __m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
 
   // OGCG-LABEL: @test_selectss_128
   // OGCG: select i1 %{{.+}}, float %{{.+}}, float %{{.+}}
-  return __builtin_ia32_selectss_128(k, a, b);
+  return __builtin_ia32_selectss_128(k, (__v4sf)a, (__v4sf)b);
 }
 
 __m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
@@ -353,5 +353,5 @@ __m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
 
   // OGCG-LABEL: @test_selectsd_128
   // OGCG: select i1 %{{.+}}, double %{{.+}}, double %{{.+}}
-  return __builtin_ia32_selectsd_128(k, a, b);
+  return __builtin_ia32_selectsd_128(k, (__v2df)a, (__v2df)b);
 }

>From daadcc542c7d3980ff07adf6d31488eaa0665244 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 5 Jan 2026 11:24:43 +0000
Subject: [PATCH 4/8] Update CIRGenBuiltinX86.cpp

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index c9e9f6cd017a0..b0f81de5d828d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -198,15 +198,20 @@ static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
   assert(intTy && "mask must be an integer type");
   unsigned width = intTy.getWidth();
 
-  mlir::Type maskTy = cir::VectorType::get(
-      cir::IntType::get(builder.getContext(), 1, false), width);
+  auto i1Ty = builder.getSIntNTy(1);
+  auto maskVecTy = cir::VectorType::get(i1Ty, width);
 
-  mask = builder.createBitcast(mask, maskTy);
-  mask = builder.createExtractElement(loc, mask, (uint64_t)0);
+  mlir::Value maskVec = builder.createBitcast(mask, maskVecTy);
+
+  // Extract bit 0 from the mask vector
+  mlir::Value bit0 = builder.createExtractElement(loc, maskVec, (uint64_t)0);
+
+  // Convert i1 to bool for select
   auto boolTy = cir::BoolType::get(builder.getContext());
-  mask = cir::CastOp::create(builder, loc, boolTy, cir::CastKind::int_to_bool,
-                             mask);
-  return builder.createSelect(loc, mask, op0, op1);
+  mlir::Value cond = cir::CastOp::create(builder, loc, boolTy,
+                                         cir::CastKind::int_to_bool, bit0);
+
+  return builder.createSelect(loc, cond, op0, op1);
 }
 
 static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,

>From ec7e649dc8b2d660798c136dfbdea6fe03ea8329 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Mon, 5 Jan 2026 17:03:48 +0000
Subject: [PATCH 5/8] Update

---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      |  6 +--
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 38 +++++++++++++------
 .../X86/avx512-select-builtins.c              | 20 +++++-----
 3 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 9cfec71923b9f..165b89a4a15b9 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -501,8 +501,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
 
   mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
                                    uint64_t idx) {
-    auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
-                                llvm::APInt(64, idx));
+    auto idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
     return createExtractElement(loc, vec, idxVal);
   }
 
@@ -515,8 +514,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
 
   mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
                                   mlir::Value newElt, uint64_t idx) {
-    auto idxVal = getConstAPInt(loc, mlir::IntegerType::get(getContext(), 64),
-                                llvm::APInt(64, idx));
+    auto idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
     return createInsertElement(loc, vec, newElt, idxVal);
   }
 
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b0f81de5d828d..50cc0297d28ed 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -193,25 +193,43 @@ static mlir::Value emitX86ScalarSelect(CIRGenBuilderTy &builder,
   if (auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(mask.getDefiningOp()))
     if (c.isAllOnesValue())
       return op0;
-  // Cast to cir::IntType to safely get the width of a CIR integer
+
+  // Extract the scalar values from the vector operands
+  auto vecTy0 = mlir::dyn_cast<cir::VectorType>(op0.getType());
+  auto vecTy1 = mlir::dyn_cast<cir::VectorType>(op1.getType());
+
+  mlir::Value scalar0 = op0;
+  mlir::Value scalar1 = op1;
+
+  if (vecTy0)
+    scalar0 = builder.createExtractElement(loc, op0, uint64_t(0));
+
+  if (vecTy1)
+    scalar1 = builder.createExtractElement(loc, op1, uint64_t(0));
+
+  // Get the mask as a vector of i1 and extract bit 0
   auto intTy = mlir::dyn_cast<cir::IntType>(mask.getType());
   assert(intTy && "mask must be an integer type");
   unsigned width = intTy.getWidth();
 
-  auto i1Ty = builder.getSIntNTy(1);
+  auto i1Ty = builder.getUIntNTy(1);
   auto maskVecTy = cir::VectorType::get(i1Ty, width);
-
   mlir::Value maskVec = builder.createBitcast(mask, maskVecTy);
 
   // Extract bit 0 from the mask vector
-  mlir::Value bit0 = builder.createExtractElement(loc, maskVec, (uint64_t)0);
+  mlir::Value bit0 = builder.createExtractElement(loc, maskVec, uint64_t(0));
 
   // Convert i1 to bool for select
   auto boolTy = cir::BoolType::get(builder.getContext());
   mlir::Value cond = cir::CastOp::create(builder, loc, boolTy,
                                          cir::CastKind::int_to_bool, bit0);
 
-  return builder.createSelect(loc, cond, op0, op1);
+  mlir::Value result = builder.createSelect(loc, cond, scalar0, scalar1);
+
+  if (vecTy0)
+    result = builder.createInsertElement(loc, op0, result, uint64_t(0));
+
+  return result;
 }
 
 static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
@@ -1509,13 +1527,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) {
   case X86::BI__builtin_ia32_selectsh_128:
   case X86::BI__builtin_ia32_selectsbf_128:
   case X86::BI__builtin_ia32_selectss_128:
-  case X86::BI__builtin_ia32_selectsd_128: {
-    mlir::Location loc = getLoc(expr->getExprLoc());
-    mlir::Value a = builder.createExtractElement(loc, ops[1], (uint64_t)0);
-    mlir::Value b = builder.createExtractElement(loc, ops[2], (uint64_t)0);
-    a = emitX86ScalarSelect(builder, loc, ops[0], a, b);
-    return builder.createInsertElement(loc, ops[1], a, (uint64_t)0);
-  }
+  case X86::BI__builtin_ia32_selectsd_128:
+    return emitX86ScalarSelect(builder, getLoc(expr->getExprLoc()), ops[0],
+                               ops[1], ops[2]);
   case X86::BI__builtin_ia32_cmpb128_mask:
   case X86::BI__builtin_ia32_cmpb256_mask:
   case X86::BI__builtin_ia32_cmpb512_mask:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
index 11286adaf6653..10d6e8baea4da 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512-select-builtins.c
@@ -197,7 +197,8 @@ __m512h test_selectph_512(__mmask32 k, __m512h a, __m512h b) {
 
 __m128bh test_selectsbf_128(__mmask8 k, __m128bh a, __m128bh b) {
   // CIR-LABEL: @test_selectsbf_128
-  // CIR: %[[COND:.+]] = cir.cast int_to_bool %{{.+}} : !cir.int<u, 1> -> !cir.bool
+  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[COND:.+]] = cir.cast int_to_bool %[[BIT0]] : !cir.int<u, 1> -> !cir.bool
   // CIR: cir.select if %[[COND]] then %{{.+}} else %{{.+}} : (!cir.bool, !cir.bf16, !cir.bf16) -> !cir.bf16
 
   // LLVM-LABEL: @test_selectsbf_128
@@ -311,13 +312,12 @@ __m512d test_selectpd_512(__mmask8 k, __m512d a, __m512d b) {
 
 __m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
   // CIR-LABEL: @test_selectsh_128
-  // CIR: %[[I0:.+]] = cir.const #cir.int<0> : !s64i
-  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
-  // CIR: %[[EB:.+]] = cir.vec.extract %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
-  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.f16>
+  // CIR: %[[EB:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.f16>
+  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
   // CIR: %[[COND:.+]] = cir.cast int_to_bool %[[BIT0]] : !cir.int<u, 1> -> !cir.bool
   // CIR: %[[SEL:.+]] = cir.select if %[[COND]] then %[[EA]] else %[[EB]]
-  // CIR: cir.vec.insert %[[SEL]], %{{.+}}[%[[I0]] : !s64i] : !cir.vector<8 x !cir.f16>
+  // CIR: cir.vec.insert %[[SEL]], %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.f16>
 
   // LLVM-LABEL: @test_selectsh_128
   // LLVM: %[[E1:.+]] = extractelement <8 x half> %{{.+}}, i64 0
@@ -330,8 +330,8 @@ __m128h test_selectsh_128(__mmask8 k, __m128h a, __m128h b) {
 
 __m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
   // CIR-LABEL: @test_selectss_128
-  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<4 x !cir.float>
-  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !u64i] : !cir.vector<4 x !cir.float>
+  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
   // CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.float, !cir.float) -> !cir.float
 
   // LLVM-LABEL: @test_selectss_128
@@ -344,8 +344,8 @@ __m128 test_selectss_128(__mmask8 k, __m128 a, __m128 b) {
 
 __m128d test_selectsd_128(__mmask8 k, __m128d a, __m128d b) {
   // CIR-LABEL: @test_selectsd_128
-  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !s64i] : !cir.vector<2 x !cir.double>
-  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !s64i] : !cir.vector<8 x !cir.int<u, 1>>
+  // CIR: %[[EA:.+]] = cir.vec.extract %{{.+}}[%[[I0:.+]] : !u64i] : !cir.vector<2 x !cir.double>
+  // CIR: %[[BIT0:.+]] = cir.vec.extract %{{.+}}[%{{.+}} : !u64i] : !cir.vector<8 x !cir.int<u, 1>>
   // CIR: cir.select if %{{.+}} then %[[EA]] else %{{.+}} : (!cir.bool, !cir.double, !cir.double) -> !cir.double
 
   // LLVM-LABEL: @test_selectsd_128

>From a3fc7bd7fbea6f2682ab6ba59a33e4b353043807 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 6 Jan 2026 00:26:03 +0530
Subject: [PATCH 6/8] Update
 clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 165b89a4a15b9..d94df879f06e4 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -495,8 +495,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
                                    mlir::Value idx) {
     auto vecTy = mlir::cast<cir::VectorType>(vec.getType());
     mlir::Type eltTy = vecTy.getElementType();
-    auto op = cir::VecExtractOp::create(*this, loc, eltTy, vec, idx);
-    return op.getResult();
+    return cir::VecExtractOp::create(*this, loc, eltTy, vec, idx);
   }
 
   mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,

>From ee7a87e70a51e89c4cd1cca5860ca26f99504111 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 6 Jan 2026 00:26:52 +0530
Subject: [PATCH 7/8] Update
 clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index d94df879f06e4..cf5cc364b4f92 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -500,7 +500,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
 
   mlir::Value createExtractElement(mlir::Location loc, mlir::Value vec,
                                    uint64_t idx) {
-    auto idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
+    mlir::Value idxVal = getConstAPInt(loc, getUIntNTy(64), llvm::APInt(64, idx));
     return createExtractElement(loc, vec, idxVal);
   }
 

>From e165a1782fb1f827c60d046adade057c872d2af2 Mon Sep 17 00:00:00 2001
From: Priyanshu Kumar <10b.priyanshu at gmail.com>
Date: Tue, 6 Jan 2026 00:27:13 +0530
Subject: [PATCH 8/8] Update
 clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h

Co-authored-by: Andy Kaylor <akaylor at nvidia.com>
---
 clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cf5cc364b4f92..40fbeec5f8b55 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -506,9 +506,7 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
 
   mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,
                                   mlir::Value newElt, mlir::Value idx) {
-    auto op =
-        cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
-    return op.getResult();
+    return cir::VecInsertOp::create(*this, loc, vec.getType(), vec, newElt, idx);
   }
 
   mlir::Value createInsertElement(mlir::Location loc, mlir::Value vec,