[llvm-branch-commits] [clang] [ConstantTime][Clang] Add __builtin_ct_select for constant-time selection (PR #166703)

Fri May 22 18:09:53 PDT 2026

https://github.com/wizardengineer updated https://github.com/llvm/llvm-project/pull/166703

>From 4a82a13a2532ce2e71787c666dbdfd11585e20ed Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 10:56:34 -0500
Subject: [PATCH 1/2] [ConstantTime][Clang] Add __builtin_ct_select for
 constant-time selection

---
 clang/docs/LanguageExtensions.rst             |  44 ++
 clang/include/clang/Basic/Builtins.td         |   8 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  13 +
 clang/lib/Sema/SemaChecking.cpp               |  64 ++
 .../test/Sema/builtin-ct-select-edge-cases.c  | 373 ++++++++++
 clang/test/Sema/builtin-ct-select.c           | 683 ++++++++++++++++++
 6 files changed, 1185 insertions(+)
 create mode 100644 clang/test/Sema/builtin-ct-select-edge-cases.c
 create mode 100644 clang/test/Sema/builtin-ct-select.c

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 03cb02deb5e7f..6f5cd5f95cdb0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -7332,3 +7332,47 @@ Clang fails to reject some code that should be rejected. e.g.,
   // own initializer rather than rejecting the code with an undeclared identifier
   // diagnostic.
   auto x = x;
+
+.. _langext-__builtin_ct_select:
+
+``__builtin_ct_select``
+-----------------------
+
+``__builtin_ct_select`` performs a constant-time conditional selection between
+two values. Unlike the ternary operator ``?:``, this builtin is designed to
+execute in constant time regardless of the condition value, making it suitable
+for cryptographic and security-sensitive code where timing side-channels must
+be avoided.
+
+**Syntax**:
+
+.. code-block:: c++
+
+  __builtin_ct_select(condition, true_value, false_value)
+
+**Examples**:
+
+.. code-block:: c++
+
+  // Select between two integers
+  int result = __builtin_ct_select(secret_bit, value_a, value_b);
+
+  // Select between two pointers
+  int *ptr = __builtin_ct_select(condition, ptr_a, ptr_b);
+
+  // Select between two floating-point values
+  double d = __builtin_ct_select(flag, 1.0, 2.0);
+
+**Description**:
+
+The first argument is an integer condition that is converted to a boolean
+(non-zero is true, zero is false). The second and third arguments must have
+the same scalar or vector type. The builtin returns the second argument if
+the condition is true, otherwise the third argument.
+
+The operation is guaranteed to be lowered to constant-time machine code that
+does not branch on the condition value, preventing timing-based side-channel
+attacks.
+
+Query for this feature with ``__has_builtin(__builtin_ct_select)``.
+
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 40ec94ab75046..389754a37f7e3 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5810,3 +5810,11 @@ def CountedByRef : Builtin {
   let Attributes = [NoThrow, CustomTypeChecking];
   let Prototype = "int(...)";
 }
+
+// Constant-time select: __builtin_ct_select(cond, true_value, false_value).
+def CtSelect : Builtin {
+  let Spellings = ["__builtin_ct_select"];
+  let Attributes = [NoThrow, Const, UnevaluatedArguments,
+                    ConstIgnoringExceptions, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cac1628e68721..f69390b4ace57 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6668,6 +6668,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     auto Str = CGM.GetAddrOfConstantCString(Name, "");
     return RValue::get(Str.getPointer());
   }
+  case Builtin::BI__builtin_ct_select: {
+    auto *Cond = EmitScalarExpr(E->getArg(0));
+    auto *A = EmitScalarExpr(E->getArg(1));
+    auto *B = EmitScalarExpr(E->getArg(2));
+    // A condition that is not already one bit wide is compared against zero.
+    if (Cond->getType()->getIntegerBitWidth() != 1)
+      Cond = Builder.CreateICmpNE(
+          Cond, llvm::ConstantInt::get(Cond->getType(), 0), "cond.bool");
+    // Call the ct_select intrinsic, overloaded on the type of operand A.
+    llvm::Function *Fn =
+        CGM.getIntrinsic(llvm::Intrinsic::ct_select, {A->getType()});
+    return RValue::get(Builder.CreateCall(Fn, {Cond, A, B}));
+  }
   }
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cc834bbee23c4..e5a15c84de8d3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3928,6 +3928,70 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     if (BuiltinCountedByRef(TheCall))
       return ExprError();
     break;
+
+  case Builtin::BI__builtin_ct_select: {
+    if (TheCall->getNumArgs() != 3) {
+      // Exactly three arguments are required; diagnose too few or too many.
+      if (TheCall->getNumArgs() < 3) {
+        return Diag(TheCall->getEndLoc(),
+                    diag::err_typecheck_call_too_few_args_at_least)
+               << 0 << 3 << TheCall->getNumArgs() << 0
+               << TheCall->getCallee()->getSourceRange();
+      } else {
+        return Diag(TheCall->getEndLoc(),
+                    diag::err_typecheck_call_too_many_args)
+               << 0 << 3 << TheCall->getNumArgs() << 0
+               << TheCall->getCallee()->getSourceRange();
+      }
+    }
+    auto *Cond = TheCall->getArg(0);
+    auto *A = TheCall->getArg(1);
+    auto *B = TheCall->getArg(2);
+    // The condition operand must have an integer type.
+    QualType CondTy = Cond->getType();
+    if (!CondTy->isIntegerType()) {
+      return Diag(Cond->getBeginLoc(), diag::err_typecheck_cond_expect_scalar)
+             << CondTy << Cond->getSourceRange();
+    }
+    // Apply the default function/array/lvalue conversions to A and B.
+    ExprResult ARes = DefaultFunctionArrayLvalueConversion(A);
+    ExprResult BRes = DefaultFunctionArrayLvalueConversion(B);
+    if (ARes.isInvalid() || BRes.isInvalid())
+      return ExprError();
+
+    A = ARes.get();
+    B = BRes.get();
+    TheCall->setArg(1, A);
+    TheCall->setArg(2, B);
+
+    QualType ATy = A->getType();
+    QualType BTy = B->getType();
+
+    // Each value operand must be of scalar or vector type.
+    if ((!ATy->isScalarType() && !ATy->isVectorType()) ||
+        (!BTy->isScalarType() && !BTy->isVectorType())) {
+      return Diag(A->getBeginLoc(),
+                  diag::err_typecheck_cond_incompatible_operands)
+             << ATy << BTy << A->getSourceRange() << B->getSourceRange();
+    }
+
+    // The converted value operands must have the same type.
+    if (!Context.hasSameType(ATy, BTy)) {
+      // There is no fallback conversion: a type mismatch is an error.
+      return Diag(A->getBeginLoc(),
+                  diag::err_typecheck_cond_incompatible_operands)
+             << ATy << BTy << A->getSourceRange() << B->getSourceRange();
+    }
+
+    QualType ResultTy = ATy;
+    ExprResult CondRes = PerformContextuallyConvertToBool(Cond);
+    if (CondRes.isInvalid())
+      return ExprError();
+    // Install the bool-converted condition; the call's type is that of A.
+    TheCall->setArg(0, CondRes.get());
+    TheCall->setType(ResultTy);
+    return TheCall;
+  }
   }
 
   if (getLangOpts().HLSL && HLSL().CheckBuiltinFunctionCall(BuiltinID, TheCall))
diff --git a/clang/test/Sema/builtin-ct-select-edge-cases.c b/clang/test/Sema/builtin-ct-select-edge-cases.c
new file mode 100644
index 0000000000000..167b19bf20663
--- /dev/null
+++ b/clang/test/Sema/builtin-ct-select-edge-cases.c
@@ -0,0 +1,373 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
+
+// Test with various condition expressions
+int test_conditional_expressions(int x, int y, int a, int b) {
+  // Logical expressions
+  int result1 = __builtin_ct_select(x && y, a, b);
+  int result2 = __builtin_ct_select(x || y, a, b);
+  int result3 = __builtin_ct_select(!x, a, b);
+  
+  // Comparison expressions
+  int result4 = __builtin_ct_select(x == y, a, b);
+  int result5 = __builtin_ct_select(x != y, a, b);
+  int result6 = __builtin_ct_select(x < y, a, b);
+  int result7 = __builtin_ct_select(x > y, a, b);
+  int result8 = __builtin_ct_select(x <= y, a, b);
+  int result9 = __builtin_ct_select(x >= y, a, b);
+  
+  // Bitwise expressions
+  int result10 = __builtin_ct_select(x & y, a, b);
+  int result11 = __builtin_ct_select(x | y, a, b);
+  int result12 = __builtin_ct_select(x ^ y, a, b);
+  int result13 = __builtin_ct_select(~x, a, b);
+  
+  // Arithmetic expressions
+  int result14 = __builtin_ct_select(x + y, a, b);
+  int result15 = __builtin_ct_select(x - y, a, b);
+  int result16 = __builtin_ct_select(x * y, a, b);
+  int result17 = __builtin_ct_select(x / y, a, b);
+  int result18 = __builtin_ct_select(x % y, a, b);
+  
+  return result1 + result2 + result3 + result4 + result5 + result6 + result7 + result8 + result9 + result10 + result11 + result12 + result13 + result14 + result15 + result16 + result17 + result18;
+}
+
+// Test with extreme values
+int test_extreme_values(int cond) {
+  // Maximum and minimum values
+  int max_int = __builtin_ct_select(cond, __INT_MAX__, -__INT_MAX__ - 1);
+  
+  // Very large numbers
+  long long max_ll = __builtin_ct_select(cond, __LONG_LONG_MAX__, -__LONG_LONG_MAX__ - 1);
+  
+  // Floating point extremes
+  float max_float = __builtin_ct_select(cond, __FLT_MAX__, -__FLT_MAX__);
+  double max_double = __builtin_ct_select(cond, __DBL_MAX__, -__DBL_MAX__);
+  
+  return max_int;
+}
+
+// Test with zero and negative zero
+int test_zero_values(int cond) {
+  // Integer zeros
+  int zero_int = __builtin_ct_select(cond, 0, -0);
+  
+  // Floating point zeros
+  float zero_float = __builtin_ct_select(cond, 0.0f, -0.0f);
+  double zero_double = __builtin_ct_select(cond, 0.0, -0.0);
+  
+  return zero_int;
+}
+
+// Test with infinity and NaN
+int test_special_float_values(int cond) {
+  // Infinity
+  float inf_float = __builtin_ct_select(cond, __builtin_inff(), -__builtin_inff());
+  double inf_double = __builtin_ct_select(cond, __builtin_inf(), -__builtin_inf());
+  
+  // NaN
+  float nan_float = __builtin_ct_select(cond, __builtin_nanf(""), __builtin_nanf(""));
+  double nan_double = __builtin_ct_select(cond, __builtin_nan(""), __builtin_nan(""));
+  
+  return 0;
+}
+
+// Test with complex pointer scenarios
+int test_pointer_edge_cases(int cond) {
+  int arr[10];
+  int *ptr1 = arr;
+  int *ptr2 = arr + 5;
+  
+  // Array pointers
+  int *result1 = __builtin_ct_select(cond, ptr1, ptr2);
+  
+  // Pointer arithmetic
+  int *result2 = __builtin_ct_select(cond, arr + 1, arr + 2);
+  
+  // NULL vs non-NULL
+  int *result3 = __builtin_ct_select(cond, ptr1, (int*)0);
+  
+  // Different pointer types (should fail)
+  float *fptr = (float*)0;
+  int *result4 = __builtin_ct_select(cond, ptr1, fptr); // expected-error {{incompatible operand types ('int *' and 'float *')}}
+  
+  return *result1;
+}
+
+// Test with function pointers
+int func1(int x) { return x; }
+int func2(int x) { return x * 2; }
+float func3(float x) { return x; }
+
+int test_function_pointers(int cond, int x) {
+  // Same signature function pointer 
+  int (*fptr)(int) = __builtin_ct_select(cond, &func1, &func2);
+  
+  // Different signature function pointers (should fail)
+  int (*bad_fptr)(int) = __builtin_ct_select(cond, &func1, &func3); // expected-error {{incompatible operand types ('int (*)(int)' and 'float (*)(float)')}}
+  
+  return fptr(x);
+}
+
+// Test with void pointers
+void *test_void_pointers(int cond, void *a, void *b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with const/volatile qualifiers
+int test_qualifiers(int cond) {
+  const int ca = 10;
+  const int cb = 20;
+  volatile int va = 30;
+  volatile int vb = 40;
+  const volatile int cva = 50;
+  const volatile int cvb = 60;
+  
+  // const to const
+  const int result1 = __builtin_ct_select(cond, ca, cb);
+  
+  // volatile to volatile
+  volatile int result2 = __builtin_ct_select(cond, va, vb);
+  
+  // const volatile to const volatile
+  const volatile int result3 = __builtin_ct_select(cond, cva, cvb);
+  
+  return result1 + result2 + result3;
+}
+
+// Test with arrays (currently accepted: no expected-error on the call below)
+int test_arrays(int cond) {
+  int arr1[5] = {1, 2, 3, 4, 5};
+  int arr2[5] = {6, 7, 8, 9, 10};
+  
+  // NOTE(review): no diagnostic is expected here; confirm arrays are allowed.
+  int *result = __builtin_ct_select(cond, arr1, arr2); 
+  
+  return result[0];
+}
+
+// Test with structures (should fail)
+struct Point {
+  int x, y;
+};
+
+struct Point test_structs(int cond) {
+  struct Point p1 = {1, 2};
+  struct Point p2 = {3, 4};
+  
+  return __builtin_ct_select(cond, p1, p2); // expected-error {{incompatible operand types ('struct Point' and 'struct Point')}}
+}
+
+// Test with unions (should fail)
+union Data {
+  int i;
+  float f;
+};
+
+union Data test_unions(int cond) {
+  union Data d1 = {.i = 10};
+  union Data d2 = {.i = 20};
+  
+  return __builtin_ct_select(cond, d1, d2); // expected-error {{incompatible operand types ('union Data' and 'union Data')}}
+}
+
+// Test with bit fields (should work as they're integers)
+struct BitField {
+  int a : 4;
+  int b : 4;
+};
+
+int test_bit_fields(int cond) {
+  struct BitField bf1 = {1, 2};
+  struct BitField bf2 = {3, 4};
+  
+  // Individual bit fields should work
+  int result1 = __builtin_ct_select(cond, bf1.a, bf2.a);
+  int result2 = __builtin_ct_select(cond, bf1.b, bf2.b);
+  
+  return result1 + result2;
+}
+
+// Test with designated initializers
+int test_designated_init(int cond) {
+  int arr1[3] = {[0] = 1, [1] = 2, [2] = 3};
+  int arr2[3] = {[0] = 4, [1] = 5, [2] = 6};
+  
+  // Access specific elements
+  int result1 = __builtin_ct_select(cond, arr1[0], arr2[0]);
+  int result2 = __builtin_ct_select(cond, arr1[1], arr2[1]);
+  
+  return result1 + result2;
+}
+
+// Test with complex expressions in arguments
+int complex_expr(int x) { return x * x; }
+
+int test_complex_arguments(int cond, int x, int y) {
+  // Function calls as arguments
+  int result1 = __builtin_ct_select(cond, complex_expr(x), complex_expr(y));
+  
+  // Ternary operator as arguments
+  int result2 = __builtin_ct_select(cond, x > 0 ? x : -x, y > 0 ? y : -y);
+  
+  // Compound literals
+  int result3 = __builtin_ct_select(cond, (int){x}, (int){y});
+  
+  return result1 + result2 + result3;
+}
+
+// Test with preprocessor macros
+#define MACRO_A 42
+#define MACRO_B 24
+#define MACRO_COND(x) (x > 0)
+
+int test_macros(int x) {
+  int result1 = __builtin_ct_select(MACRO_COND(x), MACRO_A, MACRO_B);
+  
+  // Nested macros
+  #define NESTED_SELECT(c, a, b) __builtin_ct_select(c, a, b)
+  int result2 = NESTED_SELECT(x, 10, 20);
+  
+  return result1 + result2;
+}
+
+// Test with string literals (currently accepted: no expected-error below)
+const char *test_strings(int cond) {
+  return __builtin_ct_select(cond, "hello", "world"); 
+}
+
+// Test with variable length arrays (VLA)
+int test_vla(int cond, int n) {
+  int vla1[n];
+  int vla2[n];
+  
+  // Individual elements should work
+  vla1[0] = 1;
+  vla2[0] = 2;
+  int result = __builtin_ct_select(cond, vla1[0], vla2[0]); 
+  
+  return result;
+}
+
+// Test with typedef
+typedef int MyInt;
+typedef float MyFloat;
+
+MyInt test_typedef(int cond, MyInt a, MyInt b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with different typedef types (should fail)
+MyInt test_different_typedef(int cond, MyInt a, MyFloat b) {
+  return __builtin_ct_select(cond, a, b); // expected-error {{incompatible operand types ('MyInt' (aka 'int') and 'MyFloat' (aka 'float'))}}
+}
+
+// Test with side effects (should be evaluated)
+int side_effect_counter = 0;
+int side_effect_func(int x) {
+  side_effect_counter++;
+  return x;
+}
+
+int test_side_effects(int cond) {
+  // Both arguments should be evaluated
+  int result = __builtin_ct_select(cond, side_effect_func(10), side_effect_func(20));
+  return result;
+}
+
+// Test with goto labels (context where expressions are used)
+int test_goto_context(int cond, int a, int b) {
+  int result = __builtin_ct_select(cond, a, b);
+  
+  if (result > 0) {
+    goto positive;
+  } else {
+    goto negative;
+  }
+  
+positive:
+  return result;
+  
+negative:
+  return -result;
+}
+
+// Test with switch statements
+int test_switch_context(int cond, int a, int b) {
+  int result = __builtin_ct_select(cond, a, b);
+  
+  switch (result) {
+    case 0:
+      return 0;
+    case 1:
+      return 1;
+    default:
+      return -1;
+  }
+}
+
+// Test with loops
+int test_loop_context(int cond, int a, int b) {
+  int result = __builtin_ct_select(cond, a, b);
+  int sum = 0;
+  
+  for (int i = 0; i < result; i++) {
+    sum += i;
+  }
+  
+  return sum;
+}
+
+// Test with recursive functions
+int factorial(int n) {
+  if (n <= 1) return 1;
+  return n * factorial(n - 1);
+}
+
+int test_recursive(int cond, int n) {
+  int result = __builtin_ct_select(cond, n, n + 1);
+  return factorial(result);
+}
+
+// Test with inline functions
+static inline int inline_func(int x) {
+  return x * 2;
+}
+
+int test_inline(int cond, int a, int b) {
+  return __builtin_ct_select(cond, inline_func(a), inline_func(b));
+}
+
+// Test with static variables
+int test_static_vars(int cond) {
+  static int static_a = 10;
+  static int static_b = 20;
+  
+  return __builtin_ct_select(cond, static_a, static_b);
+}
+
+// Test with extern variables
+extern int extern_a;
+extern int extern_b;
+
+int test_extern_vars(int cond) {
+  return __builtin_ct_select(cond, extern_a, extern_b);
+}
+
+// Test with register variables
+int test_register_vars(int cond) {
+  register int reg_a = 30;
+  register int reg_b = 40;
+  
+  return __builtin_ct_select(cond, reg_a, reg_b);
+}
+
+// Test with thread-local variables (C11)
+#if __STDC_VERSION__ >= 201112L
+_Thread_local int tls_a = 50;
+_Thread_local int tls_b = 60;
+
+int test_tls_vars(int cond) {
+  return __builtin_ct_select(cond, tls_a, tls_b);
+}
+#endif
diff --git a/clang/test/Sema/builtin-ct-select.c b/clang/test/Sema/builtin-ct-select.c
new file mode 100644
index 0000000000000..7f2d9291299d6
--- /dev/null
+++ b/clang/test/Sema/builtin-ct-select.c
@@ -0,0 +1,683 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+
+// Test integer types
+int test_int(int cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_int
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+long long test_long(int cond, long long a, long long b) {
+  // CHECK-LABEL: define {{.*}} @test_long
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+  // CHECK: ret i64 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+short test_short(int cond, short a, short b) {
+  // CHECK-LABEL: define {{.*}} @test_short
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i16 @llvm.ct.select.i16(i1 [[COND]], i16 %{{.*}}, i16 %{{.*}})
+  // CHECK: ret i16 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+unsigned char test_uchar(int cond, unsigned char a, unsigned char b) {
+  // CHECK-LABEL: define {{.*}} @test_uchar
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i8 @llvm.ct.select.i8(i1 [[COND]], i8 %{{.*}}, i8 %{{.*}})
+  // CHECK: ret i8 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+long long test_longlong(int cond, long long a, long long b) {
+  // CHECK-LABEL: define {{.*}} @test_longlong
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+  // CHECK: ret i64 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test floating point types
+float test_float(int cond, float a, float b) {
+  // CHECK-LABEL: define {{.*}} @test_float
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+  // CHECK: ret float [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+double test_double(int cond, double a, double b) {
+  // CHECK-LABEL: define {{.*}} @test_double
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+  // CHECK: ret double [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test pointer types
+int *test_pointer(int cond, int *a, int *b) {
+  // CHECK-LABEL: define {{.*}} @test_pointer
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr %{{.*}}, ptr %{{.*}})
+  // CHECK: ret ptr [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with different condition types
+int test_char_cond(char cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_char_cond
+  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+int test_long_cond(long long cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_long_cond
+  // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with boolean condition
+int test_bool_cond(_Bool cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_bool_cond
+  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with constants
+int test_constant_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_constant_cond
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 true, i32 42, i32 24)
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(1, 42, 24);
+}
+
+int test_zero_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_zero_cond
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 false, i32 42, i32 24)
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(0, 42, 24);
+}
+
+// Test type promotion
+int test_promotion(int cond, short a, short b) {
+  // CHECK-LABEL: define {{.*}} @test_promotion
+  // CHECK-DAG: [[A_EXT:%.*]] = sext i16 %{{.*}} to i32
+  // CHECK-DAG: [[B_EXT:%.*]] = sext i16 %{{.*}} to i32
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[A_EXT]], i32 [[B_EXT]])
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, (int)a, (int)b);
+}
+
+// Test mixed signedness
+unsigned int test_mixed_signedness(int cond, int a, unsigned int b) {
+  // CHECK-LABEL: define {{.*}} @test_mixed_signedness
+  // CHECK-DAG: [[A_EXT:%.*]] = sext i32 %{{.*}} to i64
+  // CHECK-DAG: [[B_EXT:%.*]] = zext i32 %{{.*}} to i64
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 [[A_EXT]], i64 [[B_EXT]])
+  // CHECK: [[RESULT_TRUNC:%.*]] = trunc i64 [[RESULT]] to i32
+  // CHECK: ret i32 [[RESULT_TRUNC]]
+  return __builtin_ct_select(cond, (long long)a, (long long)b);
+}
+
+// Test complex expression
+int test_complex_expr_alt(int x, int y) {
+  // CHECK-LABEL: define {{.*}} @test_complex_expr_alt
+  // CHECK-DAG: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, 0
+  // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
+  // Separate the final sequence to ensure proper ordering
+  // CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[ADD]], i32 [[SUB]])
+  // CHECK-NEXT: ret i32 [[RESULT]]
+  return __builtin_ct_select(x > 0, x + y, x - y);
+}
+
+// Test nested calls
+int test_nested_structured(int cond1, int cond2, int a, int b, int c) {
+  // CHECK-LABEL: define {{.*}} @test_nested_structured
+  // Phase 1: Conditions (order doesn't matter)
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  
+  // Phase 2: Inner select (must happen before outer)
+  // CHECK: [[INNER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+  
+  // Phase 3: Outer select (must use inner result)
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER]], i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
+}
+
+// Test with function calls
+int helper(int x) { return x * 2; }
+int test_function_calls(int cond, int x, int y) {
+  // CHECK-LABEL: define {{.*}} @test_function_calls
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[CALL1:%.*]] = call i32 @helper(i32 noundef %{{.*}})
+  // CHECK-DAG: [[CALL2:%.*]] = call i32 @helper(i32 noundef %{{.*}})
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[CALL1]], i32 [[CALL2]])
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, helper(x), helper(y));
+}
+
+// Test using ct_select as condition for another ct_select
+int test_intrinsic_condition(int cond1, int cond2, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_intrinsic_condition
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[INNER_COND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[INNER_COND]], 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond1, cond2, a), b, c);
+}
+
+// Test using comparison result of ct_select as condition
+int test_comparison_condition(int cond, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_comparison_condition
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: [[CMP:%.*]] = icmp sgt i32 [[FIRST_SELECT]], %{{.*}}
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond, a, b) > c, d, a);
+}
+
+// Test using ct_select result in arithmetic as condition
+int test_arithmetic_condition(int cond, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_arithmetic_condition
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: [[ADD:%.*]] = add nsw i32 [[FIRST_SELECT]], %{{.*}}
+  // CHECK: [[FINAL_COND:%.*]] = icmp ne i32 [[ADD]], 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond, a, b) + c, d, a);
+}
+
+// Test chained ct_select as conditions
+int test_chained_conditions(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e) {
+  // CHECK-LABEL: define {{.*}} @test_chained_conditions
+  // CHECK: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[FIRST:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[SECOND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int first_select = __builtin_ct_select(cond1, a, b);
+  int second_select = __builtin_ct_select(cond2, first_select, c);
+  return __builtin_ct_select(second_select, d, e);
+}
+
+// Test using ct_select with pointer condition
+//int test_pointer_condition(int *ptr1, int *ptr2, int a, int b, int c) {
+  // NO-CHECK-LABEL: define {{.*}} @test_pointer_condition
+  // NO-CHECK: [[PTR_COND:%.*]] = icmp ne ptr %{{.*}}, null
+  // NO-CHECK: [[PTR_SELECT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[PTR_COND]], ptr %{{.*}}, ptr %{{.*}})
+  // NO-CHECK: [[FINAL_COND:%.*]] = icmp ne ptr [[PTR_SELECT]], null
+  // NO-CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // NO-CHECK: ret i32 [[RESULT]]
+//  return __builtin_ct_select(__builtin_ct_select(ptr1, ptr1, ptr2), a, b);
+//}
+
+
+// Test using ct_select result in logical operations as condition
+int test_logical_condition(int cond1, int cond2, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_logical_condition
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[SELECT_BOOL:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond1, a, b) && cond2, c, d);
+}
+
+// Test multiple levels of ct_select as conditions
+int test_deep_condition_nesting(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e, int f) {
+  // CHECK-LABEL: define {{.*}} @test_deep_condition_nesting
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[INNER1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[INNER1_COND:%.*]] = icmp ne i32 [[INNER1]], 0
+  // CHECK-DAG: [[INNER2:%.*]] = call i32 @llvm.ct.select.i32(i1 [[INNER1_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[OUTER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER2]], i32 %{{.*}})
+  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[OUTER]], 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond1, __builtin_ct_select(__builtin_ct_select(cond2, a, b), c, d), e), f, a);
+}
+
+// Test ct_select with complex condition expressions
+int test_complex_condition_expr(int x, int y, int z, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_complex_condition_expr
+  // CHECK: [[CMP1:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
+  // CHECK: [[SELECT1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP1]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: [[CMP2:%.*]] = icmp slt i32 [[SELECT1]], %{{.*}}
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP2]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(x > y, x, y) < z, a, b);
+}
+
+// Test vector types - 128-bit vectors
+typedef int __attribute__((vector_size(16))) int4;
+typedef float __attribute__((vector_size(16))) float4;
+typedef short __attribute__((vector_size(16))) short8;
+typedef char __attribute__((vector_size(16))) char16;
+
+int4 test_vector_int4(int cond, int4 a, int4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_int4
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+float4 test_vector_float4(int cond, float4 a, float4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_float4
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+short8 test_vector_short8(int cond, short8 a, short8 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_short8
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <8 x i16> @llvm.ct.select.v8i16(i1 [[COND]], <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: ret <8 x i16> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+char16 test_vector_char16(int cond, char16 a, char16 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_char16
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <16 x i8> @llvm.ct.select.v16i8(i1 [[COND]], <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: ret <16 x i8> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test 256-bit vectors
+typedef int __attribute__((vector_size(32))) int8;
+typedef float __attribute__((vector_size(32))) float8;
+typedef double __attribute__((vector_size(32))) double4;
+
+int8 test_vector_int8(int cond, int8 a, int8 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_int8
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call <8 x i32> @llvm.ct.select.v8i32(i1 [[COND]], <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+float8 test_vector_float8(int cond, float8 a, float8 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_float8
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call <8 x float> @llvm.ct.select.v8f32(i1 [[COND]], <8 x float> %{{.*}}, <8 x float> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+double4 test_vector_double4(int cond, double4 a, double4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_double4
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call <4 x double> @llvm.ct.select.v4f64(i1 [[COND]], <4 x double> %{{.*}}, <4 x double> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test 512-bit vectors
+typedef int __attribute__((vector_size(64))) int16;
+typedef float __attribute__((vector_size(64))) float16;
+
+int16 test_vector_int16(int cond, int16 a, int16 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_int16
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <16 x i32> @llvm.ct.select.v16i32(i1 [[COND]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+float16 test_vector_float16(int cond, float16 a, float16 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_float16
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <16 x float> @llvm.ct.select.v16f32(i1 [[COND]], <16 x float> %{{.*}}, <16 x float> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test vector operations with different condition types
+int4 test_vector_char_cond(char cond, int4 a, int4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_char_cond
+  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+float4 test_vector_long_cond(long long cond, float4 a, float4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_long_cond
+  // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test vector constants
+int4 test_vector_constant_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_vector_constant_cond
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 true, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  int4 a = {1, 2, 3, 4};
+  int4 b = {5, 6, 7, 8};
+  return __builtin_ct_select(1, a, b);
+}
+
+float4 test_vector_zero_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_vector_zero_cond
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 false, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float4 a = {1.0f, 2.0f, 3.0f, 4.0f};
+  float4 b = {5.0f, 6.0f, 7.0f, 8.0f};
+  return __builtin_ct_select(0, a, b);
+}
+
+// Test nested vector selections
+int4 test_vector_nested(int cond1, int cond2, int4 a, int4 b, int4 c) {
+  // CHECK-LABEL: define {{.*}} @test_vector_nested
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[INNER:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND2]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND1]], <4 x i32> [[INNER]], <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
+}
+
+// Test vector selection with complex expressions
+float4 test_vector_complex_expr(int x, int y, float4 a, float4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_complex_expr
+  // CHECK: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[CMP]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  return __builtin_ct_select(x > y, a, b);
+}
+
+// Test vector with different element sizes
+typedef long long __attribute__((vector_size(16))) long2;
+typedef double __attribute__((vector_size(16))) double2;
+
+long2 test_vector_long2(int cond, long2 a, long2 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_long2
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <2 x i64> @llvm.ct.select.v2i64(i1 [[COND]], <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK: ret <2 x i64> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+double2 test_vector_double2(int cond, double2 a, double2 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_double2
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <2 x double> @llvm.ct.select.v2f64(i1 [[COND]], <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CHECK: ret <2 x double> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test mixed vector operations
+int4 test_vector_from_scalar_condition(int4 vec_cond, int4 a, int4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_from_scalar_condition
+  // Extract first element and use as condition
+  int scalar_cond = vec_cond[0];
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(scalar_cond, a, b);
+}
+
+// Test vector chaining
+float4 test_vector_chaining(int cond1, int cond2, int cond3, float4 a, float4 b, float4 c, float4 d) {
+  // CHECK-LABEL: define {{.*}} @test_vector_chaining
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND3:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[FIRST:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND1]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK-DAG: [[SECOND:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND2]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK-DAG: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND3]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float4 first = __builtin_ct_select(cond1, a, b);
+  float4 second = __builtin_ct_select(cond2, first, c);
+  return __builtin_ct_select(cond3, second, d);
+}
+
+// Test special floating point values - NaN as the true-value operand
+float test_nan_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_nan_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float 1.000000e+00)
+  // CHECK: ret float [[RESULT]]
+  float nan_val = __builtin_nanf("");
+  return __builtin_ct_select(cond, nan_val, 1.0f);
+}
+
+double test_nan_double_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_nan_double_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double 2.000000e+00)
+  // CHECK: ret double [[RESULT]]
+  double nan_val = __builtin_nan("");
+  return __builtin_ct_select(cond, nan_val, 2.0);
+}
+
+// Test infinity values
+float test_infinity_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_infinity_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+  // CHECK: ret float [[RESULT]]
+  float pos_inf = __builtin_inff();
+  float neg_inf = -__builtin_inff();
+  return __builtin_ct_select(cond, pos_inf, neg_inf);
+}
+
+double test_infinity_double_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_infinity_double_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+  // CHECK: ret double [[RESULT]]
+  double pos_inf = __builtin_inf();
+  double neg_inf = -__builtin_inf();
+  return __builtin_ct_select(cond, pos_inf, neg_inf);
+}
+
+// Test subnormal/denormal values
+float test_subnormal_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_subnormal_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+  // CHECK: ret float [[RESULT]]
+  // Very small subnormal values
+  float subnormal1 = 1e-40f;
+  float subnormal2 = 1e-45f;
+  return __builtin_ct_select(cond, subnormal1, subnormal2);
+}
+
+// Test integer overflow boundaries
+int test_integer_overflow_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_integer_overflow_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int max_int = __INT_MAX__;
+  int min_int = (-__INT_MAX__ - 1);
+  return __builtin_ct_select(cond, max_int, min_int);
+}
+
+long long test_longlong_overflow_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_longlong_overflow_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+  // CHECK: ret i64 [[RESULT]]
+  long long max_ll = __LONG_LONG_MAX__;
+  long long min_ll = (-__LONG_LONG_MAX__ - 1);
+  return __builtin_ct_select(cond, max_ll, min_ll);
+}
+
+// Test unsigned overflow boundaries
+unsigned int test_unsigned_overflow_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_unsigned_overflow_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  unsigned int max_uint = 4294967295;
+  unsigned int min_uint = 0;
+  return __builtin_ct_select(cond, max_uint, min_uint);
+}
+
+// Test a null pointer constant as an operand (nothing is dereferenced here)
+int* test_null_pointer_operands(int cond, int* valid_ptr) {
+  // CHECK-LABEL: define {{.*}} @test_null_pointer_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr null, ptr %{{.*}})
+  // CHECK: ret ptr [[RESULT]]
+  return __builtin_ct_select(cond, (int*)0, valid_ptr);
+}
+
+// Test volatile operations
+volatile int global_volatile = 42;
+int test_volatile_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_volatile_operands
+  // CHECK-DAG: [[VOLATILE_LOAD:%.*]] = load volatile i32, ptr {{.*}}
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 100)
+  // CHECK: ret i32 [[RESULT]]
+  volatile int vol_val = global_volatile;
+  return __builtin_ct_select(cond, vol_val, 100);
+}
+
+// Test that an uninitialized operand still lowers to a ct.select call (only the IR shape is checked)
+int test_uninitialized_operands(int cond, int initialized) {
+  // CHECK-LABEL: define {{.*}} @test_uninitialized_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int uninitialized; // Intentionally left uninitialized; its value is never inspected by the checks
+  return __builtin_ct_select(cond, uninitialized, initialized);
+}
+
+// Test zero division avoidance patterns
+int test_division_by_zero_avoidance(int cond, int dividend, int divisor) {
+  // CHECK-LABEL: define {{.*}} @test_division_by_zero_avoidance
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[DIV_RESULT:%.*]] = sdiv i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[SAFE_DIVISOR:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 1)
+  // First get a safe divisor (never zero)
+  int safe_divisor = __builtin_ct_select(divisor != 0, divisor, 1);
+  // Then perform division with guaranteed non-zero divisor
+  return dividend / safe_divisor;
+}
+
+// Test array bounds checking patterns
+int test_array_bounds_protection(int cond, int index, int* array) {
+  // CHECK-LABEL: define {{.*}} @test_array_bounds_protection
+  // CHECK-DAG: [[SAFE_INDEX:%.*]] = call i32 @llvm.ct.select.i32(i1 {{.*}}, i32 %{{.*}}, i32 0)
+  // Use ct_select to ensure safe array indexing
+  int safe_index = __builtin_ct_select(index >= 0 && index < 10, index, 0);
+  return array[safe_index];
+}
+
+// Test bit manipulation edge cases: both operands are produced by shifts of 31
+unsigned int test_bit_manipulation_edge_cases(int cond, unsigned int value) {
+  // CHECK-LABEL: define {{.*}} @test_bit_manipulation_edge_cases
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[SHIFT_LEFT:%.*]] = shl i32 %{{.*}}, 31
+  // CHECK-DAG: [[SHIFT_RIGHT:%.*]] = lshr i32 %{{.*}}, 31
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  // Shift by 31, the largest amount the checked i32 shl/lshr can take; the operand is unsigned
+  unsigned int left_shift = value << 31;   // Moves the lowest bit to the top; higher bits are discarded
+  unsigned int right_shift = value >> 31;  // Extracts the most significant bit (logical shift, see lshr)
+  return __builtin_ct_select(cond, left_shift, right_shift);
+}
+
+// Test signed add/sub results as operands (the checks expect nsw, i.e. no wrapping semantics)
+int test_signed_wraparound(int cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_signed_wraparound
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int sum = a + b;      // May overflow for extreme inputs; checked as 'add nsw'
+  int diff = a - b;     // May overflow for extreme inputs; checked as 'sub nsw'
+  return __builtin_ct_select(cond, sum, diff);
+}
+
+// Test vector NaN handling
+float4 test_vector_nan_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_vector_nan_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float nan_val = __builtin_nanf("");
+  float4 nan_vec = {nan_val, nan_val, nan_val, nan_val};
+  float4 normal_vec = {1.0f, 2.0f, 3.0f, 4.0f};
+  return __builtin_ct_select(cond, nan_vec, normal_vec);
+}
+
+// Test vector infinity handling
+float4 test_vector_infinity_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_vector_infinity_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float pos_inf = __builtin_inff();
+  float neg_inf = -__builtin_inff();
+  float4 inf_vec = {pos_inf, neg_inf, pos_inf, neg_inf};
+  float4 zero_vec = {0.0f, 0.0f, 0.0f, 0.0f};
+  return __builtin_ct_select(cond, inf_vec, zero_vec);
+}
+
+// Test mixed special values
+double test_mixed_special_values(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_mixed_special_values
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+  // CHECK: ret double [[RESULT]]
+  double nan_val = __builtin_nan("");
+  double inf_val = __builtin_inf();
+  return __builtin_ct_select(cond, nan_val, inf_val);
+}
+
+// Test constant-time memory access pattern
+int test_constant_time_memory_access(int secret_index, int* data_array) {
+  // CHECK-LABEL: define {{.*}} @test_constant_time_memory_access
+  // This pattern ensures constant-time memory access regardless of secret_index value
+  int result = 0;
+  // Use ct_select to accumulate values without revealing the secret index
+  for (int i = 0; i < 8; i++) {
+    int is_target = (i == secret_index);
+    int current_value = data_array[i];
+    int selected_value = __builtin_ct_select(is_target, current_value, 0);
+    result += selected_value;
+  }
+  return result;
+}
+
+// Test timing-attack resistant comparison
+int test_timing_resistant_comparison(const char* secret, const char* guess) {
+  // CHECK-LABEL: define {{.*}} @test_timing_resistant_comparison
+  // Compares 32 bytes of each buffer; bitwise &/| avoid the data-dependent branches of short-circuit &&/||
+  int match = 1;
+  for (int i = 0; i < 32; i++) {
+    int chars_equal = (secret[i] == guess[i]);
+    int both_null = (secret[i] == 0) & (guess[i] == 0);
+    int still_matching = __builtin_ct_select(chars_equal | both_null, match, 0);
+    match = __builtin_ct_select(both_null, match, still_matching);
+  }
+  return match;
+}

>From ee350427d29647d798b13bc74517ffe3c210acd0 Mon Sep 17 00:00:00 2001
From: AkshayK <iit.akshay at gmail.com>
Date: Fri, 22 May 2026 18:27:24 -0400
Subject: [PATCH 2/2] [ConstantTime][Clang] Split __builtin_ct_select tests;
 expand coverage; misc fixes

- Move codegen test to clang/test/CodeGen/, regen with update_cc_test_checks
- Add ext_vector_type, half/bfloat, and array/function-decay coverage
- New clang/test/Sema/builtin-ct-select.c: -verify diagnostic tests for
  too-few/too-many args, non-integer cond, non-scalar/mismatched operands
- Sema: drop else-after-return in arg-count check
---
 clang/lib/Sema/SemaChecking.cpp        |   13 +-
 clang/test/CodeGen/builtin-ct-select.c | 1938 ++++++++++++++++++++++++
 clang/test/Sema/builtin-ct-select.c    |  692 +--------
 3 files changed, 1966 insertions(+), 677 deletions(-)
 create mode 100644 clang/test/CodeGen/builtin-ct-select.c

diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e5a15c84de8d3..319d9227343a9 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3931,18 +3931,15 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
 
   case Builtin::BI__builtin_ct_select: {
     if (TheCall->getNumArgs() != 3) {
-      // Simple argument count check without complex diagnostics
-      if (TheCall->getNumArgs() < 3) {
+      if (TheCall->getNumArgs() < 3)
         return Diag(TheCall->getEndLoc(),
                     diag::err_typecheck_call_too_few_args_at_least)
                << 0 << 3 << TheCall->getNumArgs() << 0
                << TheCall->getCallee()->getSourceRange();
-      } else {
-        return Diag(TheCall->getEndLoc(),
-                    diag::err_typecheck_call_too_many_args)
-               << 0 << 3 << TheCall->getNumArgs() << 0
-               << TheCall->getCallee()->getSourceRange();
-      }
+      return Diag(TheCall->getEndLoc(),
+                  diag::err_typecheck_call_too_many_args)
+             << 0 << 3 << TheCall->getNumArgs() << 0
+             << TheCall->getCallee()->getSourceRange();
     }
     auto *Cond = TheCall->getArg(0);
     auto *A = TheCall->getArg(1);
diff --git a/clang/test/CodeGen/builtin-ct-select.c b/clang/test/CodeGen/builtin-ct-select.c
new file mode 100644
index 0000000000000..da39ded5ad300
--- /dev/null
+++ b/clang/test/CodeGen/builtin-ct-select.c
@@ -0,0 +1,1938 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+
+// Test integer types
+// CHECK-LABEL: define i32 @test_int(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_int(int cond, int a, int b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define i64 @test_long(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.ct.select.i64(i1 [[TOBOOL]], i64 [[TMP1]], i64 [[TMP2]])
+// CHECK-NEXT:    ret i64 [[TMP3]]
+//
+long long test_long(int cond, long long a, long long b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define i16 @test_short(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i16 [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store i16 [[B]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP3:%.*]] = call i16 @llvm.ct.select.i16(i1 [[TOBOOL]], i16 [[TMP1]], i16 [[TMP2]])
+// CHECK-NEXT:    ret i16 [[TMP3]]
+//
+short test_short(int cond, short a, short b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define i8 @test_uchar(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i8 noundef [[A:%.*]], i8 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i8 [[A]], ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    store i8 [[B]], ptr [[B_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[B_ADDR]], align 1
+// CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.ct.select.i8(i1 [[TOBOOL]], i8 [[TMP1]], i8 [[TMP2]])
+// CHECK-NEXT:    ret i8 [[TMP3]]
+//
+unsigned char test_uchar(int cond, unsigned char a, unsigned char b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define i64 @test_longlong(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i64 noundef [[A:%.*]], i64 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i64 [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store i64 [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.ct.select.i64(i1 [[TOBOOL]], i64 [[TMP1]], i64 [[TMP2]])
+// CHECK-NEXT:    ret i64 [[TMP3]]
+//
+long long test_longlong(int cond, long long a, long long b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test floating point types
+// CHECK-LABEL: define float @test_float(
+// CHECK-SAME: i32 noundef [[COND:%.*]], float noundef [[A:%.*]], float noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store float [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store float [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.ct.select.f32(i1 [[TOBOOL]], float [[TMP1]], float [[TMP2]])
+// CHECK-NEXT:    ret float [[TMP3]]
+//
+float test_float(int cond, float a, float b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define double @test_double(
+// CHECK-SAME: i32 noundef [[COND:%.*]], double noundef [[A:%.*]], double noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store double [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store double [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call double @llvm.ct.select.f64(i1 [[TOBOOL]], double [[TMP1]], double [[TMP2]])
+// CHECK-NEXT:    ret double [[TMP3]]
+//
+double test_double(int cond, double a, double b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test pointer types
+// CHECK-LABEL: define ptr @test_pointer(
+// CHECK-SAME: i32 noundef [[COND:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[A]], ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[B]], ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[A_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[B_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call ptr @llvm.ct.select.p0(i1 [[TOBOOL]], ptr [[TMP1]], ptr [[TMP2]])
+// CHECK-NEXT:    ret ptr [[TMP3]]
+//
+int *test_pointer(int cond, int *a, int *b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with different condition types
+// CHECK-LABEL: define i32 @test_char_cond(
+// CHECK-SAME: i8 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i8 [[COND]], ptr [[COND_ADDR]], align 1
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[COND_ADDR]], align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_char_cond(char cond, int a, int b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define i32 @test_long_cond(
+// CHECK-SAME: i64 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i64 [[COND]], ptr [[COND_ADDR]], align 8
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[COND_ADDR]], align 8
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i64 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_long_cond(long long cond, int a, int b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with boolean condition
+// CHECK-LABEL: define i32 @test_bool_cond(
+// CHECK-SAME: i1 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[STOREDV:%.*]] = zext i1 [[COND]] to i8
+// CHECK-NEXT:    store i8 [[STOREDV]], ptr [[COND_ADDR]], align 1
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[COND_ADDR]], align 1
+// CHECK-NEXT:    [[LOADEDV:%.*]] = icmp ne i8 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[LOADEDV]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_bool_cond(_Bool cond, int a, int b) {
+  return __builtin_ct_select(cond, a, b); // _Bool is stored as i8 and converted back to i1 before the intrinsic call
+}
+
+// Test with constants
+// CHECK-LABEL: define i32 @test_constant_cond(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ct.select.i32(i1 true, i32 42, i32 24)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int test_constant_cond(void) {
+  return __builtin_ct_select(1, 42, 24); // expected IR still calls the intrinsic, with `i1 true` as the selector
+}
+
+// CHECK-LABEL: define i32 @test_zero_cond(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.ct.select.i32(i1 false, i32 42, i32 24)
+// CHECK-NEXT:    ret i32 [[TMP0]]
+//
+int test_zero_cond(void) {
+  return __builtin_ct_select(0, 42, 24); // expected IR still calls the intrinsic, with `i1 false` as the selector
+}
+
+// Test explicit widening casts (short -> int) on both selected operands
+// CHECK-LABEL: define i32 @test_promotion(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i16 noundef [[A:%.*]], i16 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i16 [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store i16 [[B]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[CONV1:%.*]] = sext i16 [[TMP2]] to i32
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[CONV]], i32 [[CONV1]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_promotion(int cond, short a, short b) {
+  return __builtin_ct_select(cond, (int)a, (int)b); // both shorts are sign-extended to i32 before the select
+}
+
+// Test mixed signedness: signed and unsigned operands widened to long long
+// CHECK-LABEL: define i32 @test_mixed_signedness(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[TMP1]] to i64
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[CONV1:%.*]] = zext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.ct.select.i64(i1 [[TOBOOL]], i64 [[CONV]], i64 [[CONV1]])
+// CHECK-NEXT:    [[CONV2:%.*]] = trunc i64 [[TMP3]] to i32
+// CHECK-NEXT:    ret i32 [[CONV2]]
+//
+unsigned int test_mixed_signedness(int cond, int a, unsigned int b) {
+  return __builtin_ct_select(cond, (long long)a, (long long)b); // a is sign-extended, b zero-extended; the i64 result is truncated to i32
+}
+
+// Test complex expression
+// CHECK-LABEL: define i32 @test_complex_expr_alt(
+// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[Y_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[Y]], ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[ADD]], i32 [[SUB]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
+int test_complex_expr_alt(int x, int y) {
+  // The comparison, x + y and x - y are all evaluated before the select call.
+  return __builtin_ct_select(x > 0, x + y, x - y);
+}
+
+// Test nested calls
+// CHECK-LABEL: define i32 @test_nested_structured(
+// CHECK-SAME: i32 noundef [[COND1:%.*]], i32 noundef [[COND2:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND1_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND1]], ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND2]], ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL1]], i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP4]], i32 [[TMP5]])
+// CHECK-NEXT:    ret i32 [[TMP6]]
+//
+int test_nested_structured(int cond1, int cond2, int a, int b, int c) {
+  // Expected IR order (see the CHECK lines for this function):
+  //   1. both conditions are converted to i1,
+  //   2. the inner select on cond2 is emitted,
+  //   3. the outer select on cond1 takes the inner result as its true arm
+  //      and c as its false arm.
+  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
+}
+
+// Test with function calls
+// CHECK-LABEL: define i32 @helper(
+// CHECK-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[TMP0]], 2
+// CHECK-NEXT:    ret i32 [[MUL]]
+//
+int helper(int x) { return x * 2; } // doubles x; called for both operands in test_function_calls
+// CHECK-LABEL: define i32 @test_function_calls(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[Y_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[Y]], ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[CALL:%.*]] = call i32 @helper(i32 noundef [[TMP1]])
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[CALL1:%.*]] = call i32 @helper(i32 noundef [[TMP2]])
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[CALL]], i32 [[CALL1]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_function_calls(int cond, int x, int y) {
+  return __builtin_ct_select(cond, helper(x), helper(y)); // both helper calls are emitted unconditionally, before the select
+}
+
+// Test using ct_select as condition for another ct_select
+// CHECK-LABEL: define i32 @test_intrinsic_condition(
+// CHECK-SAME: i32 noundef [[COND1:%.*]], i32 noundef [[COND2:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND1_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND1]], ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND2]], ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP3]], 0
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL1]], i32 [[TMP4]], i32 [[TMP5]])
+// CHECK-NEXT:    ret i32 [[TMP6]]
+//
+int test_intrinsic_condition(int cond1, int cond2, int a, int b, int c, int d) { // NOTE: d is never read
+  return __builtin_ct_select(__builtin_ct_select(cond1, cond2, a), b, c); // inner result is compared `!= 0` to form the outer selector
+}
+
+// Test using comparison result of ct_select as condition
+// CHECK-LABEL: define i32 @test_comparison_condition(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[TMP5]], i32 [[TMP6]])
+// CHECK-NEXT:    ret i32 [[TMP7]]
+//
+int test_comparison_condition(int cond, int a, int b, int c, int d) {
+  return __builtin_ct_select(__builtin_ct_select(cond, a, b) > c, d, a); // the signed `>` result is used directly as the outer i1 selector
+}
+
+// Test using ct_select result in arithmetic as condition
+// CHECK-LABEL: define i32 @test_arithmetic_condition(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[ADD]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL1]], i32 [[TMP5]], i32 [[TMP6]])
+// CHECK-NEXT:    ret i32 [[TMP7]]
+//
+int test_arithmetic_condition(int cond, int a, int b, int c, int d) {
+  return __builtin_ct_select(__builtin_ct_select(cond, a, b) + c, d, a); // the sum is compared `!= 0` to form the outer selector
+}
+
+// Test chained ct_select calls: an earlier result is used first as a value operand, then as a condition
+// CHECK-LABEL: define i32 @test_chained_conditions(
+// CHECK-SAME: i32 noundef [[COND1:%.*]], i32 noundef [[COND2:%.*]], i32 noundef [[COND3:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]], i32 noundef [[E:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND1_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND3_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[E_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[FIRST_SELECT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SECOND_SELECT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND1]], ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND2]], ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND3]], ptr [[COND3_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[E]], ptr [[E_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    store i32 [[TMP3]], ptr [[FIRST_SELECT]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[FIRST_SELECT]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL1]], i32 [[TMP5]], i32 [[TMP6]])
+// CHECK-NEXT:    store i32 [[TMP7]], ptr [[SECOND_SELECT]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[SECOND_SELECT]], align 4
+// CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[E_ADDR]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL2]], i32 [[TMP9]], i32 [[TMP10]])
+// CHECK-NEXT:    ret i32 [[TMP11]]
+//
+int test_chained_conditions(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e) { // NOTE: cond3 is never read
+  int first_select = __builtin_ct_select(cond1, a, b);
+  int second_select = __builtin_ct_select(cond2, first_select, c); // earlier result used as a value operand
+  return __builtin_ct_select(second_select, d, e); // earlier result used as the condition
+}
+
+// Test using ct_select with pointer condition (currently disabled: the function below is commented out)
+//int test_pointer_condition(int *ptr1, int *ptr2, int a, int b, int c) {
+  // NO-CHECK-LABEL: define {{.*}} @test_pointer_condition
+  // NO-CHECK: [[PTR_COND:%.*]] = icmp ne ptr %{{.*}}, null
+  // NO-CHECK: [[PTR_SELECT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[PTR_COND]], ptr %{{.*}}, ptr %{{.*}})
+  // NO-CHECK: [[FINAL_COND:%.*]] = icmp ne ptr [[PTR_SELECT]], null
+  // NO-CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // NO-CHECK: ret i32 [[RESULT]]
+//  return __builtin_ct_select(__builtin_ct_select(ptr1, ptr1, ptr2), a, b);
+//}
+
+
+// Test using ct_select result in logical operations as condition
+// CHECK-LABEL: define i32 @test_logical_condition(
+// CHECK-SAME: i32 noundef [[COND1:%.*]], i32 noundef [[COND2:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[COND1_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND1]], ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND2]], ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP3]], 0
+// CHECK-NEXT:    br i1 [[TOBOOL1]], label %[[LAND_RHS:.*]], label %[[LAND_END:.*]]
+// CHECK:       [[LAND_RHS]]:
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK-NEXT:    br label %[[LAND_END]]
+// CHECK:       [[LAND_END]]:
+// CHECK-NEXT:    [[TMP5:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[TOBOOL2]], %[[LAND_RHS]] ]
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TMP5]], i32 [[TMP6]], i32 [[TMP7]])
+// CHECK-NEXT:    ret i32 [[TMP8]]
+//
+int test_logical_condition(int cond1, int cond2, int a, int b, int c, int d) {
+  return __builtin_ct_select(__builtin_ct_select(cond1, a, b) && cond2, c, d); // `&&` is lowered with a branch on the inner select result (land.rhs/land.end); NOTE(review): that branch depends on the selected value - confirm this is the intended coverage
+}
+
+// Test multiple levels of ct_select as conditions
+// CHECK-LABEL: define i32 @test_deep_condition_nesting(
+// CHECK-SAME: i32 noundef [[COND1:%.*]], i32 noundef [[COND2:%.*]], i32 noundef [[COND3:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]], i32 noundef [[C:%.*]], i32 noundef [[D:%.*]], i32 noundef [[E:%.*]], i32 noundef [[F:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND1_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND3_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[E_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[F_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND1]], ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND2]], ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND3]], ptr [[COND3_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[C]], ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[D]], ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[E]], ptr [[E_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[F]], ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL1]], i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[C_ADDR]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[D_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL2]], i32 [[TMP5]], i32 [[TMP6]])
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[E_ADDR]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP7]], i32 [[TMP8]])
+// CHECK-NEXT:    [[TOBOOL3:%.*]] = icmp ne i32 [[TMP9]], 0
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[F_ADDR]], align 4
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL3]], i32 [[TMP10]], i32 [[TMP11]])
+// CHECK-NEXT:    ret i32 [[TMP12]]
+//
+int test_deep_condition_nesting(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e, int f) { // NOTE: cond3 is never read
+  return __builtin_ct_select(__builtin_ct_select(cond1, __builtin_ct_select(__builtin_ct_select(cond2, a, b), c, d), e), f, a); // four selects: the innermost and the cond1 select each feed a condition; the second-level select is a value operand
+}
+
+// Test ct_select with complex condition expressions
+// CHECK-LABEL: define i32 @test_complex_condition_expr(
+// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]], i32 noundef [[Z:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[Y_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[Z_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[Y]], ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[Z]], ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[Z_ADDR]], align 4
+// CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP4]], [[TMP5]]
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP1]], i32 [[TMP6]], i32 [[TMP7]])
+// CHECK-NEXT:    ret i32 [[TMP8]]
+//
+int test_complex_condition_expr(int x, int y, int z, int a, int b) {
+  return __builtin_ct_select(__builtin_ct_select(x > y, x, y) < z, a, b); // inner select yields max(x, y); its `< z` comparison is the outer selector
+}
+
+// Test vector types - 128-bit vectors (passed and returned directly in the expected IR)
+typedef int __attribute__((vector_size(16))) int4; // <4 x i32>
+typedef float __attribute__((vector_size(16))) float4; // <4 x float>
+typedef short __attribute__((vector_size(16))) short8; // <8 x i16>
+typedef char __attribute__((vector_size(16))) char16; // <16 x i8>
+
+// CHECK-LABEL: define <4 x i32> @test_vector_int4(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[TOBOOL]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int4 test_vector_int4(int cond, int4 a, int4 b) {
+  return __builtin_ct_select(cond, a, b); // one scalar i1 selects the whole <4 x i32> value (llvm.ct.select.v4i32)
+}
+
+// CHECK-LABEL: define <4 x float> @test_vector_float4(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <4 x float> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+//
+float4 test_vector_float4(int cond, float4 a, float4 b) {
+  return __builtin_ct_select(cond, a, b); // one scalar i1 selects the whole <4 x float> value (llvm.ct.select.v4f32)
+}
+
+// CHECK-LABEL: define <8 x i16> @test_vector_short8(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x i16>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <8 x i16>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <8 x i16> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <8 x i16> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x i16>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i16> @llvm.ct.select.v8i16(i1 [[TOBOOL]], <8 x i16> [[TMP1]], <8 x i16> [[TMP2]])
+// CHECK-NEXT:    ret <8 x i16> [[TMP3]]
+//
+short8 test_vector_short8(int cond, short8 a, short8 b) {
+  return __builtin_ct_select(cond, a, b); // one scalar i1 selects the whole <8 x i16> value (llvm.ct.select.v8i16)
+}
+
+// CHECK-LABEL: define <16 x i8> @test_vector_char16(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <16 x i8> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <16 x i8> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.ct.select.v16i8(i1 [[TOBOOL]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+//
+char16 test_vector_char16(int cond, char16 a, char16 b) {
+  return __builtin_ct_select(cond, a, b); // one scalar i1 selects the whole <16 x i8> value (llvm.ct.select.v16i8)
+}
+
+// Test 256-bit vectors (in the expected IR these arrive by pointer and are returned through sret)
+typedef int __attribute__((vector_size(32))) int8; // <8 x i32>
+typedef float __attribute__((vector_size(32))) float8; // <8 x float>
+typedef double __attribute__((vector_size(32))) double4; // <4 x double>
+
+// CHECK-LABEL: define void @test_vector_int8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret(<8 x i32>) align 16 [[AGG_RESULT:%.*]], i32 noundef [[COND:%.*]], ptr noundef dead_on_return [[TMP0:%.*]], ptr noundef dead_on_return [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <8 x i32>, align 16
+// CHECK-NEXT:    [[A:%.*]] = load <8 x i32>, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[B:%.*]] = load <8 x i32>, ptr [[TMP1]], align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <8 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <8 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = call <8 x i32> @llvm.ct.select.v8i32(i1 [[TOBOOL]], <8 x i32> [[TMP3]], <8 x i32> [[TMP4]])
+// CHECK-NEXT:    store <8 x i32> [[TMP5]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <8 x i32>, ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    store <8 x i32> [[TMP6]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    ret void
+//
+int8 test_vector_int8(int cond, int8 a, int8 b) {
+  return __builtin_ct_select(cond, a, b); // operands are loaded from their pointers, selected as <8 x i32>, and the result is stored to the sret slot
+}
+
+// CHECK-LABEL: define void @test_vector_float8(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret(<8 x float>) align 16 [[AGG_RESULT:%.*]], i32 noundef [[COND:%.*]], ptr noundef dead_on_return [[TMP0:%.*]], ptr noundef dead_on_return [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x float>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <8 x float>, align 16
+// CHECK-NEXT:    [[A:%.*]] = load <8 x float>, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[B:%.*]] = load <8 x float>, ptr [[TMP1]], align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <8 x float> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <8 x float> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <8 x float>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <8 x float>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = call <8 x float> @llvm.ct.select.v8f32(i1 [[TOBOOL]], <8 x float> [[TMP3]], <8 x float> [[TMP4]])
+// CHECK-NEXT:    store <8 x float> [[TMP5]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <8 x float>, ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    store <8 x float> [[TMP6]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    ret void
+//
+float8 test_vector_float8(int cond, float8 a, float8 b) { // expected IR: result via sret, a/b passed by pointer
+  return __builtin_ct_select(cond, a, b); // expected IR: @llvm.ct.select.v8f32 with an i1 condition
+}
+
+// CHECK-LABEL: define void @test_vector_double4(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret(<4 x double>) align 16 [[AGG_RESULT:%.*]], i32 noundef [[COND:%.*]], ptr noundef dead_on_return [[TMP0:%.*]], ptr noundef dead_on_return [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x double>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x double>, align 16
+// CHECK-NEXT:    [[A:%.*]] = load <4 x double>, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[B:%.*]] = load <4 x double>, ptr [[TMP1]], align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <4 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x double> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = call <4 x double> @llvm.ct.select.v4f64(i1 [[TOBOOL]], <4 x double> [[TMP3]], <4 x double> [[TMP4]])
+// CHECK-NEXT:    store <4 x double> [[TMP5]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x double>, ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    store <4 x double> [[TMP6]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    ret void
+//
+double4 test_vector_double4(int cond, double4 a, double4 b) { // expected IR: result via sret, a/b passed by pointer
+  return __builtin_ct_select(cond, a, b); // expected IR: @llvm.ct.select.v4f64 with an i1 condition
+}
+
+// Test 512-bit vectors (vector_size(64) = 64 bytes)
+typedef int __attribute__((vector_size(64))) int16;     // 16 x int lanes (not a 16-bit integer)
+typedef float __attribute__((vector_size(64))) float16; // 16 x float lanes (not a half-precision float)
+
+// CHECK-LABEL: define void @test_vector_int16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret(<16 x i32>) align 16 [[AGG_RESULT:%.*]], i32 noundef [[COND:%.*]], ptr noundef dead_on_return [[TMP0:%.*]], ptr noundef dead_on_return [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <16 x i32>, align 16
+// CHECK-NEXT:    [[A:%.*]] = load <16 x i32>, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[B:%.*]] = load <16 x i32>, ptr [[TMP1]], align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <16 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <16 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = call <16 x i32> @llvm.ct.select.v16i32(i1 [[TOBOOL]], <16 x i32> [[TMP3]], <16 x i32> [[TMP4]])
+// CHECK-NEXT:    store <16 x i32> [[TMP5]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <16 x i32>, ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    store <16 x i32> [[TMP6]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    ret void
+//
+int16 test_vector_int16(int cond, int16 a, int16 b) { // expected IR: result via sret, a/b passed by pointer
+  return __builtin_ct_select(cond, a, b); // expected IR: @llvm.ct.select.v16i32 with an i1 condition
+}
+
+// CHECK-LABEL: define void @test_vector_float16(
+// CHECK-SAME: ptr dead_on_unwind noalias writable sret(<16 x float>) align 16 [[AGG_RESULT:%.*]], i32 noundef [[COND:%.*]], ptr noundef dead_on_return [[TMP0:%.*]], ptr noundef dead_on_return [[TMP1:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <16 x float>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <16 x float>, align 16
+// CHECK-NEXT:    [[A:%.*]] = load <16 x float>, ptr [[TMP0]], align 16
+// CHECK-NEXT:    [[B:%.*]] = load <16 x float>, ptr [[TMP1]], align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <16 x float> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <16 x float> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load <16 x float>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load <16 x float>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP5:%.*]] = call <16 x float> @llvm.ct.select.v16f32(i1 [[TOBOOL]], <16 x float> [[TMP3]], <16 x float> [[TMP4]])
+// CHECK-NEXT:    store <16 x float> [[TMP5]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <16 x float>, ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    store <16 x float> [[TMP6]], ptr [[AGG_RESULT]], align 16
+// CHECK-NEXT:    ret void
+//
+float16 test_vector_float16(int cond, float16 a, float16 b) { // expected IR: result via sret, a/b passed by pointer
+  return __builtin_ct_select(cond, a, b); // expected IR: @llvm.ct.select.v16f32 with an i1 condition
+}
+
+// Test vector operations with different condition types
+// CHECK-LABEL: define <4 x i32> @test_vector_char_cond(
+// CHECK-SAME: i8 noundef [[COND:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store i8 [[COND]], ptr [[COND_ADDR]], align 1
+// CHECK-NEXT:    store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[COND_ADDR]], align 1
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i8 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[TOBOOL]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int4 test_vector_char_cond(char cond, int4 a, int4 b) { // 128-bit vector: passed and returned directly in the expected IR
+  return __builtin_ct_select(cond, a, b); // char condition becomes i1 via an i8 compare against 0
+}
+
+// CHECK-LABEL: define <4 x float> @test_vector_long_cond(
+// CHECK-SAME: i64 noundef [[COND:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store i64 [[COND]], ptr [[COND_ADDR]], align 8
+// CHECK-NEXT:    store <4 x float> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[COND_ADDR]], align 8
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i64 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+//
+float4 test_vector_long_cond(long long cond, float4 a, float4 b) { // 128-bit vector: passed and returned directly in the expected IR
+  return __builtin_ct_select(cond, a, b); // long long condition becomes i1 via an i64 compare against 0
+}
+
+// Test vector selection with constant conditions
+// CHECK-LABEL: define <4 x i32> @test_vector_constant_cond(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr [[A]], align 16
+// CHECK-NEXT:    store <4 x i32> <i32 5, i32 6, i32 7, i32 8>, ptr [[B]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[B]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 true, <4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP2]]
+//
+int4 test_vector_constant_cond(void) {
+  int4 a = {1, 2, 3, 4};
+  int4 b = {5, 6, 7, 8};
+  return __builtin_ct_select(1, a, b); // literal 1 becomes `i1 true`; the intrinsic call is still emitted
+}
+
+// CHECK-LABEL: define <4 x float> @test_vector_zero_cond(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[A:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[B:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, ptr [[A]], align 16
+// CHECK-NEXT:    store <4 x float> <float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>, ptr [[B]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[B]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 false, <4 x float> [[TMP0]], <4 x float> [[TMP1]])
+// CHECK-NEXT:    ret <4 x float> [[TMP2]]
+//
+float4 test_vector_zero_cond(void) {
+  float4 a = {1.0f, 2.0f, 3.0f, 4.0f};
+  float4 b = {5.0f, 6.0f, 7.0f, 8.0f};
+  return __builtin_ct_select(0, a, b); // literal 0 becomes `i1 false`; the intrinsic call is still emitted
+}
+
+// Test nested vector selections
+// CHECK-LABEL: define <4 x i32> @test_vector_nested(
+// CHECK-SAME: i32 noundef [[COND1:%.*]], i32 noundef [[COND2:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]], <4 x i32> noundef [[C:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND1_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store i32 [[COND1]], ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND2]], ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[C]], ptr [[C_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[TOBOOL1]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]])
+// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x i32>, ptr [[C_ADDR]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[TOBOOL]], <4 x i32> [[TMP4]], <4 x i32> [[TMP5]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP6]]
+//
+int4 test_vector_nested(int cond1, int cond2, int4 a, int4 b, int4 c) {
+  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c); // expected IR: both conditions converted first, then inner select (cond2), then outer (cond1)
+}
+
+// Test vector selection with complex expressions
+// CHECK-LABEL: define <4 x float> @test_vector_complex_expr(
+// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[Y_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store i32 [[X]], ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[Y]], ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    store <4 x float> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[CMP]], <4 x float> [[TMP2]], <4 x float> [[TMP3]])
+// CHECK-NEXT:    ret <4 x float> [[TMP4]]
+//
+float4 test_vector_complex_expr(int x, int y, float4 a, float4 b) {
+  return __builtin_ct_select(x > y, a, b); // expected IR: the `icmp sgt` result feeds the intrinsic directly, with no extra compare against 0
+}
+
+// Test 128-bit vectors with 64-bit elements
+typedef long long __attribute__((vector_size(16))) long2; // 16 bytes: 2 x long long lanes
+typedef double __attribute__((vector_size(16))) double2;  // 16 bytes: 2 x double lanes
+
+// CHECK-LABEL: define <2 x i64> @test_vector_long2(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <2 x i64> noundef [[A:%.*]], <2 x i64> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <2 x i64>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <2 x i64>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <2 x i64> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <2 x i64> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i64>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i64>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.ct.select.v2i64(i1 [[TOBOOL]], <2 x i64> [[TMP1]], <2 x i64> [[TMP2]])
+// CHECK-NEXT:    ret <2 x i64> [[TMP3]]
+//
+long2 test_vector_long2(int cond, long2 a, long2 b) { // passed and returned directly as <2 x i64> in the expected IR
+  return __builtin_ct_select(cond, a, b); // expected IR: @llvm.ct.select.v2i64 with an i1 condition
+}
+
+// CHECK-LABEL: define <2 x double> @test_vector_double2(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <2 x double> noundef [[A:%.*]], <2 x double> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <2 x double>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <2 x double> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <2 x double> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.ct.select.v2f64(i1 [[TOBOOL]], <2 x double> [[TMP1]], <2 x double> [[TMP2]])
+// CHECK-NEXT:    ret <2 x double> [[TMP3]]
+//
+double2 test_vector_double2(int cond, double2 a, double2 b) { // passed and returned directly as <2 x double> in the expected IR
+  return __builtin_ct_select(cond, a, b); // expected IR: @llvm.ct.select.v2f64 with an i1 condition
+}
+
+// Test a scalar condition extracted from a vector element
+// CHECK-LABEL: define <4 x i32> @test_vector_from_scalar_condition(
+// CHECK-SAME: <4 x i32> noundef [[VEC_COND:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[VEC_COND_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[SCALAR_COND:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store <4 x i32> [[VEC_COND]], ptr [[VEC_COND_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[VEC_COND_ADDR]], align 16
+// CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
+// CHECK-NEXT:    store i32 [[VECEXT]], ptr [[SCALAR_COND]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[SCALAR_COND]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[TOBOOL]], <4 x i32> [[TMP2]], <4 x i32> [[TMP3]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP4]]
+//
+int4 test_vector_from_scalar_condition(int4 vec_cond, int4 a, int4 b) {
+  // Use lane 0 of vec_cond as the scalar condition (an extractelement at index 0 in the expected IR)
+  int scalar_cond = vec_cond[0];
+  return __builtin_ct_select(scalar_cond, a, b); // the condition stays scalar; only a/b are vectors
+}
+
+// Test vector chaining
+// CHECK-LABEL: define <4 x float> @test_vector_chaining(
+// CHECK-SAME: i32 noundef [[COND1:%.*]], i32 noundef [[COND2:%.*]], i32 noundef [[COND3:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]], <4 x float> noundef [[C:%.*]], <4 x float> noundef [[D:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND1_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND2_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[COND3_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[C_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[D_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[FIRST:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[SECOND:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store i32 [[COND1]], ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND2]], ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[COND3]], ptr [[COND3_ADDR]], align 4
+// CHECK-NEXT:    store <4 x float> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[C]], ptr [[C_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[D]], ptr [[D_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND1_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[FIRST]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[COND2_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL1:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[FIRST]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x float>, ptr [[C_ADDR]], align 16
+// CHECK-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL1]], <4 x float> [[TMP5]], <4 x float> [[TMP6]])
+// CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[SECOND]], align 16
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[COND3_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL2:%.*]] = icmp ne i32 [[TMP8]], 0
+// CHECK-NEXT:    [[TMP9:%.*]] = load <4 x float>, ptr [[SECOND]], align 16
+// CHECK-NEXT:    [[TMP10:%.*]] = load <4 x float>, ptr [[D_ADDR]], align 16
+// CHECK-NEXT:    [[TMP11:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL2]], <4 x float> [[TMP9]], <4 x float> [[TMP10]])
+// CHECK-NEXT:    ret <4 x float> [[TMP11]]
+//
+float4 test_vector_chaining(int cond1, int cond2, int cond3, float4 a, float4 b, float4 c, float4 d) {
+  float4 first = __builtin_ct_select(cond1, a, b);      // stage 1: a or b
+  float4 second = __builtin_ct_select(cond2, first, c); // stage 2: stage-1 result is the true operand
+  return __builtin_ct_select(cond3, second, d);         // stage 3: stage-2 result is the true operand
+}
+
+// Test special floating point values - NaN
+// CHECK-LABEL: define float @test_nan_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[NAN_VAL:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store float +qnan, ptr [[NAN_VAL]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[NAN_VAL]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call float @llvm.ct.select.f32(i1 [[TOBOOL]], float [[TMP1]], float 1.000000e+00)
+// CHECK-NEXT:    ret float [[TMP2]]
+//
+float test_nan_operands(int cond) {
+  float nan_val = __builtin_nanf(""); // stored as a quiet NaN constant (`+qnan`) in the expected IR
+  return __builtin_ct_select(cond, nan_val, 1.0f); // the NaN is loaded and passed as the true operand; 1.0f is an immediate
+}
+
+// CHECK-LABEL: define double @test_nan_double_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[NAN_VAL:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store double +qnan, ptr [[NAN_VAL]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[NAN_VAL]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.ct.select.f64(i1 [[TOBOOL]], double [[TMP1]], double 2.000000e+00)
+// CHECK-NEXT:    ret double [[TMP2]]
+//
+double test_nan_double_operands(int cond) {
+  double nan_val = __builtin_nan(""); // stored as a quiet NaN constant (`+qnan`) in the expected IR
+  return __builtin_ct_select(cond, nan_val, 2.0); // the NaN is loaded and passed as the true operand; 2.0 is an immediate
+}
+
+// Test infinity values
+// CHECK-LABEL: define float @test_infinity_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[POS_INF:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[NEG_INF:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store float +inf, ptr [[POS_INF]], align 4
+// CHECK-NEXT:    store float -inf, ptr [[NEG_INF]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[POS_INF]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[NEG_INF]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.ct.select.f32(i1 [[TOBOOL]], float [[TMP1]], float [[TMP2]])
+// CHECK-NEXT:    ret float [[TMP3]]
+//
+float test_infinity_operands(int cond) {
+  float pos_inf = __builtin_inff();  // stored as `+inf` in the expected IR
+  float neg_inf = -__builtin_inff(); // negation is folded: stored as `-inf` in the expected IR
+  return __builtin_ct_select(cond, pos_inf, neg_inf); // expected IR: @llvm.ct.select.f32
+}
+
+// CHECK-LABEL: define double @test_infinity_double_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[POS_INF:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[NEG_INF:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store double +inf, ptr [[POS_INF]], align 8
+// CHECK-NEXT:    store double -inf, ptr [[NEG_INF]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[POS_INF]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[NEG_INF]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call double @llvm.ct.select.f64(i1 [[TOBOOL]], double [[TMP1]], double [[TMP2]])
+// CHECK-NEXT:    ret double [[TMP3]]
+//
+double test_infinity_double_operands(int cond) {
+  double pos_inf = __builtin_inf();  // stored as `+inf` in the expected IR
+  double neg_inf = -__builtin_inf(); // negation is folded: stored as `-inf` in the expected IR
+  return __builtin_ct_select(cond, pos_inf, neg_inf); // expected IR: @llvm.ct.select.f64
+}
+
+// Test subnormal/denormal values
+// CHECK-LABEL: define float @test_subnormal_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SUBNORMAL1:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[SUBNORMAL2:%.*]] = alloca float, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store float 9.999940e-41, ptr [[SUBNORMAL1]], align 4
+// CHECK-NEXT:    store float 1.401300e-45, ptr [[SUBNORMAL2]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[SUBNORMAL1]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[SUBNORMAL2]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call float @llvm.ct.select.f32(i1 [[TOBOOL]], float [[TMP1]], float [[TMP2]])
+// CHECK-NEXT:    ret float [[TMP3]]
+//
+float test_subnormal_operands(int cond) {
+  // Values below the smallest normal float; the expected IR stores the rounded constants shown on each line
+  float subnormal1 = 1e-40f; // stored as 9.999940e-41
+  float subnormal2 = 1e-45f; // stored as 1.401300e-45
+  return __builtin_ct_select(cond, subnormal1, subnormal2); // expected IR: @llvm.ct.select.f32
+}
+
+// Test signed integer boundary values (maximum and minimum)
+// CHECK-LABEL: define i32 @test_integer_overflow_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[MAX_INT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[MIN_INT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 2147483647, ptr [[MAX_INT]], align 4
+// CHECK-NEXT:    store i32 -2147483648, ptr [[MIN_INT]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[MAX_INT]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[MIN_INT]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_integer_overflow_operands(int cond) {
+  int max_int = __INT_MAX__;        // stored as i32 2147483647 in the expected IR
+  int min_int = (-__INT_MAX__ - 1); // stored as i32 -2147483648 in the expected IR
+  return __builtin_ct_select(cond, max_int, min_int); // expected IR: @llvm.ct.select.i32
+}
+
+// CHECK-LABEL: define i64 @test_longlong_overflow_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[MAX_LL:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    [[MIN_LL:%.*]] = alloca i64, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i64 9223372036854775807, ptr [[MAX_LL]], align 8
+// CHECK-NEXT:    store i64 -9223372036854775808, ptr [[MIN_LL]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[MAX_LL]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[MIN_LL]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.ct.select.i64(i1 [[TOBOOL]], i64 [[TMP1]], i64 [[TMP2]])
+// CHECK-NEXT:    ret i64 [[TMP3]]
+//
+long long test_longlong_overflow_operands(int cond) {
+  long long max_ll = __LONG_LONG_MAX__;        // stored as i64 9223372036854775807 in the expected IR
+  long long min_ll = (-__LONG_LONG_MAX__ - 1); // stored as i64 -9223372036854775808 in the expected IR
+  return __builtin_ct_select(cond, max_ll, min_ll); // expected IR: @llvm.ct.select.i64
+}
+
+// Test unsigned integer boundary values (maximum and zero)
+// CHECK-LABEL: define i32 @test_unsigned_overflow_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[MAX_UINT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[MIN_UINT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 -1, ptr [[MAX_UINT]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[MIN_UINT]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[MAX_UINT]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[MIN_UINT]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+unsigned int test_unsigned_overflow_operands(int cond) {
+  unsigned int max_uint = 4294967295; // stored as i32 -1 (all bits set) in the expected IR
+  unsigned int min_uint = 0;
+  return __builtin_ct_select(cond, max_uint, min_uint); // expected IR: @llvm.ct.select.i32
+}
+
+// Test selection between a null pointer and a non-null pointer
+// CHECK-LABEL: define ptr @test_null_pointer_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]], ptr noundef [[VALID_PTR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[VALID_PTR_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[NULL_PTR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[VALID_PTR]], ptr [[VALID_PTR_ADDR]], align 8
+// CHECK-NEXT:    store ptr null, ptr [[NULL_PTR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[NULL_PTR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[VALID_PTR_ADDR]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call ptr @llvm.ct.select.p0(i1 [[TOBOOL]], ptr [[TMP1]], ptr [[TMP2]])
+// CHECK-NEXT:    ret ptr [[TMP3]]
+//
+int* test_null_pointer_operands(int cond, int* valid_ptr) {
+  int* null_ptr = (int*)0; // stored as `ptr null` in the expected IR
+  return __builtin_ct_select(cond, null_ptr, valid_ptr); // null is the true operand; neither pointer is dereferenced here
+}
+
+// Test volatile operands
+volatile int global_volatile = 42; // read through a volatile load in test_volatile_operands
+// CHECK-LABEL: define i32 @test_volatile_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[VOL_VAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load volatile i32, ptr @global_volatile, align 4
+// CHECK-NEXT:    store volatile i32 [[TMP0]], ptr [[VOL_VAL]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP1]], 0
+// CHECK-NEXT:    [[TMP2:%.*]] = load volatile i32, ptr [[VOL_VAL]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP2]], i32 100)
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_volatile_operands(int cond) {
+  volatile int vol_val = global_volatile;
+  return __builtin_ct_select(cond, vol_val, 100);
+}
+
+// Test uninitialized variable behavior (should still work with ct_select)
+// CHECK-LABEL: define i32 @test_uninitialized_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[INITIALIZED:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[INITIALIZED_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[UNINITIALIZED:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INITIALIZED]], ptr [[INITIALIZED_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[UNINITIALIZED]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[INITIALIZED_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_uninitialized_operands(int cond, int initialized) {
+  int uninitialized; // Intentionally uninitialized
+  return __builtin_ct_select(cond, uninitialized, initialized);
+}
+
+// Test zero division avoidance patterns
+// CHECK-LABEL: define i32 @test_division_by_zero_avoidance(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[DIVIDEND:%.*]], i32 noundef [[DIVISOR:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DIVIDEND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DIVISOR_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SAFE_DIVISOR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[DIVIDEND]], ptr [[DIVIDEND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[DIVISOR]], ptr [[DIVISOR_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[DIVISOR_ADDR]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[DIVISOR_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[TMP1]], i32 1)
+// CHECK-NEXT:    store i32 [[TMP2]], ptr [[SAFE_DIVISOR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[DIVIDEND_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[SAFE_DIVISOR]], align 4
+// CHECK-NEXT:    [[DIV:%.*]] = sdiv i32 [[TMP3]], [[TMP4]]
+// CHECK-NEXT:    ret i32 [[DIV]]
+//
+int test_division_by_zero_avoidance(int cond, int dividend, int divisor) {
+  // First get a safe divisor (never zero)
+  int safe_divisor = __builtin_ct_select(divisor != 0, divisor, 1);
+  // Then perform division with guaranteed non-zero divisor
+  return dividend / safe_divisor;
+}
+
+// Test array bounds checking patterns
+// CHECK-LABEL: define i32 @test_array_bounds_protection(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[INDEX:%.*]], ptr noundef [[ARRAY:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*]]:
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[ARRAY_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[SAFE_INDEX:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[INDEX]], ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[ARRAY]], ptr [[ARRAY_ADDR]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp sge i32 [[TMP0]], 0
+// CHECK-NEXT:    br i1 [[CMP]], label %[[LAND_RHS:.*]], label %[[LAND_END:.*]]
+// CHECK:       [[LAND_RHS]]:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[TMP1]], 10
+// CHECK-NEXT:    br label %[[LAND_END]]
+// CHECK:       [[LAND_END]]:
+// CHECK-NEXT:    [[TMP2:%.*]] = phi i1 [ false, %[[ENTRY]] ], [ [[CMP1]], %[[LAND_RHS]] ]
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TMP2]], i32 [[TMP3]], i32 0)
+// CHECK-NEXT:    store i32 [[TMP4]], ptr [[SAFE_INDEX]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = load ptr, ptr [[ARRAY_ADDR]], align 8
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[SAFE_INDEX]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP6]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    ret i32 [[TMP7]]
+//
+int test_array_bounds_protection(int cond, int index, int* array) {
+  // Use ct_select to ensure safe array indexing
+  int safe_index = __builtin_ct_select(index >= 0 && index < 10, index, 0);
+  return array[safe_index];
+}
+
+// Test bit manipulation edge cases
+// CHECK-LABEL: define i32 @test_bit_manipulation_edge_cases(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[VALUE:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[VALUE_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[LEFT_SHIFT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[RIGHT_SHIFT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[VALUE]], ptr [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[SHL:%.*]] = shl i32 [[TMP0]], 31
+// CHECK-NEXT:    store i32 [[SHL]], ptr [[LEFT_SHIFT]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VALUE_ADDR]], align 4
+// CHECK-NEXT:    [[SHR:%.*]] = lshr i32 [[TMP1]], 31
+// CHECK-NEXT:    store i32 [[SHR]], ptr [[RIGHT_SHIFT]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP2]], 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[LEFT_SHIFT]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[RIGHT_SHIFT]], align 4
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP3]], i32 [[TMP4]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
+unsigned int test_bit_manipulation_edge_cases(int cond, unsigned int value) {
+  // Test extreme shift counts (bit width - 1); well defined here because the operand is unsigned
+  unsigned int left_shift = value << 31;   // Keeps only bit 0, moved into the top bit
+  unsigned int right_shift = value >> 31;  // Extract the most significant bit
+  return __builtin_ct_select(cond, left_shift, right_shift);
+}
+
+// Test operands produced by signed add/sub that may overflow (emitted as nsw, so no wraparound)
+// CHECK-LABEL: define i32 @test_signed_wraparound(
+// CHECK-SAME: i32 noundef [[COND:%.*]], i32 noundef [[A:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SUM:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DIFF:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[A]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    store i32 [[ADD]], ptr [[SUM]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    store i32 [[SUB]], ptr [[DIFF]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[SUM]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DIFF]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP5]], i32 [[TMP6]])
+// CHECK-NEXT:    ret i32 [[TMP7]]
+//
+int test_signed_wraparound(int cond, int a, int b) {
+  int sum = a + b;      // Could overflow
+  int diff = a - b;     // Could underflow
+  return __builtin_ct_select(cond, sum, diff);
+}
+
+// Test vector NaN handling
+// CHECK-LABEL: define <4 x float> @test_vector_nan_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[NAN_VAL:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[NAN_VEC:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[NORMAL_VEC:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store float +qnan, ptr [[NAN_VAL]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[NAN_VAL]], align 4
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[NAN_VAL]], align 4
+// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x float> [[VECINIT]], float [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[NAN_VAL]], align 4
+// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x float> [[VECINIT1]], float [[TMP2]], i32 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[NAN_VAL]], align 4
+// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x float> [[VECINIT2]], float [[TMP3]], i32 3
+// CHECK-NEXT:    store <4 x float> [[VECINIT3]], ptr [[NAN_VEC]], align 16
+// CHECK-NEXT:    store <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, ptr [[NORMAL_VEC]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[NAN_VEC]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x float>, ptr [[NORMAL_VEC]], align 16
+// CHECK-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL]], <4 x float> [[TMP5]], <4 x float> [[TMP6]])
+// CHECK-NEXT:    ret <4 x float> [[TMP7]]
+//
+float4 test_vector_nan_operands(int cond) {
+  float nan_val = __builtin_nanf("");
+  float4 nan_vec = {nan_val, nan_val, nan_val, nan_val};
+  float4 normal_vec = {1.0f, 2.0f, 3.0f, 4.0f};
+  return __builtin_ct_select(cond, nan_vec, normal_vec);
+}
+
+// Test vector infinity handling
+// CHECK-LABEL: define <4 x float> @test_vector_infinity_operands(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[POS_INF:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[NEG_INF:%.*]] = alloca float, align 4
+// CHECK-NEXT:    [[INF_VEC:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[ZERO_VEC:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store float +inf, ptr [[POS_INF]], align 4
+// CHECK-NEXT:    store float -inf, ptr [[NEG_INF]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load float, ptr [[POS_INF]], align 4
+// CHECK-NEXT:    [[VECINIT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i32 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load float, ptr [[NEG_INF]], align 4
+// CHECK-NEXT:    [[VECINIT1:%.*]] = insertelement <4 x float> [[VECINIT]], float [[TMP1]], i32 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load float, ptr [[POS_INF]], align 4
+// CHECK-NEXT:    [[VECINIT2:%.*]] = insertelement <4 x float> [[VECINIT1]], float [[TMP2]], i32 2
+// CHECK-NEXT:    [[TMP3:%.*]] = load float, ptr [[NEG_INF]], align 4
+// CHECK-NEXT:    [[VECINIT3:%.*]] = insertelement <4 x float> [[VECINIT2]], float [[TMP3]], i32 3
+// CHECK-NEXT:    store <4 x float> [[VECINIT3]], ptr [[INF_VEC]], align 16
+// CHECK-NEXT:    store <4 x float> zeroinitializer, ptr [[ZERO_VEC]], align 16
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP4]], 0
+// CHECK-NEXT:    [[TMP5:%.*]] = load <4 x float>, ptr [[INF_VEC]], align 16
+// CHECK-NEXT:    [[TMP6:%.*]] = load <4 x float>, ptr [[ZERO_VEC]], align 16
+// CHECK-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL]], <4 x float> [[TMP5]], <4 x float> [[TMP6]])
+// CHECK-NEXT:    ret <4 x float> [[TMP7]]
+//
+float4 test_vector_infinity_operands(int cond) {
+  float pos_inf = __builtin_inff();
+  float neg_inf = -__builtin_inff();
+  float4 inf_vec = {pos_inf, neg_inf, pos_inf, neg_inf};
+  float4 zero_vec = {0.0f, 0.0f, 0.0f, 0.0f};
+  return __builtin_ct_select(cond, inf_vec, zero_vec);
+}
+
+// Test mixed special values
+// CHECK-LABEL: define double @test_mixed_special_values(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[NAN_VAL:%.*]] = alloca double, align 8
+// CHECK-NEXT:    [[INF_VAL:%.*]] = alloca double, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store double +qnan, ptr [[NAN_VAL]], align 8
+// CHECK-NEXT:    store double +inf, ptr [[INF_VAL]], align 8
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[NAN_VAL]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[INF_VAL]], align 8
+// CHECK-NEXT:    [[TMP3:%.*]] = call double @llvm.ct.select.f64(i1 [[TOBOOL]], double [[TMP1]], double [[TMP2]])
+// CHECK-NEXT:    ret double [[TMP3]]
+//
+double test_mixed_special_values(int cond) {
+  double nan_val = __builtin_nan("");
+  double inf_val = __builtin_inf();
+  return __builtin_ct_select(cond, nan_val, inf_val);
+}
+
+// Test constant-time memory access pattern
+// CHECK-LABEL: define i32 @test_constant_time_memory_access(
+// CHECK-SAME: i32 noundef [[SECRET_INDEX:%.*]], ptr noundef [[DATA_ARRAY:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SECRET_INDEX_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[DATA_ARRAY_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[RESULT:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[IS_TARGET:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CURRENT_VALUE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SELECTED_VALUE:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[SECRET_INDEX]], ptr [[SECRET_INDEX_ADDR]], align 4
+// CHECK-NEXT:    store ptr [[DATA_ARRAY]], ptr [[DATA_ARRAY_ADDR]], align 8
+// CHECK-NEXT:    store i32 0, ptr [[RESULT]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK:       [[FOR_COND]]:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 8
+// CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK:       [[FOR_BODY]]:
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[SECRET_INDEX_ADDR]], align 4
+// CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[TMP1]], [[TMP2]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext i1 [[CMP1]] to i32
+// CHECK-NEXT:    store i32 [[CONV]], ptr [[IS_TARGET]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[DATA_ARRAY_ADDR]], align 8
+// CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP4]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    store i32 [[TMP5]], ptr [[CURRENT_VALUE]], align 4
+// CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[IS_TARGET]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0
+// CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[CURRENT_VALUE]], align 4
+// CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL]], i32 [[TMP7]], i32 0)
+// CHECK-NEXT:    store i32 [[TMP8]], ptr [[SELECTED_VALUE]], align 4
+// CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[SELECTED_VALUE]], align 4
+// CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP9]]
+// CHECK-NEXT:    store i32 [[ADD]], ptr [[RESULT]], align 4
+// CHECK-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK:       [[FOR_INC]]:
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP11]], 1
+// CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP1:![0-9]+]]
+// CHECK:       [[FOR_END]]:
+// CHECK-NEXT:    [[TMP12:%.*]] = load i32, ptr [[RESULT]], align 4
+// CHECK-NEXT:    ret i32 [[TMP12]]
+//
+int test_constant_time_memory_access(int secret_index, int* data_array) {
+  // This pattern ensures constant-time memory access regardless of secret_index value
+  int result = 0;
+  // Use ct_select to accumulate values without revealing the secret index
+  for (int i = 0; i < 8; i++) {
+    int is_target = (i == secret_index);
+    int current_value = data_array[i];
+    int selected_value = __builtin_ct_select(is_target, current_value, 0);
+    result += selected_value;
+  }
+  return result;
+}
+
+// Test timing-attack resistant comparison
+// CHECK-LABEL: define i32 @test_timing_resistant_comparison(
+// CHECK-SAME: ptr noundef [[SECRET:%.*]], ptr noundef [[GUESS:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SECRET_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[GUESS_ADDR:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    [[MATCH:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[CHARS_EQUAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[BOTH_NULL:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[STILL_MATCHING:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store ptr [[SECRET]], ptr [[SECRET_ADDR]], align 8
+// CHECK-NEXT:    store ptr [[GUESS]], ptr [[GUESS_ADDR]], align 8
+// CHECK-NEXT:    store i32 1, ptr [[MATCH]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+// CHECK-NEXT:    br label %[[FOR_COND:.*]]
+// CHECK:       [[FOR_COND]]:
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 32
+// CHECK-NEXT:    br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
+// CHECK:       [[FOR_BODY]]:
+// CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[SECRET_ADDR]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[TMP1]], i64 [[IDXPROM]]
+// CHECK-NEXT:    [[TMP3:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP3]] to i32
+// CHECK-NEXT:    [[TMP4:%.*]] = load ptr, ptr [[GUESS_ADDR]], align 8
+// CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM1:%.*]] = sext i32 [[TMP5]] to i64
+// CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 [[IDXPROM1]]
+// CHECK-NEXT:    [[TMP6:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1
+// CHECK-NEXT:    [[CONV3:%.*]] = sext i8 [[TMP6]] to i32
+// CHECK-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[CONV]], [[CONV3]]
+// CHECK-NEXT:    [[CONV5:%.*]] = zext i1 [[CMP4]] to i32
+// CHECK-NEXT:    store i32 [[CONV5]], ptr [[CHARS_EQUAL]], align 4
+// CHECK-NEXT:    [[TMP7:%.*]] = load ptr, ptr [[SECRET_ADDR]], align 8
+// CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM6:%.*]] = sext i32 [[TMP8]] to i64
+// CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i64 [[IDXPROM6]]
+// CHECK-NEXT:    [[TMP9:%.*]] = load i8, ptr [[ARRAYIDX7]], align 1
+// CHECK-NEXT:    [[CONV8:%.*]] = sext i8 [[TMP9]] to i32
+// CHECK-NEXT:    [[CMP9:%.*]] = icmp eq i32 [[CONV8]], 0
+// CHECK-NEXT:    br i1 [[CMP9]], label %[[LAND_RHS:.*]], label %[[LAND_END:.*]]
+// CHECK:       [[LAND_RHS]]:
+// CHECK-NEXT:    [[TMP10:%.*]] = load ptr, ptr [[GUESS_ADDR]], align 8
+// CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[IDXPROM11:%.*]] = sext i32 [[TMP11]] to i64
+// CHECK-NEXT:    [[ARRAYIDX12:%.*]] = getelementptr inbounds i8, ptr [[TMP10]], i64 [[IDXPROM11]]
+// CHECK-NEXT:    [[TMP12:%.*]] = load i8, ptr [[ARRAYIDX12]], align 1
+// CHECK-NEXT:    [[CONV13:%.*]] = sext i8 [[TMP12]] to i32
+// CHECK-NEXT:    [[CMP14:%.*]] = icmp eq i32 [[CONV13]], 0
+// CHECK-NEXT:    br label %[[LAND_END]]
+// CHECK:       [[LAND_END]]:
+// CHECK-NEXT:    [[TMP13:%.*]] = phi i1 [ false, %[[FOR_BODY]] ], [ [[CMP14]], %[[LAND_RHS]] ]
+// CHECK-NEXT:    [[LAND_EXT:%.*]] = zext i1 [[TMP13]] to i32
+// CHECK-NEXT:    store i32 [[LAND_EXT]], ptr [[BOTH_NULL]], align 4
+// CHECK-NEXT:    [[TMP14:%.*]] = load i32, ptr [[CHARS_EQUAL]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP14]], 0
+// CHECK-NEXT:    br i1 [[TOBOOL]], label %[[LOR_END:.*]], label %[[LOR_RHS:.*]]
+// CHECK:       [[LOR_RHS]]:
+// CHECK-NEXT:    [[TMP15:%.*]] = load i32, ptr [[BOTH_NULL]], align 4
+// CHECK-NEXT:    [[TOBOOL16:%.*]] = icmp ne i32 [[TMP15]], 0
+// CHECK-NEXT:    br label %[[LOR_END]]
+// CHECK:       [[LOR_END]]:
+// CHECK-NEXT:    [[TMP16:%.*]] = phi i1 [ true, %[[LAND_END]] ], [ [[TOBOOL16]], %[[LOR_RHS]] ]
+// CHECK-NEXT:    [[TMP17:%.*]] = load i32, ptr [[MATCH]], align 4
+// CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TMP16]], i32 [[TMP17]], i32 0)
+// CHECK-NEXT:    store i32 [[TMP18]], ptr [[STILL_MATCHING]], align 4
+// CHECK-NEXT:    [[TMP19:%.*]] = load i32, ptr [[BOTH_NULL]], align 4
+// CHECK-NEXT:    [[TOBOOL17:%.*]] = icmp ne i32 [[TMP19]], 0
+// CHECK-NEXT:    [[TMP20:%.*]] = load i32, ptr [[MATCH]], align 4
+// CHECK-NEXT:    [[TMP21:%.*]] = load i32, ptr [[STILL_MATCHING]], align 4
+// CHECK-NEXT:    [[TMP22:%.*]] = call i32 @llvm.ct.select.i32(i1 [[TOBOOL17]], i32 [[TMP20]], i32 [[TMP21]])
+// CHECK-NEXT:    store i32 [[TMP22]], ptr [[MATCH]], align 4
+// CHECK-NEXT:    br label %[[FOR_INC:.*]]
+// CHECK:       [[FOR_INC]]:
+// CHECK-NEXT:    [[TMP23:%.*]] = load i32, ptr [[I]], align 4
+// CHECK-NEXT:    [[INC:%.*]] = add nsw i32 [[TMP23]], 1
+// CHECK-NEXT:    store i32 [[INC]], ptr [[I]], align 4
+// CHECK-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]]
+// CHECK:       [[FOR_END]]:
+// CHECK-NEXT:    [[TMP24:%.*]] = load i32, ptr [[MATCH]], align 4
+// CHECK-NEXT:    ret i32 [[TMP24]]
+//
+int test_timing_resistant_comparison(const char* secret, const char* guess) {
+  // Fixed-length (32-byte) comparison using ct_select; note the && and || operands below still lower to branches
+  int match = 1;
+  for (int i = 0; i < 32; i++) {
+    int chars_equal = (secret[i] == guess[i]);
+    int both_null = (secret[i] == 0) && (guess[i] == 0);
+    int still_matching = __builtin_ct_select(chars_equal || both_null, match, 0);
+    match = __builtin_ct_select(both_null, match, still_matching);
+  }
+  return match;
+}
+
+// Test ext_vector_type vectors -- should lower identically to vector_size.
+typedef int __attribute__((ext_vector_type(4))) int4_ext;
+typedef float __attribute__((ext_vector_type(4))) float4_ext;
+typedef char __attribute__((ext_vector_type(16))) char16_ext;
+
+// CHECK-LABEL: define <4 x i32> @test_ext_vector_int4(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x i32>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <4 x i32> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x i32> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[TOBOOL]], <4 x i32> [[TMP1]], <4 x i32> [[TMP2]])
+// CHECK-NEXT:    ret <4 x i32> [[TMP3]]
+//
+int4_ext test_ext_vector_int4(int cond, int4_ext a, int4_ext b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define <4 x float> @test_ext_vector_float4(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <4 x float>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <4 x float> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <4 x float> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <4 x float>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[TOBOOL]], <4 x float> [[TMP1]], <4 x float> [[TMP2]])
+// CHECK-NEXT:    ret <4 x float> [[TMP3]]
+//
+float4_ext test_ext_vector_float4(int cond, float4_ext a, float4_ext b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define <16 x i8> @test_ext_vector_char16(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <16 x i8>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <16 x i8> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <16 x i8> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <16 x i8>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <16 x i8> @llvm.ct.select.v16i8(i1 [[TOBOOL]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]])
+// CHECK-NEXT:    ret <16 x i8> [[TMP3]]
+//
+char16_ext test_ext_vector_char16(int cond, char16_ext a, char16_ext b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test half and bfloat scalar types.
+// CHECK-LABEL: define half @test_half(
+// CHECK-SAME: i32 noundef [[COND:%.*]], half noundef [[A:%.*]], half noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca half, align 2
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store half [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store half [[B]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load half, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load half, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP3:%.*]] = call half @llvm.ct.select.f16(i1 [[TOBOOL]], half [[TMP1]], half [[TMP2]])
+// CHECK-NEXT:    ret half [[TMP3]]
+//
+_Float16 test_half(int cond, _Float16 a, _Float16 b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define bfloat @test_bfloat(
+// CHECK-SAME: i32 noundef [[COND:%.*]], bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca bfloat, align 2
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store bfloat [[A]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store bfloat [[B]], ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load bfloat, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP2:%.*]] = load bfloat, ptr [[B_ADDR]], align 2
+// CHECK-NEXT:    [[TMP3:%.*]] = call bfloat @llvm.ct.select.bf16(i1 [[TOBOOL]], bfloat [[TMP1]], bfloat [[TMP2]])
+// CHECK-NEXT:    ret bfloat [[TMP3]]
+//
+__bf16 test_bfloat(int cond, __bf16 a, __bf16 b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test half and bfloat ext_vector_type vectors.
+typedef _Float16 __attribute__((ext_vector_type(8))) half8_ext;
+typedef __bf16 __attribute__((ext_vector_type(8))) bfloat8_ext;
+
+// CHECK-LABEL: define <8 x half> @test_ext_vector_half8(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <8 x half>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <8 x half> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <8 x half> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x half>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x half>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x half> @llvm.ct.select.v8f16(i1 [[TOBOOL]], <8 x half> [[TMP1]], <8 x half> [[TMP2]])
+// CHECK-NEXT:    ret <8 x half> [[TMP3]]
+//
+half8_ext test_ext_vector_half8(int cond, half8_ext a, half8_ext b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// CHECK-LABEL: define <8 x bfloat> @test_ext_vector_bfloat8(
+// CHECK-SAME: i32 noundef [[COND:%.*]], <8 x bfloat> noundef [[A:%.*]], <8 x bfloat> noundef [[B:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca <8 x bfloat>, align 16
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    store <8 x bfloat> [[A]], ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    store <8 x bfloat> [[B]], ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = load <8 x bfloat>, ptr [[A_ADDR]], align 16
+// CHECK-NEXT:    [[TMP2:%.*]] = load <8 x bfloat>, ptr [[B_ADDR]], align 16
+// CHECK-NEXT:    [[TMP3:%.*]] = call <8 x bfloat> @llvm.ct.select.v8bf16(i1 [[TOBOOL]], <8 x bfloat> [[TMP1]], <8 x bfloat> [[TMP2]])
+// CHECK-NEXT:    ret <8 x bfloat> [[TMP3]]
+//
+bfloat8_ext test_ext_vector_bfloat8(int cond, bfloat8_ext a, bfloat8_ext b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Array arguments decay to pointers before the operand type check.
+// CHECK-LABEL: define i32 @test_array_decay(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[ARR1:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT:    [[ARR2:%.*]] = alloca [4 x i32], align 4
+// CHECK-NEXT:    [[R:%.*]] = alloca ptr, align 8
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR1]], i64 0, i64 0
+// CHECK-NEXT:    [[ARRAYDECAY1:%.*]] = getelementptr inbounds [4 x i32], ptr [[ARR2]], i64 0, i64 0
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.ct.select.p0(i1 [[TOBOOL]], ptr [[ARRAYDECAY]], ptr [[ARRAYDECAY1]])
+// CHECK-NEXT:    store ptr [[TMP1]], ptr [[R]], align 8
+// CHECK-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[R]], align 8
+// CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i64 0
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
+int test_array_decay(int cond) {
+  int arr1[4], arr2[4];
+  int *r = __builtin_ct_select(cond, arr1, arr2);
+  return r[0];
+}
+
+// Function arguments decay to function pointers before the operand type check.
+typedef void (*fnptr)(void);
+void fn_a(void);
+void fn_b(void);
+// CHECK-LABEL: define ptr @test_func_decay(
+// CHECK-SAME: i32 noundef [[COND:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[COND_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[COND]], ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[COND_ADDR]], align 4
+// CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+// CHECK-NEXT:    [[TMP1:%.*]] = call ptr @llvm.ct.select.p0(i1 [[TOBOOL]], ptr @fn_a, ptr @fn_b)
+// CHECK-NEXT:    ret ptr [[TMP1]]
+//
+fnptr test_func_decay(int cond) {
+  return __builtin_ct_select(cond, fn_a, fn_b);
+}
+//.
+// CHECK: [[LOOP1]] = distinct !{[[LOOP1]], [[META2:![0-9]+]]}
+// CHECK: [[META2]] = !{!"llvm.loop.mustprogress"}
+// CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]]}
+//.
diff --git a/clang/test/Sema/builtin-ct-select.c b/clang/test/Sema/builtin-ct-select.c
index 7f2d9291299d6..1f2d61eaff78d 100644
--- a/clang/test/Sema/builtin-ct-select.c
+++ b/clang/test/Sema/builtin-ct-select.c
@@ -1,683 +1,37 @@
-// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s
 
-// Test integer types
-int test_int(int cond, int a, int b) {
-  // CHECK-LABEL: define {{.*}} @test_int
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-long long test_long(int cond, long long a, long long b) {
-  // CHECK-LABEL: define {{.*}} @test_long
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
-  // CHECK: ret i64 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-short test_short(int cond, short a, short b) {
-  // CHECK-LABEL: define {{.*}} @test_short
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i16 @llvm.ct.select.i16(i1 [[COND]], i16 %{{.*}}, i16 %{{.*}})
-  // CHECK: ret i16 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-unsigned char test_uchar(int cond, unsigned char a, unsigned char b) {
-  // CHECK-LABEL: define {{.*}} @test_uchar
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i8 @llvm.ct.select.i8(i1 [[COND]], i8 %{{.*}}, i8 %{{.*}})
-  // CHECK: ret i8 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-long long test_longlong(int cond, long long a, long long b) {
-  // CHECK-LABEL: define {{.*}} @test_longlong
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
-  // CHECK: ret i64 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test floating point types
-float test_float(int cond, float a, float b) {
-  // CHECK-LABEL: define {{.*}} @test_float
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
-  // CHECK: ret float [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-double test_double(int cond, double a, double b) {
-  // CHECK-LABEL: define {{.*}} @test_double
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
-  // CHECK: ret double [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test pointer types
-int *test_pointer(int cond, int *a, int *b) {
-  // CHECK-LABEL: define {{.*}} @test_pointer
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr %{{.*}}, ptr %{{.*}})
-  // CHECK: ret ptr [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test with different condition types
-int test_char_cond(char cond, int a, int b) {
-  // CHECK-LABEL: define {{.*}} @test_char_cond
-  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-int test_long_cond(long long cond, int a, int b) {
-  // CHECK-LABEL: define {{.*}} @test_long_cond
-  // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test with boolean condition
-int test_bool_cond(_Bool cond, int a, int b) {
-  // CHECK-LABEL: define {{.*}} @test_bool_cond
-  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test with constants
-int test_constant_cond(void) {
-  // CHECK-LABEL: define {{.*}} @test_constant_cond
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 true, i32 42, i32 24)
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(1, 42, 24);
-}
-
-int test_zero_cond(void) {
-  // CHECK-LABEL: define {{.*}} @test_zero_cond
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 false, i32 42, i32 24)
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(0, 42, 24);
-}
-
-// Test type promotion
-int test_promotion(int cond, short a, short b) {
-  // CHECK-LABEL: define {{.*}} @test_promotion
-  // CHECK-DAG: [[A_EXT:%.*]] = sext i16 %{{.*}} to i32
-  // CHECK-DAG: [[B_EXT:%.*]] = sext i16 %{{.*}} to i32
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[A_EXT]], i32 [[B_EXT]])
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(cond, (int)a, (int)b);
-}
-
-// Test mixed signedness
-unsigned int test_mixed_signedness(int cond, int a, unsigned int b) {
-  // CHECK-LABEL: define {{.*}} @test_mixed_signedness
-  // CHECK-DAG: [[A_EXT:%.*]] = sext i32 %{{.*}} to i64
-  // CHECK-DAG: [[B_EXT:%.*]] = zext i32 %{{.*}} to i64
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 [[A_EXT]], i64 [[B_EXT]])
-  // CHECK: [[RESULT_TRUNC:%.*]] = trunc i64 [[RESULT]] to i32
-  // CHECK: ret i32 [[RESULT_TRUNC]]
-  return __builtin_ct_select(cond, (long long)a, (long long)b);
-}
-
-// Test complex expression
-int test_complex_expr_alt(int x, int y) {
-  // CHECK-LABEL: define {{.*}} @test_complex_expr_alt
-  // CHECK-DAG: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, 0
-  // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
-  // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
-  // Separate the final sequence to ensure proper ordering
-  // CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[ADD]], i32 [[SUB]])
-  // CHECK-NEXT: ret i32 [[RESULT]]
-  return __builtin_ct_select(x > 0, x + y, x - y);
-}
-
-// Test nested calls
-int test_nested_structured(int cond1, int cond2, int a, int b, int c) {
-  // CHECK-LABEL: define {{.*}} @test_nested_structured
-  // Phase 1: Conditions (order doesn't matter)
-  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
-  
-  // Phase 2: Inner select (must happen before outer)
-  // CHECK: [[INNER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
-  
-  // Phase 3: Outer select (must use inner result)
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER]], i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
-}
-
-// Test with function calls
-int helper(int x) { return x * 2; }
-int test_function_calls(int cond, int x, int y) {
-  // CHECK-LABEL: define {{.*}} @test_function_calls
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[CALL1:%.*]] = call i32 @helper(i32 noundef %{{.*}})
-  // CHECK-DAG: [[CALL2:%.*]] = call i32 @helper(i32 noundef %{{.*}})
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[CALL1]], i32 [[CALL2]])
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(cond, helper(x), helper(y));
-}
-
-// Test using ct_select as condition for another ct_select
-int test_intrinsic_condition(int cond1, int cond2, int a, int b, int c, int d) {
-  // CHECK-LABEL: define {{.*}} @test_intrinsic_condition
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[INNER_COND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[INNER_COND]], 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(__builtin_ct_select(cond1, cond2, a), b, c);
-}
-
-// Test using comparison result of ct_select as condition
-int test_comparison_condition(int cond, int a, int b, int c, int d) {
-  // CHECK-LABEL: define {{.*}} @test_comparison_condition
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: [[CMP:%.*]] = icmp sgt i32 [[FIRST_SELECT]], %{{.*}}
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(__builtin_ct_select(cond, a, b) > c, d, a);
-}
-
-// Test using ct_select result in arithmetic as condition
-int test_arithmetic_condition(int cond, int a, int b, int c, int d) {
-  // CHECK-LABEL: define {{.*}} @test_arithmetic_condition
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: [[ADD:%.*]] = add nsw i32 [[FIRST_SELECT]], %{{.*}}
-  // CHECK: [[FINAL_COND:%.*]] = icmp ne i32 [[ADD]], 0
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(__builtin_ct_select(cond, a, b) + c, d, a);
-}
-
-// Test chained ct_select as conditions
-int test_chained_conditions(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e) {
-  // CHECK-LABEL: define {{.*}} @test_chained_conditions
-  // CHECK: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[FIRST:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[SECOND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  int first_select = __builtin_ct_select(cond1, a, b);
-  int second_select = __builtin_ct_select(cond2, first_select, c);
-  return __builtin_ct_select(second_select, d, e);
-}
-
-// Test using ct_select with pointer condition
-//int test_pointer_condition(int *ptr1, int *ptr2, int a, int b, int c) {
-  // NO-CHECK-LABEL: define {{.*}} @test_pointer_condition
-  // NO-CHECK: [[PTR_COND:%.*]] = icmp ne ptr %{{.*}}, null
-  // NO-CHECK: [[PTR_SELECT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[PTR_COND]], ptr %{{.*}}, ptr %{{.*}})
-  // NO-CHECK: [[FINAL_COND:%.*]] = icmp ne ptr [[PTR_SELECT]], null
-  // NO-CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
-  // NO-CHECK: ret i32 [[RESULT]]
-//  return __builtin_ct_select(__builtin_ct_select(ptr1, ptr1, ptr2), a, b);
-//}
-
-
-// Test using ct_select result in logical operations as condition
-int test_logical_condition(int cond1, int cond2, int a, int b, int c, int d) {
-  // CHECK-LABEL: define {{.*}} @test_logical_condition
-  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK-DAG: [[SELECT_BOOL:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(__builtin_ct_select(cond1, a, b) && cond2, c, d);
-}
-
-// Test multiple levels of ct_select as conditions
-int test_deep_condition_nesting(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e, int f) {
-  // CHECK-LABEL: define {{.*}} @test_deep_condition_nesting
-  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[INNER1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK-DAG: [[INNER1_COND:%.*]] = icmp ne i32 [[INNER1]], 0
-  // CHECK-DAG: [[INNER2:%.*]] = call i32 @llvm.ct.select.i32(i1 [[INNER1_COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK-DAG: [[OUTER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER2]], i32 %{{.*}})
-  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[OUTER]], 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(__builtin_ct_select(cond1, __builtin_ct_select(__builtin_ct_select(cond2, a, b), c, d), e), f, a);
-}
-
-// Test ct_select with complex condition expressions
-int test_complex_condition_expr(int x, int y, int z, int a, int b) {
-  // CHECK-LABEL: define {{.*}} @test_complex_condition_expr
-  // CHECK: [[CMP1:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
-  // CHECK: [[SELECT1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP1]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: [[CMP2:%.*]] = icmp slt i32 [[SELECT1]], %{{.*}}
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP2]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  return __builtin_ct_select(__builtin_ct_select(x > y, x, y) < z, a, b);
-}
-
-// Test vector types - 128-bit vectors
-typedef int __attribute__((vector_size(16))) int4;
-typedef float __attribute__((vector_size(16))) float4;
-typedef short __attribute__((vector_size(16))) short8;
-typedef char __attribute__((vector_size(16))) char16;
-
-int4 test_vector_int4(int cond, int4 a, int4 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_int4
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  // CHECK: ret <4 x i32> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-float4 test_vector_float4(int cond, float4 a, float4 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_float4
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK: ret <4 x float> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-short8 test_vector_short8(int cond, short8 a, short8 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_short8
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <8 x i16> @llvm.ct.select.v8i16(i1 [[COND]], <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
-  // CHECK: ret <8 x i16> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-char16 test_vector_char16(int cond, char16 a, char16 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_char16
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <16 x i8> @llvm.ct.select.v16i8(i1 [[COND]], <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
-  // CHECK: ret <16 x i8> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
+// Diagnostic coverage for the __builtin_ct_select Sema checks. Codegen
+// behavior is tested separately in clang/test/CodeGen/builtin-ct-select.c.
 
-// Test 256-bit vectors
-typedef int __attribute__((vector_size(32))) int8;
-typedef float __attribute__((vector_size(32))) float8;
-typedef double __attribute__((vector_size(32))) double4;
-
-int8 test_vector_int8(int cond, int8 a, int8 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_int8
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call <8 x i32> @llvm.ct.select.v8i32(i1 [[COND]], <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
-  return __builtin_ct_select(cond, a, b);
-}
-
-float8 test_vector_float8(int cond, float8 a, float8 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_float8
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call <8 x float> @llvm.ct.select.v8f32(i1 [[COND]], <8 x float> %{{.*}}, <8 x float> %{{.*}})
-  return __builtin_ct_select(cond, a, b);
-}
-
-double4 test_vector_double4(int cond, double4 a, double4 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_double4
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call <4 x double> @llvm.ct.select.v4f64(i1 [[COND]], <4 x double> %{{.*}}, <4 x double> %{{.*}})
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test 512-bit vectors
-typedef int __attribute__((vector_size(64))) int16;
-typedef float __attribute__((vector_size(64))) float16;
-
-int16 test_vector_int16(int cond, int16 a, int16 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_int16
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <16 x i32> @llvm.ct.select.v16i32(i1 [[COND]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
-  return __builtin_ct_select(cond, a, b);
-}
-
-float16 test_vector_float16(int cond, float16 a, float16 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_float16
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <16 x float> @llvm.ct.select.v16f32(i1 [[COND]], <16 x float> %{{.*}}, <16 x float> %{{.*}})
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test vector operations with different condition types
-int4 test_vector_char_cond(char cond, int4 a, int4 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_char_cond
-  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  // CHECK: ret <4 x i32> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-float4 test_vector_long_cond(long long cond, float4 a, float4 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_long_cond
-  // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK: ret <4 x float> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test vector constants
-int4 test_vector_constant_cond(void) {
-  // CHECK-LABEL: define {{.*}} @test_vector_constant_cond
-  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 true, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  // CHECK: ret <4 x i32> [[RESULT]]
-  int4 a = {1, 2, 3, 4};
-  int4 b = {5, 6, 7, 8};
-  return __builtin_ct_select(1, a, b);
-}
-
-float4 test_vector_zero_cond(void) {
-  // CHECK-LABEL: define {{.*}} @test_vector_zero_cond
-  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 false, <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK: ret <4 x float> [[RESULT]]
-  float4 a = {1.0f, 2.0f, 3.0f, 4.0f};
-  float4 b = {5.0f, 6.0f, 7.0f, 8.0f};
-  return __builtin_ct_select(0, a, b);
-}
-
-// Test nested vector selections
-int4 test_vector_nested(int cond1, int cond2, int4 a, int4 b, int4 c) {
-  // CHECK-LABEL: define {{.*}} @test_vector_nested
-  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[INNER:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND2]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND1]], <4 x i32> [[INNER]], <4 x i32> %{{.*}})
-  // CHECK: ret <4 x i32> [[RESULT]]
-  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
-}
-
-// Test vector selection with complex expressions
-float4 test_vector_complex_expr(int x, int y, float4 a, float4 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_complex_expr
-  // CHECK: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
-  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[CMP]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK: ret <4 x float> [[RESULT]]
-  return __builtin_ct_select(x > y, a, b);
-}
-
-// Test vector with different element sizes
-typedef long long __attribute__((vector_size(16))) long2;
-typedef double __attribute__((vector_size(16))) double2;
-
-long2 test_vector_long2(int cond, long2 a, long2 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_long2
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <2 x i64> @llvm.ct.select.v2i64(i1 [[COND]], <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
-  // CHECK: ret <2 x i64> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-double2 test_vector_double2(int cond, double2 a, double2 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_double2
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <2 x double> @llvm.ct.select.v2f64(i1 [[COND]], <2 x double> %{{.*}}, <2 x double> %{{.*}})
-  // CHECK: ret <2 x double> [[RESULT]]
-  return __builtin_ct_select(cond, a, b);
-}
-
-// Test mixed vector operations
-int4 test_vector_from_scalar_condition(int4 vec_cond, int4 a, int4 b) {
-  // CHECK-LABEL: define {{.*}} @test_vector_from_scalar_condition
-  // Extract first element and use as condition
-  int scalar_cond = vec_cond[0];
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
-  // CHECK: ret <4 x i32> [[RESULT]]
-  return __builtin_ct_select(scalar_cond, a, b);
-}
-
-// Test vector chaining
-float4 test_vector_chaining(int cond1, int cond2, int cond3, float4 a, float4 b, float4 c, float4 d) {
-  // CHECK-LABEL: define {{.*}} @test_vector_chaining
-  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[COND3:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[FIRST:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND1]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK-DAG: [[SECOND:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND2]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK-DAG: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND3]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK: ret <4 x float> [[RESULT]]
-  float4 first = __builtin_ct_select(cond1, a, b);
-  float4 second = __builtin_ct_select(cond2, first, c);
-  return __builtin_ct_select(cond3, second, d);
-}
-
-// Test special floating point values - NaN
-float test_nan_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_nan_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float  %{{.*}}, float 1.000000e+00)
-  // CHECK: ret float [[RESULT]]
-  float nan_val = __builtin_nanf("");
-  return __builtin_ct_select(cond, nan_val, 1.0f);
-}
-
-double test_nan_double_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_nan_double_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double 2.000000e+00)
-  // CHECK: ret double [[RESULT]]
-  double nan_val = __builtin_nan("");
-  return __builtin_ct_select(cond, nan_val, 2.0);
-}
-
-// Test infinity values
-float test_infinity_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_infinity_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
-  // CHECK: ret float [[RESULT]]
-  float pos_inf = __builtin_inff();
-  float neg_inf = -__builtin_inff();
-  return __builtin_ct_select(cond, pos_inf, neg_inf);
-}
-
-double test_infinity_double_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_infinity_double_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
-  // CHECK: ret double [[RESULT]]
-  double pos_inf = __builtin_inf();
-  double neg_inf = -__builtin_inf();
-  return __builtin_ct_select(cond, pos_inf, neg_inf);
-}
-
-// Test subnormal/denormal values
-float test_subnormal_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_subnormal_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
-  // CHECK: ret float [[RESULT]]
-  // Very small subnormal values
-  float subnormal1 = 1e-40f;
-  float subnormal2 = 1e-45f;
-  return __builtin_ct_select(cond, subnormal1, subnormal2);
-}
-
-// Test integer overflow boundaries
-int test_integer_overflow_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_integer_overflow_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  int max_int = __INT_MAX__;
-  int min_int = (-__INT_MAX__ - 1);
-  return __builtin_ct_select(cond, max_int, min_int);
-}
-
-long long test_longlong_overflow_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_longlong_overflow_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
-  // CHECK: ret i64 [[RESULT]]
-  long long max_ll = __LONG_LONG_MAX__;
-  long long min_ll = (-__LONG_LONG_MAX__ - 1);
-  return __builtin_ct_select(cond, max_ll, min_ll);
-}
-
-// Test unsigned overflow boundaries
-unsigned int test_unsigned_overflow_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_unsigned_overflow_operands
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  unsigned int max_uint = 4294967295;
-  unsigned int min_uint = 0;
-  return __builtin_ct_select(cond, max_uint, min_uint);
-}
-
-// Test null pointer dereference avoidance
-int* test_null_pointer_operands(int cond, int* valid_ptr) {
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr %{{.*}}, ptr %{{.*}})
-  // CHECK: ret ptr [[RESULT]]
-  int* null_ptr = (int*)0;
-  return __builtin_ct_select(cond, null_ptr, valid_ptr);
-}
-
-// Test volatile operations
-volatile int global_volatile = 42;
-int test_volatile_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_volatile_operands
-  // CHECK-DAG: [[VOLATILE_LOAD:%.*]] = load volatile i32, ptr {{.*}}
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 100)
-  // CHECK: ret i32 [[RESULT]]
-  volatile int vol_val = global_volatile;
-  return __builtin_ct_select(cond, vol_val, 100);
-}
-
-// Test uninitialized variable behavior (should still work with ct_select)
-int test_uninitialized_operands(int cond, int initialized) {
-  // CHECK-LABEL: define {{.*}} @test_uninitialized_operands
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  int uninitialized; // Intentionally uninitialized
-  return __builtin_ct_select(cond, uninitialized, initialized);
-}
-
-// Test zero division avoidance patterns
-int test_division_by_zero_avoidance(int cond, int dividend, int divisor) {
-  // CHECK-LABEL: define {{.*}} @test_division_by_zero_avoidance
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[DIV_RESULT:%.*]] = sdiv i32 %{{.*}}, %{{.*}}
-  // CHECK-DAG: [[SAFE_DIVISOR:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 1)
-  // First get a safe divisor (never zero)
-  int safe_divisor = __builtin_ct_select(divisor != 0, divisor, 1);
-  // Then perform division with guaranteed non-zero divisor
-  return dividend / safe_divisor;
-}
-
-// Test array bounds checking patterns
-int test_array_bounds_protection(int cond, int index, int* array) {
-  // CHECK-LABEL: define {{.*}} @test_array_bounds_protection
-  // CHECK-DAG: [[SAFE_INDEX:%.*]] = call i32 @llvm.ct.select.i32(i1 {{.*}}, i32 %{{.*}}, i32 0)
-  // Use ct_select to ensure safe array indexing
-  int safe_index = __builtin_ct_select(index >= 0 && index < 10, index, 0);
-  return array[safe_index];
-}
-
-// Test bit manipulation edge cases
-unsigned int test_bit_manipulation_edge_cases(int cond, unsigned int value) {
-  // CHECK-LABEL: define {{.*}} @test_bit_manipulation_edge_cases
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[SHIFT_LEFT:%.*]] = shl i32 %{{.*}}, 31
-  // CHECK-DAG: [[SHIFT_RIGHT:%.*]] = lshr i32 %{{.*}}, 31
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  // Test extreme bit shifts that could cause undefined behavior
-  unsigned int left_shift = value << 31;   // Could overflow
-  unsigned int right_shift = value >> 31;  // Extract sign bit
-  return __builtin_ct_select(cond, left_shift, right_shift);
-}
+struct S {
+  int x;
+};
 
-// Test signed integer wraparound
-int test_signed_wraparound(int cond, int a, int b) {
-  // CHECK-LABEL: define {{.*}} @test_signed_wraparound
-  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
-  // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
-  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
-  // CHECK: ret i32 [[RESULT]]
-  int sum = a + b;      // Could overflow
-  int diff = a - b;     // Could underflow
-  return __builtin_ct_select(cond, sum, diff);
+// A well-formed call must not diagnose.
+int test_valid(int c, int a, int b) {
+  return __builtin_ct_select(c, a, b);
 }
 
-// Test vector NaN handling
-float4 test_vector_nan_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_vector_nan_operands
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK: ret <4 x float> [[RESULT]]
-  float nan_val = __builtin_nanf("");
-  float4 nan_vec = {nan_val, nan_val, nan_val, nan_val};
-  float4 normal_vec = {1.0f, 2.0f, 3.0f, 4.0f};
-  return __builtin_ct_select(cond, nan_vec, normal_vec);
+// The builtin requires exactly three arguments.
+void test_too_few(int c, int a) {
+  __builtin_ct_select(c, a); // expected-error {{too few arguments to function call, expected at least 3, have 2}}
 }
 
-// Test vector infinity handling
-float4 test_vector_infinity_operands(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_vector_infinity_operands
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
-  // CHECK: ret <4 x float> [[RESULT]]
-  float pos_inf = __builtin_inff();
-  float neg_inf = -__builtin_inff();
-  float4 inf_vec = {pos_inf, neg_inf, pos_inf, neg_inf};
-  float4 zero_vec = {0.0f, 0.0f, 0.0f, 0.0f};
-  return __builtin_ct_select(cond, inf_vec, zero_vec);
+void test_too_many(int c, int a, int b, int d) {
+  __builtin_ct_select(c, a, b, d); // expected-error {{too many arguments to function call, expected 3, have 4}}
 }
 
-// Test mixed special values
-double test_mixed_special_values(int cond) {
-  // CHECK-LABEL: define {{.*}} @test_mixed_special_values
-  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
-  // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
-  // CHECK: ret double [[RESULT]]
-  double nan_val = __builtin_nan("");
-  double inf_val = __builtin_inf();
-  return __builtin_ct_select(cond, nan_val, inf_val);
+// The condition must have arithmetic or pointer type; a struct is rejected.
+void test_noninteger_cond(struct S s, int a, int b) {
+  __builtin_ct_select(s, a, b); // expected-error {{used type 'struct S' where arithmetic or pointer type is required}}
 }
 
-// Test constant-time memory access pattern
-int test_constant_time_memory_access(int secret_index, int* data_array) {
-  // CHECK-LABEL: define {{.*}} @test_constant_time_memory_access
-  // This pattern ensures constant-time memory access regardless of secret_index value
-  int result = 0;
-  // Use ct_select to accumulate values without revealing the secret index
-  for (int i = 0; i < 8; i++) {
-    int is_target = (i == secret_index);
-    int current_value = data_array[i];
-    int selected_value = __builtin_ct_select(is_target, current_value, 0);
-    result += selected_value;
-  }
-  return result;
+// The value operands must be scalar or vector types.
+void test_nonscalar_operands(int c, struct S s) {
+  __builtin_ct_select(c, s, s); // expected-error {{incompatible operand types ('struct S' and 'struct S')}}
 }
 
-// Test timing-attack resistant comparison
-int test_timing_resistant_comparison(const char* secret, const char* guess) {
-  // CHECK-LABEL: define {{.*}} @test_timing_resistant_comparison
-  // Constant-time string comparison using ct_select
-  int match = 1;
-  for (int i = 0; i < 32; i++) {
-    int chars_equal = (secret[i] == guess[i]);
-    int both_null = (secret[i] == 0) && (guess[i] == 0);
-    int still_matching = __builtin_ct_select(chars_equal || both_null, match, 0);
-    match = __builtin_ct_select(both_null, match, still_matching);
-  }
-  return match;
+// The two value operands must have the same type.
+void test_mismatched_operands(int c, int a, int *p) {
+  __builtin_ct_select(c, a, p); // expected-error {{incompatible operand types ('int' and 'int *')}}
 }