[llvm-branch-commits] [clang] [llvm] [ConstantTime][MIPS] Add comprehensive tests for ct.select (PR #166705)

Fri May 22 18:10:01 PDT 2026

https://github.com/wizardengineer updated https://github.com/llvm/llvm-project/pull/166705

>From 449d362f834f40bffec55243ecc886d44decd5a2 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 10:56:34 -0500
Subject: [PATCH 1/4] [ConstantTime][Clang] Add __builtin_ct_select for
 constant-time selection

---
 clang/docs/LanguageExtensions.rst             |  44 ++
 clang/include/clang/Basic/Builtins.td         |   8 +
 clang/lib/CodeGen/CGBuiltin.cpp               |  13 +
 clang/lib/Sema/SemaChecking.cpp               |  64 ++
 .../test/Sema/builtin-ct-select-edge-cases.c  | 373 ++++++++++
 clang/test/Sema/builtin-ct-select.c           | 683 ++++++++++++++++++
 6 files changed, 1185 insertions(+)
 create mode 100644 clang/test/Sema/builtin-ct-select-edge-cases.c
 create mode 100644 clang/test/Sema/builtin-ct-select.c

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 03cb02deb5e7f..6f5cd5f95cdb0 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -7332,3 +7332,47 @@ Clang fails to reject some code that should be rejected. e.g.,
   // own initializer rather than rejecting the code with an undeclared identifier
   // diagnostic.
   auto x = x;
+
+.. _langext-__builtin_ct_select:
+
+``__builtin_ct_select``
+-----------------------
+
+``__builtin_ct_select`` performs a constant-time conditional selection between
+two values. Unlike the ternary operator ``?:``, this builtin is designed to
+execute in constant time regardless of the condition value, making it suitable
+for cryptographic and security-sensitive code where timing side-channels must
+be avoided.
+
+**Syntax**:
+
+.. code-block:: c++
+
+  __builtin_ct_select(condition, true_value, false_value)
+
+**Examples**:
+
+.. code-block:: c++
+
+  // Select between two integers
+  int result = __builtin_ct_select(secret_bit, value_a, value_b);
+
+  // Select between two pointers
+  int *ptr = __builtin_ct_select(condition, ptr_a, ptr_b);
+
+  // Select between two floating-point values
+  double d = __builtin_ct_select(flag, 1.0, 2.0);
+
+**Description**:
+
+The first argument is an integer condition that is converted to a boolean
+(non-zero is true, zero is false). The second and third arguments must have
+the same scalar or vector type. The builtin returns the second argument if
+the condition is true, otherwise the third argument.
+
+The operation is guaranteed to be lowered to constant-time machine code that
+does not branch on the condition value, preventing timing-based side-channel
+attacks.
+
+Query for this feature with ``__has_builtin(__builtin_ct_select)``.
+
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 40ec94ab75046..389754a37f7e3 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5810,3 +5810,11 @@ def CountedByRef : Builtin {
   let Attributes = [NoThrow, CustomTypeChecking];
   let Prototype = "int(...)";
 }
+
+// Constant-time select builtin
+def CtSelect : Builtin {
+  let Spellings = ["__builtin_ct_select"];
+  let Attributes = [NoThrow, Const, UnevaluatedArguments,
+                    ConstIgnoringExceptions, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cac1628e68721..f69390b4ace57 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6668,6 +6668,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
     auto Str = CGM.GetAddrOfConstantCString(Name, "");
     return RValue::get(Str.getPointer());
   }
+  case Builtin::BI__builtin_ct_select: {
+    auto *Cond = EmitScalarExpr(E->getArg(0));
+    auto *A = EmitScalarExpr(E->getArg(1));
+    auto *B = EmitScalarExpr(E->getArg(2));
+
+    if (Cond->getType()->getIntegerBitWidth() != 1)
+      Cond = Builder.CreateICmpNE(
+          Cond, llvm::ConstantInt::get(Cond->getType(), 0), "cond.bool");
+
+    llvm::Function *Fn =
+        CGM.getIntrinsic(llvm::Intrinsic::ct_select, {A->getType()});
+    return RValue::get(Builder.CreateCall(Fn, {Cond, A, B}));
+  }
   }
 
   // If this is an alias for a lib function (e.g. __builtin_sin), emit
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index cc834bbee23c4..e5a15c84de8d3 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3928,6 +3928,70 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
     if (BuiltinCountedByRef(TheCall))
       return ExprError();
     break;
+
+  case Builtin::BI__builtin_ct_select: {
+    if (TheCall->getNumArgs() != 3) {
+      // Simple argument count check without complex diagnostics
+      if (TheCall->getNumArgs() < 3) {
+        return Diag(TheCall->getEndLoc(),
+                    diag::err_typecheck_call_too_few_args_at_least)
+               << 0 << 3 << TheCall->getNumArgs() << 0
+               << TheCall->getCallee()->getSourceRange();
+      } else {
+        return Diag(TheCall->getEndLoc(),
+                    diag::err_typecheck_call_too_many_args)
+               << 0 << 3 << TheCall->getNumArgs() << 0
+               << TheCall->getCallee()->getSourceRange();
+      }
+    }
+    auto *Cond = TheCall->getArg(0);
+    auto *A = TheCall->getArg(1);
+    auto *B = TheCall->getArg(2);
+
+    QualType CondTy = Cond->getType();
+    if (!CondTy->isIntegerType()) {
+      return Diag(Cond->getBeginLoc(), diag::err_typecheck_cond_expect_scalar)
+             << CondTy << Cond->getSourceRange();
+    }
+
+    ExprResult ARes = DefaultFunctionArrayLvalueConversion(A);
+    ExprResult BRes = DefaultFunctionArrayLvalueConversion(B);
+    if (ARes.isInvalid() || BRes.isInvalid())
+      return ExprError();
+
+    A = ARes.get();
+    B = BRes.get();
+    TheCall->setArg(1, A);
+    TheCall->setArg(2, B);
+
+    QualType ATy = A->getType();
+    QualType BTy = B->getType();
+
+    // Both value operands must have a scalar or vector type.
+    if ((!ATy->isScalarType() && !ATy->isVectorType()) ||
+        (!BTy->isScalarType() && !BTy->isVectorType())) {
+      return Diag(A->getBeginLoc(),
+                  diag::err_typecheck_cond_incompatible_operands)
+             << ATy << BTy << A->getSourceRange() << B->getSourceRange();
+    }
+
+    // Require both value operands to have exactly the same type.
+    if (!Context.hasSameType(ATy, BTy)) {
+      // Mismatched operand types are rejected; no conversion is attempted.
+      return Diag(A->getBeginLoc(),
+                  diag::err_typecheck_cond_incompatible_operands)
+             << ATy << BTy << A->getSourceRange() << B->getSourceRange();
+    }
+
+    QualType ResultTy = ATy;
+    ExprResult CondRes = PerformContextuallyConvertToBool(Cond);
+    if (CondRes.isInvalid())
+      return ExprError();
+
+    TheCall->setArg(0, CondRes.get());
+    TheCall->setType(ResultTy);
+    return TheCall;
+  }
   }
 
   if (getLangOpts().HLSL && HLSL().CheckBuiltinFunctionCall(BuiltinID, TheCall))
diff --git a/clang/test/Sema/builtin-ct-select-edge-cases.c b/clang/test/Sema/builtin-ct-select-edge-cases.c
new file mode 100644
index 0000000000000..167b19bf20663
--- /dev/null
+++ b/clang/test/Sema/builtin-ct-select-edge-cases.c
@@ -0,0 +1,373 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
+
+// Test with various condition expressions
+int test_conditional_expressions(int x, int y, int a, int b) {
+  // Logical expressions
+  int result1 = __builtin_ct_select(x && y, a, b);
+  int result2 = __builtin_ct_select(x || y, a, b);
+  int result3 = __builtin_ct_select(!x, a, b);
+  
+  // Comparison expressions
+  int result4 = __builtin_ct_select(x == y, a, b);
+  int result5 = __builtin_ct_select(x != y, a, b);
+  int result6 = __builtin_ct_select(x < y, a, b);
+  int result7 = __builtin_ct_select(x > y, a, b);
+  int result8 = __builtin_ct_select(x <= y, a, b);
+  int result9 = __builtin_ct_select(x >= y, a, b);
+  
+  // Bitwise expressions
+  int result10 = __builtin_ct_select(x & y, a, b);
+  int result11 = __builtin_ct_select(x | y, a, b);
+  int result12 = __builtin_ct_select(x ^ y, a, b);
+  int result13 = __builtin_ct_select(~x, a, b);
+  
+  // Arithmetic expressions
+  int result14 = __builtin_ct_select(x + y, a, b);
+  int result15 = __builtin_ct_select(x - y, a, b);
+  int result16 = __builtin_ct_select(x * y, a, b);
+  int result17 = __builtin_ct_select(x / y, a, b);
+  int result18 = __builtin_ct_select(x % y, a, b);
+  
+  return result1 + result2 + result3 + result4 + result5 + result6 + result7 + result8 + result9 + result10 + result11 + result12 + result13 + result14 + result15 + result16 + result17 + result18;
+}
+
+// Test with extreme values
+int test_extreme_values(int cond) {
+  // Maximum and minimum values
+  int max_int = __builtin_ct_select(cond, __INT_MAX__, -__INT_MAX__ - 1);
+  
+  // Very large numbers
+  long long max_ll = __builtin_ct_select(cond, __LONG_LONG_MAX__, -__LONG_LONG_MAX__ - 1);
+  
+  // Floating point extremes
+  float max_float = __builtin_ct_select(cond, __FLT_MAX__, -__FLT_MAX__);
+  double max_double = __builtin_ct_select(cond, __DBL_MAX__, -__DBL_MAX__);
+  
+  return max_int;
+}
+
+// Test with zero and negative zero
+int test_zero_values(int cond) {
+  // Integer zeros
+  int zero_int = __builtin_ct_select(cond, 0, -0);
+  
+  // Floating point zeros
+  float zero_float = __builtin_ct_select(cond, 0.0f, -0.0f);
+  double zero_double = __builtin_ct_select(cond, 0.0, -0.0);
+  
+  return zero_int;
+}
+
+// Test with infinity and NaN
+int test_special_float_values(int cond) {
+  // Infinity
+  float inf_float = __builtin_ct_select(cond, __builtin_inff(), -__builtin_inff());
+  double inf_double = __builtin_ct_select(cond, __builtin_inf(), -__builtin_inf());
+  
+  // NaN
+  float nan_float = __builtin_ct_select(cond, __builtin_nanf(""), __builtin_nanf(""));
+  double nan_double = __builtin_ct_select(cond, __builtin_nan(""), __builtin_nan(""));
+  
+  return 0;
+}
+
+// Test with complex pointer scenarios
+int test_pointer_edge_cases(int cond) {
+  int arr[10];
+  int *ptr1 = arr;
+  int *ptr2 = arr + 5;
+  
+  // Array pointers
+  int *result1 = __builtin_ct_select(cond, ptr1, ptr2);
+  
+  // Pointer arithmetic
+  int *result2 = __builtin_ct_select(cond, arr + 1, arr + 2);
+  
+  // NULL vs non-NULL
+  int *result3 = __builtin_ct_select(cond, ptr1, (int*)0);
+  
+  // Different pointer types (should fail)
+  float *fptr = (float*)0;
+  int *result4 = __builtin_ct_select(cond, ptr1, fptr); // expected-error {{incompatible operand types ('int *' and 'float *')}}
+  
+  return *result1;
+}
+
+// Test with function pointers
+int func1(int x) { return x; }
+int func2(int x) { return x * 2; }
+float func3(float x) { return x; }
+
+int test_function_pointers(int cond, int x) {
+  // Same signature function pointer 
+  int (*fptr)(int) = __builtin_ct_select(cond, &func1, &func2);
+  
+  // Different signature function pointers (should fail)
+  int (*bad_fptr)(int) = __builtin_ct_select(cond, &func1, &func3); // expected-error {{incompatible operand types ('int (*)(int)' and 'float (*)(float)')}}
+  
+  return fptr(x);
+}
+
+// Test with void pointers
+void *test_void_pointers(int cond, void *a, void *b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with const/volatile qualifiers
+int test_qualifiers(int cond) {
+  const int ca = 10;
+  const int cb = 20;
+  volatile int va = 30;
+  volatile int vb = 40;
+  const volatile int cva = 50;
+  const volatile int cvb = 60;
+  
+  // const to const
+  const int result1 = __builtin_ct_select(cond, ca, cb);
+  
+  // volatile to volatile
+  volatile int result2 = __builtin_ct_select(cond, va, vb);
+  
+  // const volatile to const volatile
+  const volatile int result3 = __builtin_ct_select(cond, cva, cvb);
+  
+  return result1 + result2 + result3;
+}
+
+// Test with arrays (accepted: both operands decay to 'int *')
+int test_arrays(int cond) {
+  int arr1[5] = {1, 2, 3, 4, 5};
+  int arr2[5] = {6, 7, 8, 9, 10};
+  
+  // Accepted: arr1 and arr2 both decay to 'int *' before type checking.
+  int *result = __builtin_ct_select(cond, arr1, arr2); 
+  
+  return result[0];
+}
+
+// Test with structures (should fail)
+struct Point {
+  int x, y;
+};
+
+struct Point test_structs(int cond) {
+  struct Point p1 = {1, 2};
+  struct Point p2 = {3, 4};
+  
+  return __builtin_ct_select(cond, p1, p2); // expected-error {{incompatible operand types ('struct Point' and 'struct Point')}}
+}
+
+// Test with unions (should fail)
+union Data {
+  int i;
+  float f;
+};
+
+union Data test_unions(int cond) {
+  union Data d1 = {.i = 10};
+  union Data d2 = {.i = 20};
+  
+  return __builtin_ct_select(cond, d1, d2); // expected-error {{incompatible operand types ('union Data' and 'union Data')}}
+}
+
+// Test with bit fields (should work as they're integers)
+struct BitField {
+  int a : 4;
+  int b : 4;
+};
+
+int test_bit_fields(int cond) {
+  struct BitField bf1 = {1, 2};
+  struct BitField bf2 = {3, 4};
+  
+  // Individual bit fields should work
+  int result1 = __builtin_ct_select(cond, bf1.a, bf2.a);
+  int result2 = __builtin_ct_select(cond, bf1.b, bf2.b);
+  
+  return result1 + result2;
+}
+
+// Test with designated initializers
+int test_designated_init(int cond) {
+  int arr1[3] = {[0] = 1, [1] = 2, [2] = 3};
+  int arr2[3] = {[0] = 4, [1] = 5, [2] = 6};
+  
+  // Access specific elements
+  int result1 = __builtin_ct_select(cond, arr1[0], arr2[0]);
+  int result2 = __builtin_ct_select(cond, arr1[1], arr2[1]);
+  
+  return result1 + result2;
+}
+
+// Test with complex expressions in arguments
+int complex_expr(int x) { return x * x; }
+
+int test_complex_arguments(int cond, int x, int y) {
+  // Function calls as arguments
+  int result1 = __builtin_ct_select(cond, complex_expr(x), complex_expr(y));
+  
+  // Ternary operator as arguments
+  int result2 = __builtin_ct_select(cond, x > 0 ? x : -x, y > 0 ? y : -y);
+  
+  // Compound literals
+  int result3 = __builtin_ct_select(cond, (int){x}, (int){y});
+  
+  return result1 + result2 + result3;
+}
+
+// Test with preprocessor macros
+#define MACRO_A 42
+#define MACRO_B 24
+#define MACRO_COND(x) (x > 0)
+
+int test_macros(int x) {
+  int result1 = __builtin_ct_select(MACRO_COND(x), MACRO_A, MACRO_B);
+  
+  // Nested macros
+  #define NESTED_SELECT(c, a, b) __builtin_ct_select(c, a, b)
+  int result2 = NESTED_SELECT(x, 10, 20);
+  
+  return result1 + result2;
+}
+
+// Test with string literals (accepted: both literals decay to 'char *')
+const char *test_strings(int cond) {
+  return __builtin_ct_select(cond, "hello", "world"); 
+}
+
+// Test with variable length arrays (VLA)
+int test_vla(int cond, int n) {
+  int vla1[n];
+  int vla2[n];
+  
+  // Individual elements should work
+  vla1[0] = 1;
+  vla2[0] = 2;
+  int result = __builtin_ct_select(cond, vla1[0], vla2[0]); 
+  
+  return result;
+}
+
+// Test with typedef
+typedef int MyInt;
+typedef float MyFloat;
+
+MyInt test_typedef(int cond, MyInt a, MyInt b) {
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with different typedef types (should fail)
+MyInt test_different_typedef(int cond, MyInt a, MyFloat b) {
+  return __builtin_ct_select(cond, a, b); // expected-error {{incompatible operand types ('MyInt' (aka 'int') and 'MyFloat' (aka 'float'))}}
+}
+
+// Test with side effects (should be evaluated)
+int side_effect_counter = 0;
+int side_effect_func(int x) {
+  side_effect_counter++;
+  return x;
+}
+
+int test_side_effects(int cond) {
+  // Both arguments should be evaluated
+  int result = __builtin_ct_select(cond, side_effect_func(10), side_effect_func(20));
+  return result;
+}
+
+// Test with goto labels (context where expressions are used)
+int test_goto_context(int cond, int a, int b) {
+  int result = __builtin_ct_select(cond, a, b);
+  
+  if (result > 0) {
+    goto positive;
+  } else {
+    goto negative;
+  }
+  
+positive:
+  return result;
+  
+negative:
+  return -result;
+}
+
+// Test with switch statements
+int test_switch_context(int cond, int a, int b) {
+  int result = __builtin_ct_select(cond, a, b);
+  
+  switch (result) {
+    case 0:
+      return 0;
+    case 1:
+      return 1;
+    default:
+      return -1;
+  }
+}
+
+// Test with loops
+int test_loop_context(int cond, int a, int b) {
+  int result = __builtin_ct_select(cond, a, b);
+  int sum = 0;
+  
+  for (int i = 0; i < result; i++) {
+    sum += i;
+  }
+  
+  return sum;
+}
+
+// Test with recursive functions
+int factorial(int n) {
+  if (n <= 1) return 1;
+  return n * factorial(n - 1);
+}
+
+int test_recursive(int cond, int n) {
+  int result = __builtin_ct_select(cond, n, n + 1);
+  return factorial(result);
+}
+
+// Test with inline functions
+static inline int inline_func(int x) {
+  return x * 2;
+}
+
+int test_inline(int cond, int a, int b) {
+  return __builtin_ct_select(cond, inline_func(a), inline_func(b));
+}
+
+// Test with static variables
+int test_static_vars(int cond) {
+  static int static_a = 10;
+  static int static_b = 20;
+  
+  return __builtin_ct_select(cond, static_a, static_b);
+}
+
+// Test with extern variables
+extern int extern_a;
+extern int extern_b;
+
+int test_extern_vars(int cond) {
+  return __builtin_ct_select(cond, extern_a, extern_b);
+}
+
+// Test with register variables
+int test_register_vars(int cond) {
+  register int reg_a = 30;
+  register int reg_b = 40;
+  
+  return __builtin_ct_select(cond, reg_a, reg_b);
+}
+
+// Test with thread-local variables (C11)
+#if __STDC_VERSION__ >= 201112L
+_Thread_local int tls_a = 50;
+_Thread_local int tls_b = 60;
+
+int test_tls_vars(int cond) {
+  return __builtin_ct_select(cond, tls_a, tls_b);
+}
+#endif
diff --git a/clang/test/Sema/builtin-ct-select.c b/clang/test/Sema/builtin-ct-select.c
new file mode 100644
index 0000000000000..7f2d9291299d6
--- /dev/null
+++ b/clang/test/Sema/builtin-ct-select.c
@@ -0,0 +1,683 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+
+// Test integer types
+int test_int(int cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_int
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+long long test_long(int cond, long long a, long long b) {
+  // CHECK-LABEL: define {{.*}} @test_long
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+  // CHECK: ret i64 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+short test_short(int cond, short a, short b) {
+  // CHECK-LABEL: define {{.*}} @test_short
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i16 @llvm.ct.select.i16(i1 [[COND]], i16 %{{.*}}, i16 %{{.*}})
+  // CHECK: ret i16 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+unsigned char test_uchar(int cond, unsigned char a, unsigned char b) {
+  // CHECK-LABEL: define {{.*}} @test_uchar
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i8 @llvm.ct.select.i8(i1 [[COND]], i8 %{{.*}}, i8 %{{.*}})
+  // CHECK: ret i8 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+long long test_longlong(int cond, long long a, long long b) {
+  // CHECK-LABEL: define {{.*}} @test_longlong
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+  // CHECK: ret i64 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test floating point types
+float test_float(int cond, float a, float b) {
+  // CHECK-LABEL: define {{.*}} @test_float
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+  // CHECK: ret float [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+double test_double(int cond, double a, double b) {
+  // CHECK-LABEL: define {{.*}} @test_double
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+  // CHECK: ret double [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test pointer types
+int *test_pointer(int cond, int *a, int *b) {
+  // CHECK-LABEL: define {{.*}} @test_pointer
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr %{{.*}}, ptr %{{.*}})
+  // CHECK: ret ptr [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with different condition types
+int test_char_cond(char cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_char_cond
+  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+int test_long_cond(long long cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_long_cond
+  // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with boolean condition
+int test_bool_cond(_Bool cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_bool_cond
+  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test with constants
+int test_constant_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_constant_cond
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 true, i32 42, i32 24)
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(1, 42, 24);
+}
+
+int test_zero_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_zero_cond
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 false, i32 42, i32 24)
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(0, 42, 24);
+}
+
+// Test type promotion
+int test_promotion(int cond, short a, short b) {
+  // CHECK-LABEL: define {{.*}} @test_promotion
+  // CHECK-DAG: [[A_EXT:%.*]] = sext i16 %{{.*}} to i32
+  // CHECK-DAG: [[B_EXT:%.*]] = sext i16 %{{.*}} to i32
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[A_EXT]], i32 [[B_EXT]])
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, (int)a, (int)b);
+}
+
+// Test mixed signedness
+unsigned int test_mixed_signedness(int cond, int a, unsigned int b) {
+  // CHECK-LABEL: define {{.*}} @test_mixed_signedness
+  // CHECK-DAG: [[A_EXT:%.*]] = sext i32 %{{.*}} to i64
+  // CHECK-DAG: [[B_EXT:%.*]] = zext i32 %{{.*}} to i64
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 [[A_EXT]], i64 [[B_EXT]])
+  // CHECK: [[RESULT_TRUNC:%.*]] = trunc i64 [[RESULT]] to i32
+  // CHECK: ret i32 [[RESULT_TRUNC]]
+  return __builtin_ct_select(cond, (long long)a, (long long)b);
+}
+
+// Test complex expression
+int test_complex_expr_alt(int x, int y) {
+  // CHECK-LABEL: define {{.*}} @test_complex_expr_alt
+  // CHECK-DAG: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, 0
+  // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
+  // Separate the final sequence to ensure proper ordering
+  // CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[ADD]], i32 [[SUB]])
+  // CHECK-NEXT: ret i32 [[RESULT]]
+  return __builtin_ct_select(x > 0, x + y, x - y);
+}
+
+// Test nested calls
+int test_nested_structured(int cond1, int cond2, int a, int b, int c) {
+  // CHECK-LABEL: define {{.*}} @test_nested_structured
+  // Phase 1: Conditions (order doesn't matter)
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  
+  // Phase 2: Inner select (must happen before outer)
+  // CHECK: [[INNER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+  
+  // Phase 3: Outer select (must use inner result)
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER]], i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
+}
+
+// Test with function calls
+int helper(int x) { return x * 2; }
+int test_function_calls(int cond, int x, int y) {
+  // CHECK-LABEL: define {{.*}} @test_function_calls
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[CALL1:%.*]] = call i32 @helper(i32 noundef %{{.*}})
+  // CHECK-DAG: [[CALL2:%.*]] = call i32 @helper(i32 noundef %{{.*}})
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[CALL1]], i32 [[CALL2]])
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(cond, helper(x), helper(y));
+}
+
+// Test using ct_select as condition for another ct_select
+int test_intrinsic_condition(int cond1, int cond2, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_intrinsic_condition
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[INNER_COND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[INNER_COND]], 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond1, cond2, a), b, c);
+}
+
+// Test using comparison result of ct_select as condition
+int test_comparison_condition(int cond, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_comparison_condition
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: [[CMP:%.*]] = icmp sgt i32 [[FIRST_SELECT]], %{{.*}}
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond, a, b) > c, d, a);
+}
+
+// Test using ct_select result in arithmetic as condition
+int test_arithmetic_condition(int cond, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_arithmetic_condition
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: [[ADD:%.*]] = add nsw i32 [[FIRST_SELECT]], %{{.*}}
+  // CHECK: [[FINAL_COND:%.*]] = icmp ne i32 [[ADD]], 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond, a, b) + c, d, a);
+}
+
+// Test chained ct_select as conditions
+int test_chained_conditions(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e) {
+  // CHECK-LABEL: define {{.*}} @test_chained_conditions
+  // CHECK: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[FIRST:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[SECOND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int first_select = __builtin_ct_select(cond1, a, b);
+  int second_select = __builtin_ct_select(cond2, first_select, c);
+  return __builtin_ct_select(second_select, d, e);
+}
+
+// Test using ct_select with pointer condition
+//int test_pointer_condition(int *ptr1, int *ptr2, int a, int b, int c) {
+  // NO-CHECK-LABEL: define {{.*}} @test_pointer_condition
+  // NO-CHECK: [[PTR_COND:%.*]] = icmp ne ptr %{{.*}}, null
+  // NO-CHECK: [[PTR_SELECT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[PTR_COND]], ptr %{{.*}}, ptr %{{.*}})
+  // NO-CHECK: [[FINAL_COND:%.*]] = icmp ne ptr [[PTR_SELECT]], null
+  // NO-CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // NO-CHECK: ret i32 [[RESULT]]
+//  return __builtin_ct_select(__builtin_ct_select(ptr1, ptr1, ptr2), a, b);
+//}
+
+
+// Test using ct_select result in logical operations as condition
+int test_logical_condition(int cond1, int cond2, int a, int b, int c, int d) {
+  // CHECK-LABEL: define {{.*}} @test_logical_condition
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[SELECT_BOOL:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond1, a, b) && cond2, c, d);
+}
+
+// Test multiple levels of ct_select as conditions
+int test_deep_condition_nesting(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e, int f) {
+  // CHECK-LABEL: define {{.*}} @test_deep_condition_nesting
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[INNER1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[INNER1_COND:%.*]] = icmp ne i32 [[INNER1]], 0
+  // CHECK-DAG: [[INNER2:%.*]] = call i32 @llvm.ct.select.i32(i1 [[INNER1_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK-DAG: [[OUTER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER2]], i32 %{{.*}})
+  // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[OUTER]], 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(cond1, __builtin_ct_select(__builtin_ct_select(cond2, a, b), c, d), e), f, a);
+}
+
+// Test ct_select with complex condition expressions
+int test_complex_condition_expr(int x, int y, int z, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_complex_condition_expr
+  // CHECK: [[CMP1:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
+  // CHECK: [[SELECT1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP1]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: [[CMP2:%.*]] = icmp slt i32 [[SELECT1]], %{{.*}}
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP2]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  return __builtin_ct_select(__builtin_ct_select(x > y, x, y) < z, a, b);
+}
+
+// Test vector types - 128-bit vectors
+typedef int __attribute__((vector_size(16))) int4;
+typedef float __attribute__((vector_size(16))) float4;
+typedef short __attribute__((vector_size(16))) short8;
+typedef char __attribute__((vector_size(16))) char16;
+
+int4 test_vector_int4(int cond, int4 a, int4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_int4
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+float4 test_vector_float4(int cond, float4 a, float4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_float4
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+short8 test_vector_short8(int cond, short8 a, short8 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_short8
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <8 x i16> @llvm.ct.select.v8i16(i1 [[COND]], <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  // CHECK: ret <8 x i16> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+char16 test_vector_char16(int cond, char16 a, char16 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_char16
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <16 x i8> @llvm.ct.select.v16i8(i1 [[COND]], <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  // CHECK: ret <16 x i8> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test 256-bit vectors
+typedef int __attribute__((vector_size(32))) int8;
+typedef float __attribute__((vector_size(32))) float8;
+typedef double __attribute__((vector_size(32))) double4;
+
+int8 test_vector_int8(int cond, int8 a, int8 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_int8
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call <8 x i32> @llvm.ct.select.v8i32(i1 [[COND]], <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+float8 test_vector_float8(int cond, float8 a, float8 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_float8
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call <8 x float> @llvm.ct.select.v8f32(i1 [[COND]], <8 x float> %{{.*}}, <8 x float> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+double4 test_vector_double4(int cond, double4 a, double4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_double4
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call <4 x double> @llvm.ct.select.v4f64(i1 [[COND]], <4 x double> %{{.*}}, <4 x double> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test 512-bit vectors
+typedef int __attribute__((vector_size(64))) int16;
+typedef float __attribute__((vector_size(64))) float16;
+
+int16 test_vector_int16(int cond, int16 a, int16 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_int16
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <16 x i32> @llvm.ct.select.v16i32(i1 [[COND]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+float16 test_vector_float16(int cond, float16 a, float16 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_float16
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <16 x float> @llvm.ct.select.v16f32(i1 [[COND]], <16 x float> %{{.*}}, <16 x float> %{{.*}})
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test vector operations with different condition types
+int4 test_vector_char_cond(char cond, int4 a, int4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_char_cond
+  // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+float4 test_vector_long_cond(long long cond, float4 a, float4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_long_cond
+  // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test vector constants
+int4 test_vector_constant_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_vector_constant_cond
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 true, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  int4 a = {1, 2, 3, 4};
+  int4 b = {5, 6, 7, 8};
+  return __builtin_ct_select(1, a, b);
+}
+
+float4 test_vector_zero_cond(void) {
+  // CHECK-LABEL: define {{.*}} @test_vector_zero_cond
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 false, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float4 a = {1.0f, 2.0f, 3.0f, 4.0f};
+  float4 b = {5.0f, 6.0f, 7.0f, 8.0f};
+  return __builtin_ct_select(0, a, b);
+}
+
+// Test nested vector selections
+int4 test_vector_nested(int cond1, int cond2, int4 a, int4 b, int4 c) {
+  // CHECK-LABEL: define {{.*}} @test_vector_nested
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[INNER:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND2]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND1]], <4 x i32> [[INNER]], <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
+}
+
+// Test vector selection with complex expressions
+float4 test_vector_complex_expr(int x, int y, float4 a, float4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_complex_expr
+  // CHECK: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[CMP]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  return __builtin_ct_select(x > y, a, b);
+}
+
+// Test vector with different element sizes
+typedef long long __attribute__((vector_size(16))) long2;
+typedef double __attribute__((vector_size(16))) double2;
+
+long2 test_vector_long2(int cond, long2 a, long2 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_long2
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <2 x i64> @llvm.ct.select.v2i64(i1 [[COND]], <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  // CHECK: ret <2 x i64> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+double2 test_vector_double2(int cond, double2 a, double2 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_double2
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <2 x double> @llvm.ct.select.v2f64(i1 [[COND]], <2 x double> %{{.*}}, <2 x double> %{{.*}})
+  // CHECK: ret <2 x double> [[RESULT]]
+  return __builtin_ct_select(cond, a, b);
+}
+
+// Test mixed vector operations
+int4 test_vector_from_scalar_condition(int4 vec_cond, int4 a, int4 b) {
+  // CHECK-LABEL: define {{.*}} @test_vector_from_scalar_condition
+  // Extract first element and use as condition
+  int scalar_cond = vec_cond[0];
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  // CHECK: ret <4 x i32> [[RESULT]]
+  return __builtin_ct_select(scalar_cond, a, b);
+}
+
+// Test vector chaining
+float4 test_vector_chaining(int cond1, int cond2, int cond3, float4 a, float4 b, float4 c, float4 d) {
+  // CHECK-LABEL: define {{.*}} @test_vector_chaining
+  // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[COND3:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[FIRST:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND1]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK-DAG: [[SECOND:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND2]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK-DAG: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND3]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float4 first = __builtin_ct_select(cond1, a, b);
+  float4 second = __builtin_ct_select(cond2, first, c);
+  return __builtin_ct_select(cond3, second, d);
+}
+
+// Test special floating point values - NaN
+float test_nan_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_nan_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float 1.000000e+00)
+  // CHECK: ret float [[RESULT]]
+  float nan_val = __builtin_nanf(""); // NaN is the true-side operand; 1.0f is the false side
+  return __builtin_ct_select(cond, nan_val, 1.0f);
+}
+
+double test_nan_double_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_nan_double_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double 2.000000e+00)
+  // CHECK: ret double [[RESULT]]
+  double nan_val = __builtin_nan("");
+  return __builtin_ct_select(cond, nan_val, 2.0);
+}
+
+// Test infinity values
+float test_infinity_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_infinity_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+  // CHECK: ret float [[RESULT]]
+  float pos_inf = __builtin_inff();
+  float neg_inf = -__builtin_inff();
+  return __builtin_ct_select(cond, pos_inf, neg_inf);
+}
+
+double test_infinity_double_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_infinity_double_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+  // CHECK: ret double [[RESULT]]
+  double pos_inf = __builtin_inf();
+  double neg_inf = -__builtin_inf();
+  return __builtin_ct_select(cond, pos_inf, neg_inf);
+}
+
+// Test subnormal/denormal values
+float test_subnormal_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_subnormal_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+  // CHECK: ret float [[RESULT]]
+  // Very small subnormal values
+  float subnormal1 = 1e-40f;
+  float subnormal2 = 1e-45f;
+  return __builtin_ct_select(cond, subnormal1, subnormal2);
+}
+
+// Test integer overflow boundaries
+int test_integer_overflow_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_integer_overflow_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int max_int = __INT_MAX__;
+  int min_int = (-__INT_MAX__ - 1);
+  return __builtin_ct_select(cond, max_int, min_int);
+}
+
+long long test_longlong_overflow_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_longlong_overflow_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+  // CHECK: ret i64 [[RESULT]]
+  long long max_ll = __LONG_LONG_MAX__;
+  long long min_ll = (-__LONG_LONG_MAX__ - 1);
+  return __builtin_ct_select(cond, max_ll, min_ll);
+}
+
+// Test unsigned overflow boundaries
+unsigned int test_unsigned_overflow_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_unsigned_overflow_operands
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  unsigned int max_uint = 4294967295U; // Largest 32-bit unsigned value; 'U' keeps the literal unsigned
+  unsigned int min_uint = 0;
+  return __builtin_ct_select(cond, max_uint, min_uint);
+}
+
+int* test_null_pointer_operands(int cond, int* valid_ptr) {
+  // CHECK-LABEL: define {{.*}} @test_null_pointer_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr %{{.*}}, ptr %{{.*}})
+  // CHECK: ret ptr [[RESULT]]
+  int* null_ptr = (int*)0; // Null pointer operand: selected by value, never dereferenced
+  return __builtin_ct_select(cond, null_ptr, valid_ptr);
+}
+
+// Test volatile operations
+volatile int global_volatile = 42;
+int test_volatile_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_volatile_operands
+  // CHECK-DAG: [[VOLATILE_LOAD:%.*]] = load volatile i32, ptr {{.*}}
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 100)
+  // CHECK: ret i32 [[RESULT]]
+  volatile int vol_val = global_volatile;
+  return __builtin_ct_select(cond, vol_val, 100);
+}
+
+// Test uninitialized variable behavior (should still work with ct_select)
+int test_uninitialized_operands(int cond, int initialized) {
+  // CHECK-LABEL: define {{.*}} @test_uninitialized_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int uninitialized; // Intentionally uninitialized: the value is indeterminate, only the emitted IR is verified
+  return __builtin_ct_select(cond, uninitialized, initialized);
+}
+
+// Test zero division avoidance patterns
+int test_division_by_zero_avoidance(int cond, int dividend, int divisor) {
+  // CHECK-LABEL: define {{.*}} @test_division_by_zero_avoidance
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[DIV_RESULT:%.*]] = sdiv i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[SAFE_DIVISOR:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 1)
+  // First get a safe divisor (never zero)
+  int safe_divisor = __builtin_ct_select(divisor != 0, divisor, 1);
+  // Then perform division with guaranteed non-zero divisor
+  return dividend / safe_divisor;
+}
+
+// Test array bounds checking patterns
+int test_array_bounds_protection(int cond, int index, int* array) {
+  // CHECK-LABEL: define {{.*}} @test_array_bounds_protection
+  // CHECK-DAG: [[SAFE_INDEX:%.*]] = call i32 @llvm.ct.select.i32(i1 {{.*}}, i32 %{{.*}}, i32 0)
+  // Use ct_select to ensure safe array indexing
+  int safe_index = __builtin_ct_select(index >= 0 && index < 10, index, 0);
+  return array[safe_index];
+}
+
+// Test bit manipulation edge cases
+unsigned int test_bit_manipulation_edge_cases(int cond, unsigned int value) {
+  // CHECK-LABEL: define {{.*}} @test_bit_manipulation_edge_cases
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[SHIFT_LEFT:%.*]] = shl i32 %{{.*}}, 31
+  // CHECK-DAG: [[SHIFT_RIGHT:%.*]] = lshr i32 %{{.*}}, 31
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  // Shift by 31, the largest count that is still well-defined for a 32-bit unsigned int
+  unsigned int left_shift = value << 31;   // Keeps only bit 0, moved up to the top bit
+  unsigned int right_shift = value >> 31;  // Extracts the most significant bit
+  return __builtin_ct_select(cond, left_shift, right_shift);
+}
+
+// Test signed integer wraparound
+int test_signed_wraparound(int cond, int a, int b) {
+  // CHECK-LABEL: define {{.*}} @test_signed_wraparound
+  // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
+  // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+  // CHECK: ret i32 [[RESULT]]
+  int sum = a + b;      // Signed overflow is undefined in C, hence the expected 'add nsw'
+  int diff = a - b;     // Same for subtraction, hence the expected 'sub nsw'
+  return __builtin_ct_select(cond, sum, diff);
+}
+
+// Test vector NaN handling
+float4 test_vector_nan_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_vector_nan_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float nan_val = __builtin_nanf("");
+  float4 nan_vec = {nan_val, nan_val, nan_val, nan_val};
+  float4 normal_vec = {1.0f, 2.0f, 3.0f, 4.0f};
+  return __builtin_ct_select(cond, nan_vec, normal_vec);
+}
+
+// Test vector infinity handling
+float4 test_vector_infinity_operands(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_vector_infinity_operands
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+  // CHECK: ret <4 x float> [[RESULT]]
+  float pos_inf = __builtin_inff();
+  float neg_inf = -__builtin_inff();
+  float4 inf_vec = {pos_inf, neg_inf, pos_inf, neg_inf};
+  float4 zero_vec = {0.0f, 0.0f, 0.0f, 0.0f};
+  return __builtin_ct_select(cond, inf_vec, zero_vec);
+}
+
+// Test mixed special values
+double test_mixed_special_values(int cond) {
+  // CHECK-LABEL: define {{.*}} @test_mixed_special_values
+  // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+  // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+  // CHECK: ret double [[RESULT]]
+  double nan_val = __builtin_nan("");
+  double inf_val = __builtin_inf();
+  return __builtin_ct_select(cond, nan_val, inf_val);
+}
+
+// Test constant-time memory access pattern
+int test_constant_time_memory_access(int secret_index, int* data_array) {
+  // CHECK-LABEL: define {{.*}} @test_constant_time_memory_access
+  // CHECK: call i32 @llvm.ct.select.i32(i1 {{.*}}, i32 %{{.*}}, i32 0)
+  int result = 0;
+  // Read all 8 elements and keep only the target via ct_select, so the accesses do not depend on secret_index
+  for (int i = 0; i < 8; i++) {
+    int is_target = (i == secret_index);
+    int current_value = data_array[i];
+    int selected_value = __builtin_ct_select(is_target, current_value, 0);
+    result += selected_value;
+  }
+  return result;
+}
+
+// Test timing-attack resistant comparison
+int test_timing_resistant_comparison(const char* secret, const char* guess) {
+  // CHECK-LABEL: define {{.*}} @test_timing_resistant_comparison
+  // CHECK-COUNT-2: call i32 @llvm.ct.select.i32(
+  int match = 1; // Stays 1 only while every compared byte pair agrees
+  for (int i = 0; i < 32; i++) {
+    int chars_equal = (secret[i] == guess[i]);
+    int both_null = (secret[i] == 0) & (guess[i] == 0); // '&' rather than '&&': no short-circuit branch on secret bytes
+    int still_matching = __builtin_ct_select(chars_equal | both_null, match, 0);
+    match = __builtin_ct_select(both_null, match, still_matching);
+  }
+  return match;
+}

>From 5302ac244124c5a25c2a20c8bf8441fdfb2aeeda Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 11:01:26 -0500
Subject: [PATCH 2/4] [LLVM][MIPS] Add comprehensive tests for ct.select

---
 .../Mips/ctselect-fallback-edge-cases.ll      | 244 +++++
 .../Mips/ctselect-fallback-patterns.ll        | 426 +++++++++
 .../CodeGen/Mips/ctselect-fallback-vector.ll  | 830 ++++++++++++++++++
 llvm/test/CodeGen/Mips/ctselect-fallback.ll   | 371 ++++++++
 .../CodeGen/Mips/ctselect-side-effects.ll     | 183 ++++
 5 files changed, 2054 insertions(+)
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-side-effects.ll

diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
new file mode 100644
index 0000000000000..f1831a625d4a4
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Portable edge case tests
+
+; Test with small integer types
+define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
+; M32-LABEL: test_ctselect_i1:
+; M32:       # %bb.0:
+; M32-NEXT:    xori $2, $4, 1
+; M32-NEXT:    and $1, $4, $5
+; M32-NEXT:    and $2, $2, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $2
+;
+; M64-LABEL: test_ctselect_i1:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $1, $6, 0
+; M64-NEXT:    xori $2, $2, 1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    and $2, $4, $5
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
+  ret i1 %result
+}
+
+; Test with extremal values
+define i32 @test_ctselect_extremal_values(i1 %cond) {
+; M32-LABEL: test_ctselect_extremal_values:
+; M32:       # %bb.0:
+; M32-NEXT:    lui $3, 32767
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    ori $3, $3, 65535
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    lui $3, 32768
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_extremal_values:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    lui $3, 32767
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    ori $3, $3, 65535
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    lui $3, 32768
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
+  ret i32 %result
+}
+
+; Test with null pointers
+define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
+; M32-LABEL: test_ctselect_null_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    and $2, $1, $5
+;
+; M64-LABEL: test_ctselect_null_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $1, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    and $2, $1, $5
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
+  ret ptr %result
+}
+
+; Test with function pointers
+define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
+; M32-LABEL: test_ctselect_function_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_function_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
+  ret ptr %result
+}
+
+; Test with condition from icmp on pointers
+define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
+; M32-LABEL: test_ctselect_ptr_cmp:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltu $1, $zero, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_ptr_cmp:
+; M64:       # %bb.0:
+; M64-NEXT:    xor $1, $4, $5
+; M64-NEXT:    daddiu $3, $zero, -1
+; M64-NEXT:    daddiu $2, $zero, -1
+; M64-NEXT:    movn $3, $zero, $1
+; M64-NEXT:    xor $2, $3, $2
+; M64-NEXT:    and $1, $3, $6
+; M64-NEXT:    and $2, $2, $7
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %cmp = icmp eq ptr %p1, %p2
+  %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Test with struct pointer types
+%struct.pair = type { i32, i32 }
+
+define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
+; M32-LABEL: test_ctselect_struct_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_struct_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Test with deeply nested conditions
+define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; M32-LABEL: test_ctselect_deeply_nested:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $3, 16($sp)
+; M32-NEXT:    lw $9, 32($sp)
+; M32-NEXT:    lw $8, 28($sp)
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    lw $3, 20($sp)
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    andi $3, $5, 1
+; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    andi $2, $6, 1
+; M32-NEXT:    andi $6, $7, 1
+; M32-NEXT:    negu $4, $3
+; M32-NEXT:    addiu $3, $3, -1
+; M32-NEXT:    addiu $7, $6, -1
+; M32-NEXT:    and $1, $4, $1
+; M32-NEXT:    addiu $5, $2, -1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    negu $6, $6
+; M32-NEXT:    and $4, $7, $9
+; M32-NEXT:    lw $7, 24($sp)
+; M32-NEXT:    and $5, $5, $8
+; M32-NEXT:    and $3, $3, $7
+; M32-NEXT:    or $1, $1, $3
+; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    or $1, $1, $5
+; M32-NEXT:    and $1, $6, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $4
+;
+; M64-LABEL: test_ctselect_deeply_nested:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $8, 0
+; M64-NEXT:    sll $4, $5, 0
+; M64-NEXT:    lw $8, 0($sp)
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    andi $4, $4, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    negu $5, $4
+; M64-NEXT:    addiu $4, $4, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $9, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $3, $11, 0
+; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sll $6, $7, 0
+; M64-NEXT:    andi $2, $2, 1
+; M64-NEXT:    and $1, $5, $1
+; M64-NEXT:    andi $6, $6, 1
+; M64-NEXT:    addiu $5, $2, -1
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    addiu $7, $6, -1
+; M64-NEXT:    negu $6, $6
+; M64-NEXT:    and $3, $5, $3
+; M64-NEXT:    sll $5, $10, 0
+; M64-NEXT:    and $7, $7, $8
+; M64-NEXT:    and $4, $4, $5
+; M64-NEXT:    or $1, $1, $4
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    or $1, $1, $3
+; M64-NEXT:    and $1, $6, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $7
+  %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+  %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+  %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+  %sel4 = call i32 @llvm.ct.select.i32(i1 %c4, i32 %sel3, i32 %e)
+  ret i32 %sel4
+}
+
+; Declare the intrinsics
+declare i1 @llvm.ct.select.i1(i1, i1, i1)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
new file mode 100644
index 0000000000000..2e65e586ce5fa
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
@@ -0,0 +1,426 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Test smin(x, 0) pattern
+define i32 @test_ctselect_smin_zero(i32 %x) {
+; M32-LABEL: test_ctselect_smin_zero:
+; M32:       # %bb.0:
+; M32-NEXT:    sra $1, $4, 31
+; M32-NEXT:    jr $ra
+; M32-NEXT:    and $2, $1, $4
+;
+; M64-LABEL: test_ctselect_smin_zero:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    jr $ra
+; M64-NEXT:    and $2, $2, $1
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+  ret i32 %result
+}
+
+; Test smax(x, 0) pattern
+define i32 @test_ctselect_smax_zero(i32 %x) {
+; M32-LABEL: test_ctselect_smax_zero:
+; M32:       # %bb.0:
+; M32-NEXT:    slti $1, $4, 1
+; M32-NEXT:    movn $4, $zero, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_ctselect_smax_zero:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    slti $1, $2, 1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    movn $2, $zero, $1
+  %cmp = icmp sgt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+  ret i32 %result
+}
+
+; Test generic smin pattern
+define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_smin_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    slt $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_smin_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    slt $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cmp = icmp slt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test generic smax pattern
+define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_smax_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    slt $1, $5, $4
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_smax_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    slt $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %cmp = icmp sgt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test umin pattern
+define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_umin_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    sltu $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_umin_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sltu $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cmp = icmp ult i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test umax pattern
+define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_umax_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    sltu $1, $5, $4
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_umax_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    sltu $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %cmp = icmp ugt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test abs pattern
+define i32 @test_ctselect_abs(i32 %x) {
+; M32-LABEL: test_ctselect_abs:
+; M32:       # %bb.0:
+; M32-NEXT:    negu $1, $4
+; M32-NEXT:    sra $2, $4, 31
+; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    not $2, $2
+; M32-NEXT:    and $2, $2, $4
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $2
+;
+; M64-LABEL: test_ctselect_abs:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    sra $3, $1, 31
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %neg = sub i32 0, %x
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x)
+  ret i32 %result
+}
+
+; Test nabs pattern (negative abs)
+define i32 @test_ctselect_nabs(i32 %x) {
+; M32-LABEL: test_ctselect_nabs:
+; M32:       # %bb.0:
+; M32-NEXT:    sra $1, $4, 31
+; M32-NEXT:    negu $3, $4
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_nabs:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    and $3, $2, $1
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    not $2, $2
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $3, $1
+  %neg = sub i32 0, %x
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg)
+  ret i32 %result
+}
+
+; Test sign extension pattern
+define i32 @test_ctselect_sign_extend(i32 %x) {
+; M32-LABEL: test_ctselect_sign_extend:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    sra $2, $4, 31
+;
+; M64-LABEL: test_ctselect_sign_extend:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sra $2, $1, 31
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
+  ret i32 %result
+}
+
+; Test zero extension pattern
+define i32 @test_ctselect_zero_extend(i32 %x) {
+; M32-LABEL: test_ctselect_zero_extend:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    sltu $2, $zero, $4
+;
+; M64-LABEL: test_ctselect_zero_extend:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sltu $2, $zero, $1
+  %cmp = icmp ne i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 1, i32 0)
+  ret i32 %result
+}
+
+; Test constant folding with a known condition (true here, false in the next test)
+define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_constant_folding_true:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_ctselect_constant_folding_true:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_constant_folding_false:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $zero, $5
+;
+; M64-LABEL: test_ctselect_constant_folding_false:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $zero, $1
+  %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test with identical operands
+define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
+; M32-LABEL: test_ctselect_identical_operands:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_identical_operands:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
+  ret i32 %result
+}
+
+; Test with inverted condition
+define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_inverted_condition:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltiu $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_inverted_condition:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sltiu $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cmp = icmp eq i32 %x, %y
+  %not_cmp = xor i1 %cmp, true
+  %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test chain of ct.select operations
+define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) {
+; M32-LABEL: test_ctselect_chain:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    andi $3, $5, 1
+; M32-NEXT:    lw $5, 16($sp)
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    negu $4, $3
+; M32-NEXT:    addiu $3, $3, -1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    and $2, $2, $7
+; M32-NEXT:    lw $5, 24($sp)
+; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    andi $2, $6, 1
+; M32-NEXT:    and $1, $4, $1
+; M32-NEXT:    addiu $4, $2, -1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $4, $4, $5
+; M32-NEXT:    lw $5, 20($sp)
+; M32-NEXT:    and $3, $3, $5
+; M32-NEXT:    or $1, $1, $3
+; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $4
+;
+; M64-LABEL: test_ctselect_chain:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    sll $4, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    andi $4, $4, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    negu $5, $4
+; M64-NEXT:    addiu $4, $4, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $8, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    sll $6, $10, 0
+; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    and $1, $5, $1
+; M64-NEXT:    sll $5, $9, 0
+; M64-NEXT:    addiu $2, $3, -1
+; M64-NEXT:    negu $3, $3
+; M64-NEXT:    and $4, $4, $5
+; M64-NEXT:    and $2, $2, $6
+; M64-NEXT:    or $1, $1, $4
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+  %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+  %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+  ret i32 %sel3
+}
+
+; Test i64 operations (native on 64-bit targets; split across a register pair on 32-bit targets)
+define i64 @test_ctselect_i64_smin_zero(i64 %x) {
+; M32-LABEL: test_ctselect_i64_smin_zero:
+; M32:       # %bb.0:
+; M32-NEXT:    sra $1, $5, 31
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    jr $ra
+; M32-NEXT:    and $3, $1, $5
+;
+; M64-LABEL: test_ctselect_i64_smin_zero:
+; M64:       # %bb.0:
+; M64-NEXT:    dsra $1, $4, 63
+; M64-NEXT:    jr $ra
+; M64-NEXT:    and $2, $1, $4
+  %cmp = icmp slt i64 %x, 0
+  %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
+  ret i64 %result
+}
+
+; Declare the intrinsics
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
new file mode 100644
index 0000000000000..6222f6052e12f
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
@@ -0,0 +1,830 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS64-MSA
+; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS32-MSA
+
+; Test 32-bit integer vector (4 x i32 = 128-bit)
+define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test 16-bit integer vector (8 x i16 = 128-bit)
+define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v8i16:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.h $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.h $w1, $w1, 15
+; MIPS64-MSA-NEXT:    srai.h $w1, $w1, 15
+; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v8i16:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.h $w1, $4
+; MIPS32-MSA-NEXT:    ldi.b $w0, -1
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    slli.h $w1, $w1, 15
+; MIPS32-MSA-NEXT:    srai.h $w1, $w1, 15
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %result
+}
+
+; Test byte vector (16 x i8 = 128-bit)
+define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v16i8:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    fill.b $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $6
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $8
+; MIPS64-MSA-NEXT:    slli.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS64-MSA-NEXT:    shf.b $w1, $w1, 27
+; MIPS64-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    xori.b $w2, $w2, 255
+; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v16i8:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $6
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    fill.b $w2, $4
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $2
+; MIPS32-MSA-NEXT:    slli.b $w2, $w2, 7
+; MIPS32-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xori.b $w2, $w2, 255
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    shf.b $w1, $w1, 27
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %result
+}
+
+; Test 64-bit integer vector (2 x i64 = 128-bit)
+define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v2i64:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v2i64:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, -32
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32-MSA-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    .cfi_offset 31, -4
+; MIPS32-MSA-NEXT:    .cfi_offset 30, -8
+; MIPS32-MSA-NEXT:    move $fp, $sp
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_register 30
+; MIPS32-MSA-NEXT:    addiu $1, $zero, -16
+; MIPS32-MSA-NEXT:    and $sp, $sp, $1
+; MIPS32-MSA-NEXT:    lw $2, 56($fp)
+; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    sw $4, 12($sp)
+; MIPS32-MSA-NEXT:    sw $4, 4($sp)
+; MIPS32-MSA-NEXT:    ldi.b $w0, -1
+; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 64($fp)
+; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 68($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 48($fp)
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 52($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32-MSA-NEXT:    move $sp, $fp
+; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, 32
+  %result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %result
+}
+
+; Test single-precision float vector (4 x float = 128-bit)
+define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v4f32:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4f32:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
+  %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b)
+  ret <4 x float> %result
+}
+
+; Test double-precision float vector (2 x double = 128-bit)
+define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v2f64:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v2f64:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, -32
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32-MSA-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    .cfi_offset 31, -4
+; MIPS32-MSA-NEXT:    .cfi_offset 30, -8
+; MIPS32-MSA-NEXT:    move $fp, $sp
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_register 30
+; MIPS32-MSA-NEXT:    addiu $1, $zero, -16
+; MIPS32-MSA-NEXT:    and $sp, $sp, $1
+; MIPS32-MSA-NEXT:    lw $2, 56($fp)
+; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    sw $5, 12($sp)
+; MIPS32-MSA-NEXT:    sw $5, 4($sp)
+; MIPS32-MSA-NEXT:    ldi.b $w0, -1
+; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 64($fp)
+; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 68($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 48($fp)
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 52($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    st.d $w0, 0($4)
+; MIPS32-MSA-NEXT:    move $sp, $fp
+; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, 32
+  %result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b)
+  ret <2 x double> %result
+}
+
+; Test with aligned loads (common case)
+define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_aligned_load:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS64-MSA-NEXT:    ldi.b $w2, -1
+; MIPS64-MSA-NEXT:    fill.w $w0, $1
+; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_aligned_load:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS32-MSA-NEXT:    ldi.b $w2, -1
+; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %a = load <4 x i32>, ptr %p1, align 16
+  %b = load <4 x i32>, ptr %p2, align 16
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with under-aligned loads (align 4 rather than the 16-byte alignment used above)
+define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS64-MSA-NEXT:    ldi.b $w2, -1
+; MIPS64-MSA-NEXT:    fill.w $w0, $1
+; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS32-MSA-NEXT:    ldi.b $w2, -1
+; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %a = load <4 x i32>, ptr %p1, align 4
+  %b = load <4 x i32>, ptr %p2, align 4
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with stores to verify result handling
+define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_store:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    st.w $w0, 0($9)
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_store:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 40($sp)
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($1)
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  store <4 x i32> %result, ptr %out, align 16
+  ret void
+}
+
+; Test chained selects (multiple conditions)
+define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_chain:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $8
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    sll $1, $5, 0
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $9
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    xor.v $w3, $w2, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w3, $w0
+; MIPS64-MSA-NEXT:    insert.d $w3[0], $6
+; MIPS64-MSA-NEXT:    insert.d $w3[1], $7
+; MIPS64-MSA-NEXT:    shf.w $w3, $w3, 177
+; MIPS64-MSA-NEXT:    and.v $w2, $w2, $w3
+; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $10
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $11
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_chain:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    lw $2, 40($sp)
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w3, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w3, $w0
+; MIPS32-MSA-NEXT:    insert.w $w3[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w3[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w3[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w3[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 44($sp)
+; MIPS32-MSA-NEXT:    and.v $w2, $w2, $w3
+; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 48($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 52($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %tmp = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond1, <4 x i32> %a, <4 x i32> %b)
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond2, <4 x i32> %tmp, <4 x i32> %c)
+  ret <4 x i32> %result
+}
+
+; Test with arithmetic operations (ensure float vectors work with FP ops)
+define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 x float> %y) {
+; MIPS64-MSA-LABEL: test_ctselect_v4f32_arithmetic:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    fill.w $w3, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w3, $w3, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    srai.w $w3, $w3, 31
+; MIPS64-MSA-NEXT:    fadd.w $w2, $w1, $w0
+; MIPS64-MSA-NEXT:    fsub.w $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    xor.v $w1, $w3, $w1
+; MIPS64-MSA-NEXT:    and.v $w2, $w3, $w2
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4f32_arithmetic:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    fill.w $w3, $5
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.w $w3, $w3, 31
+; MIPS32-MSA-NEXT:    srai.w $w3, $w3, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    fadd.w $w2, $w1, $w0
+; MIPS32-MSA-NEXT:    fsub.w $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w3, $w1
+; MIPS32-MSA-NEXT:    and.v $w2, $w3, $w2
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
+  %sum = fadd <4 x float> %x, %y
+  %diff = fsub <4 x float> %x, %y
+  %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %sum, <4 x float> %diff)
+  ret <4 x float> %result
+}
+
+; Test with mixed operations (load, compute, select, store)
+define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_mixed:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($5)
+; MIPS64-MSA-NEXT:    ldi.b $w2, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    addvi.w $w0, $w0, 1
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS64-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    st.w $w0, 0($7)
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_mixed:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($5)
+; MIPS32-MSA-NEXT:    fill.w $w1, $4
+; MIPS32-MSA-NEXT:    ldi.b $w2, -1
+; MIPS32-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS32-MSA-NEXT:    addvi.w $w0, $w0, 1
+; MIPS32-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS32-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($7)
+  %a = load <4 x i32>, ptr %p1, align 16
+  %b = load <4 x i32>, ptr %p2, align 16
+  %a_plus_1 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
+  %b_plus_2 = add <4 x i32> %b, <i32 2, i32 2, i32 2, i32 2>
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a_plus_1, <4 x i32> %b_plus_2)
+  store <4 x i32> %result, ptr %out, align 16
+  ret void
+}
+
+; Test with function arguments directly (no loads)
+define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_args:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_args:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with multiple uses of result
+define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_multi_use:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    addv.w $w0, $w0, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_multi_use:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    addv.w $w0, $w0, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  %add = add <4 x i32> %sel, %sel  ; Use result twice
+  ret <4 x i32> %add
+}
+
+declare <4 x i32> @llvm.ct.select.v4i32(i1, <4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.ct.select.v8i16(i1, <8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.ct.select.v16i8(i1, <16 x i8>, <16 x i8>)
+declare <2 x i64> @llvm.ct.select.v2i64(i1, <2 x i64>, <2 x i64>)
+declare <4 x float> @llvm.ct.select.v4f32(i1, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.ct.select.v2f64(i1, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback.ll b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
new file mode 100644
index 0000000000000..d89d7fc698712
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
@@ -0,0 +1,371 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Test basic ct.select functionality for scalar types
+define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
+; M32-LABEL: test_ctselect_i8:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    jr $ra
+; M32-NEXT:    xor $2, $1, $6
+;
+; M64-LABEL: test_ctselect_i8:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    xor $2, $5, $6
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    xor $2, $1, $2
+  %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
+  ret i8 %result
+}
+
+define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
+; M32-LABEL: test_ctselect_i16:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    jr $ra
+; M32-NEXT:    xor $2, $1, $6
+;
+; M64-LABEL: test_ctselect_i16:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    xor $2, $5, $6
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    xor $2, $1, $2
+  %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
+  ret i16 %result
+}
+
+define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_i32:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_i32:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
+; M32-LABEL: test_ctselect_i64:
+; M32:       # %bb.0:
+; M32-NEXT:    lw $1, 16($sp)
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $2, $6, $1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    xor $2, $2, $1
+; M32-NEXT:    lw $1, 20($sp)
+; M32-NEXT:    xor $4, $7, $1
+; M32-NEXT:    and $3, $4, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    xor $3, $3, $1
+;
+; M64-LABEL: test_ctselect_i64:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
+  ret i64 %result
+}
+
+define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
+; M32-LABEL: test_ctselect_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Test with constant conditions
+define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_const_true:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_ctselect_const_true:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_const_false:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $zero, $5
+;
+; M64-LABEL: test_ctselect_const_false:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $zero, $1
+  %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test with comparison conditions
+define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_eq:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltu $1, $zero, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_eq:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sltu $1, $zero, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp eq i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_ne:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltiu $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_ne:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sltiu $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp ne i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_slt:
+; M32:       # %bb.0:
+; M32-NEXT:    slt $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_slt:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    slt $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    xori $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp slt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_ult:
+; M32:       # %bb.0:
+; M32-NEXT:    sltu $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_ult:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    sltu $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    xori $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp ult i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test with memory operands
+define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
+; M32-LABEL: test_ctselect_load:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $3, 0($5)
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    lw $3, 0($6)
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_load:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    lw $3, 0($5)
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    lw $3, 0($6)
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %a = load i32, ptr %p1
+  %b = load i32, ptr %p2
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test nested ctselect calls
+define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
+; M32-LABEL: test_ctselect_nested:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $5, 1
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    negu $4, $3
+; M32-NEXT:    and $2, $2, $6
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    addiu $2, $3, -1
+; M32-NEXT:    lw $3, 16($sp)
+; M32-NEXT:    and $1, $4, $1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $2
+;
+; M64-LABEL: test_ctselect_nested:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    sll $4, $4, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    andi $4, $4, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    negu $5, $4
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    addiu $3, $4, -1
+; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $8, 0
+; M64-NEXT:    and $1, $5, $1
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
+  %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
+  ret i32 %result
+}
+
+; Declare the intrinsics
+declare i8 @llvm.ct.select.i8(i1, i8, i8)
+declare i16 @llvm.ct.select.i16(i1, i16, i16)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
new file mode 100644
index 0000000000000..6cfa07afdd51e
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Test 1: Basic optimizations should still work
+define i32 @test_basic_opts(i32 %x) {
+; M32-LABEL: test_basic_opts:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_basic_opts:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %a = or i32 %x, 0
+  %b = and i32 %a, -1
+  %c = xor i32 %b, 0
+  ret i32 %c
+}
+
+; Test 2: Constant folding should work
+define i32 @test_constant_fold() {
+; M32-LABEL: test_constant_fold:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_constant_fold:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %a = xor i32 -1, -1    ; Should fold to 0
+  ret i32 %a
+}
+
+; Test 3: Protected pattern should NOT have branches
+define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_protected_no_branch:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_protected_no_branch:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test 4: Explicit branch should still generate branches
+define i32 @test_explicit_branch(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_explicit_branch:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    beqz $1, $BB3_2
+; M32-NEXT:    nop
+; M32-NEXT:  # %bb.1: # %true
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $5
+; M32-NEXT:  $BB3_2: # %false
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $6
+;
+; M64-LABEL: test_explicit_branch:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    beqz $1, .LBB3_2
+; M64-NEXT:    nop
+; M64-NEXT:  # %bb.1: # %true
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:  .LBB3_2: # %false
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $6, 0
+  br i1 %cond, label %true, label %false
+true:
+  ret i32 %a
+false:
+  ret i32 %b
+}
+
+; Test 5: Regular select (not ct.select) - the MIPS backend may lower it however it likes (here: movn)
+define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_regular_select:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    movn $6, $5, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $6
+;
+; M64-LABEL: test_regular_select:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $3, $4, 0
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    movn $2, $1, $3
+  %result = select i1 %cond, i32 %a, i32 %b
+  ret i32 %result
+}
+
+; Test if XOR with all-ones still gets optimized
+define i32 @test_xor_all_ones() {
+; M32-LABEL: test_xor_all_ones:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_xor_all_ones:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %xor1 = xor i32 -1, -1  ; Should optimize to 0
+  ret i32 %xor1
+}
+
+define i32 @test_xor_same_value(i32 %x) {
+; M32-LABEL: test_xor_same_value:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_xor_same_value:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %xor2 = xor i32 %x, %x  ; Should optimize to 0
+  ret i32 %xor2
+}
+
+define i32 @test_normal_ops(i32 %x) {
+; M32-LABEL: test_normal_ops:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_normal_ops:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %or1 = or i32 %x, 0      ; Should optimize to %x
+  %and1 = and i32 %or1, -1  ; Should optimize to %x
+  %xor1 = xor i32 %and1, 0  ; Should optimize to %x
+  ret i32 %xor1
+}
+
+; XORs of identical constants feeding ORs must still constant-fold to 0 (the concern raised in review)
+define i32 @test_xor_with_const_operands() {
+; M32-LABEL: test_xor_with_const_operands:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_xor_with_const_operands:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %a = xor i32 -1, -1
+  %b = xor i32 0, 0
+  %c = xor i32 42, 42
+  %result = or i32 %a, %b
+  %final = or i32 %result, %c
+  ret i32 %final  ; Should optimize to 0
+}
+
+declare i32 @llvm.ct.select.i32(i1, i32, i32)

>From 34d5750b65182839a04c3144435da65321d89b79 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Sat, 7 Mar 2026 15:38:15 -0500
Subject: [PATCH 3/4] [LLVM][MIPS] Regenerate ct.select test CHECK lines

Update CHECK lines to match the new constant-time AND/OR/XOR expansion
from the CT_SELECT legalization fix.
---
 .../Mips/ctselect-fallback-edge-cases.ll      | 223 +++----
 .../Mips/ctselect-fallback-patterns.ll        | 297 ++++-----
 .../CodeGen/Mips/ctselect-fallback-vector.ll  | 570 ++++++++----------
 llvm/test/CodeGen/Mips/ctselect-fallback.ll   | 258 ++++----
 .../CodeGen/Mips/ctselect-side-effects.ll     |  24 +-
 5 files changed, 617 insertions(+), 755 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
index f1831a625d4a4..401a742c27eae 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
@@ -8,22 +8,24 @@
 define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
 ; M32-LABEL: test_ctselect_i1:
 ; M32:       # %bb.0:
-; M32-NEXT:    xori $2, $4, 1
-; M32-NEXT:    and $1, $4, $5
-; M32-NEXT:    and $2, $2, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $2
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i1:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $1, $6, 0
-; M64-NEXT:    xori $2, $2, 1
-; M64-NEXT:    and $1, $2, $1
-; M64-NEXT:    and $2, $4, $5
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    xor $2, $5, $6
+; M64-NEXT:    andi $1, $1, 1
 ; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
   ret i1 %result
 }
@@ -32,30 +34,18 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
 define i32 @test_ctselect_extremal_values(i1 %cond) {
 ; M32-LABEL: test_ctselect_extremal_values:
 ; M32:       # %bb.0:
-; M32-NEXT:    lui $3, 32767
 ; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    ori $3, $3, 65535
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    lui $3, 32768
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    lui $2, 32768
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    subu $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_extremal_values:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    lui $3, 32767
+; M64-NEXT:    lui $2, 32768
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    ori $3, $3, 65535
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    lui $3, 32768
-; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    subu $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
   ret i32 %result
 }
@@ -67,14 +57,14 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
 ; M32-NEXT:    andi $1, $4, 1
 ; M32-NEXT:    negu $1, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    and $2, $1, $5
+; M32-NEXT:    and $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_null_ptr:
 ; M64:       # %bb.0:
 ; M64-NEXT:    andi $1, $4, 1
 ; M64-NEXT:    dnegu $1, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    and $2, $1, $5
+; M64-NEXT:    and $2, $5, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
   ret ptr %result
 }
@@ -83,23 +73,21 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
 define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
 ; M32-LABEL: test_ctselect_function_ptr:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_function_ptr:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
   ret ptr %result
 }
@@ -108,26 +96,25 @@ define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
 define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
 ; M32-LABEL: test_ctselect_ptr_cmp:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltu $1, $zero, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    sltiu $2, $2, 1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_ptr_cmp:
 ; M64:       # %bb.0:
-; M64-NEXT:    xor $1, $4, $5
-; M64-NEXT:    daddiu $3, $zero, -1
-; M64-NEXT:    daddiu $2, $zero, -1
-; M64-NEXT:    movn $3, $zero, $1
-; M64-NEXT:    xor $2, $3, $2
-; M64-NEXT:    and $1, $3, $6
-; M64-NEXT:    and $2, $2, $7
+; M64-NEXT:    xor $2, $4, $5
+; M64-NEXT:    xor $1, $6, $7
+; M64-NEXT:    sltiu $2, $2, 1
+; M64-NEXT:    dsll $2, $2, 32
+; M64-NEXT:    dsrl $2, $2, 32
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $7, $1
   %cmp = icmp eq ptr %p1, %p2
   %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
   ret ptr %result
@@ -139,23 +126,21 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
 define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
 ; M32-LABEL: test_ctselect_struct_ptr:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_struct_ptr:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
   ret ptr %result
 }
@@ -164,73 +149,65 @@ define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
 define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 ; M32-LABEL: test_ctselect_deeply_nested:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    lw $3, 16($sp)
-; M32-NEXT:    lw $9, 32($sp)
-; M32-NEXT:    lw $8, 28($sp)
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    lw $1, 20($sp)
+; M32-NEXT:    lw $2, 16($sp)
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    andi $4, $6, 1
+; M32-NEXT:    lw $6, 28($sp)
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $2, $2, $1
 ; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    lw $3, 20($sp)
-; M32-NEXT:    and $1, $1, $3
 ; M32-NEXT:    andi $3, $5, 1
-; M32-NEXT:    or $1, $2, $1
-; M32-NEXT:    andi $2, $6, 1
-; M32-NEXT:    andi $6, $7, 1
-; M32-NEXT:    negu $4, $3
-; M32-NEXT:    addiu $3, $3, -1
-; M32-NEXT:    addiu $7, $6, -1
-; M32-NEXT:    and $1, $4, $1
-; M32-NEXT:    addiu $5, $2, -1
-; M32-NEXT:    negu $2, $2
-; M32-NEXT:    negu $6, $6
-; M32-NEXT:    and $4, $7, $9
-; M32-NEXT:    lw $7, 24($sp)
-; M32-NEXT:    and $5, $5, $8
-; M32-NEXT:    and $3, $3, $7
-; M32-NEXT:    or $1, $1, $3
-; M32-NEXT:    and $1, $2, $1
-; M32-NEXT:    or $1, $1, $5
-; M32-NEXT:    and $1, $6, $1
+; M32-NEXT:    lw $5, 32($sp)
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    lw $2, 24($sp)
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    andi $3, $7, 1
+; M32-NEXT:    xor $1, $2, $1
+; M32-NEXT:    negu $2, $4
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $1, $1, $6
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    xor $1, $6, $1
+; M32-NEXT:    xor $1, $1, $5
+; M32-NEXT:    and $1, $1, $3
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $4
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_deeply_nested:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $8, 0
-; M64-NEXT:    sll $4, $5, 0
-; M64-NEXT:    lw $8, 0($sp)
+; M64-NEXT:    xor $2, $8, $9
+; M64-NEXT:    sll $5, $5, 0
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    sll $6, $11, 0
+; M64-NEXT:    sll $4, $7, 0
+; M64-NEXT:    lw $7, 0($sp)
 ; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    andi $5, $5, 1
+; M64-NEXT:    andi $3, $3, 1
 ; M64-NEXT:    andi $4, $4, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    negu $5, $4
-; M64-NEXT:    addiu $4, $4, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $9, 0
-; M64-NEXT:    and $1, $1, $3
-; M64-NEXT:    sll $3, $11, 0
-; M64-NEXT:    or $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    sll $6, $7, 0
-; M64-NEXT:    andi $2, $2, 1
-; M64-NEXT:    and $1, $5, $1
-; M64-NEXT:    andi $6, $6, 1
-; M64-NEXT:    addiu $5, $2, -1
-; M64-NEXT:    negu $2, $2
-; M64-NEXT:    addiu $7, $6, -1
-; M64-NEXT:    negu $6, $6
-; M64-NEXT:    and $3, $5, $3
-; M64-NEXT:    sll $5, $10, 0
-; M64-NEXT:    and $7, $7, $8
-; M64-NEXT:    and $4, $4, $5
-; M64-NEXT:    or $1, $1, $4
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    negu $5, $5
+; M64-NEXT:    negu $4, $4
 ; M64-NEXT:    and $1, $2, $1
-; M64-NEXT:    or $1, $1, $3
-; M64-NEXT:    and $1, $6, $1
+; M64-NEXT:    sll $2, $9, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $10, 0
+; M64-NEXT:    xor $1, $1, $2
+; M64-NEXT:    and $1, $1, $5
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    negu $2, $3
+; M64-NEXT:    xor $1, $1, $6
+; M64-NEXT:    and $1, $1, $2
+; M64-NEXT:    xor $1, $6, $1
+; M64-NEXT:    xor $1, $1, $7
+; M64-NEXT:    and $1, $1, $4
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $7
+; M64-NEXT:    xor $2, $7, $1
   %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
   %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
   %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
index 2e65e586ce5fa..a1c5d524c6939 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
@@ -6,16 +6,18 @@
 define i32 @test_ctselect_smin_zero(i32 %x) {
 ; M32-LABEL: test_ctselect_smin_zero:
 ; M32:       # %bb.0:
-; M32-NEXT:    sra $1, $4, 31
+; M32-NEXT:    slti $1, $4, 0
+; M32-NEXT:    negu $1, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    and $2, $4, $1
 ;
 ; M64-LABEL: test_ctselect_smin_zero:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    slti $2, $1, 0
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    and $2, $2, $1
+; M64-NEXT:    and $2, $1, $2
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
   ret i32 %result
@@ -25,17 +27,18 @@ define i32 @test_ctselect_smin_zero(i32 %x) {
 define i32 @test_ctselect_smax_zero(i32 %x) {
 ; M32-LABEL: test_ctselect_smax_zero:
 ; M32:       # %bb.0:
-; M32-NEXT:    slti $1, $4, 1
-; M32-NEXT:    movn $4, $zero, $1
+; M32-NEXT:    slt $1, $zero, $4
+; M32-NEXT:    negu $1, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    move $2, $4
+; M32-NEXT:    and $2, $4, $1
 ;
 ; M64-LABEL: test_ctselect_smax_zero:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    slti $1, $2, 1
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    slt $2, $zero, $1
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    movn $2, $zero, $1
+; M64-NEXT:    and $2, $1, $2
   %cmp = icmp sgt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
   ret i32 %result
@@ -45,27 +48,23 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
 define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_smin_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    slt $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    slt $2, $4, $5
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_smin_generic:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    slt $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    slt $2, $2, $1
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
-; M64-NEXT:    not $3, $3
-; M64-NEXT:    and $1, $3, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp slt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -75,27 +74,23 @@ define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_smax_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    slt $1, $5, $4
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    slt $2, $5, $4
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_smax_generic:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $2, $5, 0
-; M64-NEXT:    slt $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
-; M64-NEXT:    and $1, $3, $1
-; M64-NEXT:    not $3, $3
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    slt $2, $1, $2
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp sgt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -105,27 +100,23 @@ define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_umin_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    sltu $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    sltu $2, $4, $5
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_umin_generic:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sltu $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    sltu $2, $2, $1
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
-; M64-NEXT:    not $3, $3
-; M64-NEXT:    and $1, $3, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp ult i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -135,27 +126,23 @@ define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_umax_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    sltu $1, $5, $4
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    sltu $2, $5, $4
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_umax_generic:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $2, $5, 0
-; M64-NEXT:    sltu $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
-; M64-NEXT:    and $1, $3, $1
-; M64-NEXT:    not $3, $3
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    sltu $2, $1, $2
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp ugt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -165,24 +152,24 @@ define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_abs(i32 %x) {
 ; M32-LABEL: test_ctselect_abs:
 ; M32:       # %bb.0:
-; M32-NEXT:    negu $1, $4
-; M32-NEXT:    sra $2, $4, 31
+; M32-NEXT:    slti $1, $4, 0
+; M32-NEXT:    negu $2, $4
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    xor $2, $2, $4
 ; M32-NEXT:    and $1, $2, $1
-; M32-NEXT:    not $2, $2
-; M32-NEXT:    and $2, $2, $4
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $2
+; M32-NEXT:    xor $2, $4, $1
 ;
 ; M64-LABEL: test_ctselect_abs:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    sra $3, $1, 31
+; M64-NEXT:    slti $2, $1, 0
+; M64-NEXT:    negu $3, $1
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    xor $3, $3, $1
 ; M64-NEXT:    and $2, $3, $2
-; M64-NEXT:    not $3, $3
-; M64-NEXT:    and $1, $3, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %neg = sub i32 0, %x
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x)
@@ -193,24 +180,24 @@ define i32 @test_ctselect_abs(i32 %x) {
 define i32 @test_ctselect_nabs(i32 %x) {
 ; M32-LABEL: test_ctselect_nabs:
 ; M32:       # %bb.0:
-; M32-NEXT:    sra $1, $4, 31
-; M32-NEXT:    negu $3, $4
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    slti $1, $4, 0
+; M32-NEXT:    negu $2, $4
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    xor $3, $4, $2
+; M32-NEXT:    and $1, $3, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_nabs:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sra $2, $1, 31
-; M64-NEXT:    and $3, $2, $1
-; M64-NEXT:    negu $1, $1
-; M64-NEXT:    not $2, $2
-; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    slti $2, $1, 0
+; M64-NEXT:    negu $3, $1
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    xor $1, $1, $3
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $3, $1
+; M64-NEXT:    xor $2, $3, $1
   %neg = sub i32 0, %x
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg)
@@ -221,14 +208,16 @@ define i32 @test_ctselect_nabs(i32 %x) {
 define i32 @test_ctselect_sign_extend(i32 %x) {
 ; M32-LABEL: test_ctselect_sign_extend:
 ; M32:       # %bb.0:
+; M32-NEXT:    slti $1, $4, 0
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    sra $2, $4, 31
+; M32-NEXT:    negu $2, $1
 ;
 ; M64-LABEL: test_ctselect_sign_extend:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    slti $1, $1, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    negu $2, $1
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
   ret i32 %result
@@ -270,13 +259,12 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_constant_folding_false:
 ; M32:       # %bb.0:
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $zero, $5
+; M32-NEXT:    move $2, $5
 ;
 ; M64-LABEL: test_ctselect_constant_folding_false:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $zero, $1
+; M64-NEXT:    sll $2, $5, 0
   %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -285,25 +273,13 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
 define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
 ; M32-LABEL: test_ctselect_identical_operands:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $5
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    move $2, $5
 ;
 ; M64-LABEL: test_ctselect_identical_operands:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $5, 0
-; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    sll $2, $5, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
   ret i32 %result
 }
@@ -312,29 +288,27 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
 define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_inverted_condition:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltiu $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $7, $6
+; M32-NEXT:    sltiu $2, $2, 1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_inverted_condition:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    xor $2, $7, $6
 ; M64-NEXT:    sltiu $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cmp = icmp eq i32 %x, %y
   %not_cmp = xor i1 %cmp, true
   %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b)
@@ -345,57 +319,51 @@ define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; M32-LABEL: test_ctselect_chain:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $1, 16($sp)
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $2, $7, $1
+; M32-NEXT:    and $2, $2, $3
 ; M32-NEXT:    andi $3, $5, 1
-; M32-NEXT:    lw $5, 16($sp)
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    negu $4, $3
-; M32-NEXT:    addiu $3, $3, -1
-; M32-NEXT:    and $1, $1, $5
-; M32-NEXT:    and $2, $2, $7
-; M32-NEXT:    lw $5, 24($sp)
-; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    lw $2, 20($sp)
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    lw $3, 24($sp)
+; M32-NEXT:    xor $1, $2, $1
 ; M32-NEXT:    andi $2, $6, 1
-; M32-NEXT:    and $1, $4, $1
-; M32-NEXT:    addiu $4, $2, -1
+; M32-NEXT:    xor $1, $1, $3
 ; M32-NEXT:    negu $2, $2
-; M32-NEXT:    and $4, $4, $5
-; M32-NEXT:    lw $5, 20($sp)
-; M32-NEXT:    and $3, $3, $5
-; M32-NEXT:    or $1, $1, $3
-; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $4
+; M32-NEXT:    xor $2, $3, $1
 ;
 ; M64-LABEL: test_ctselect_chain:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $7, 0
-; M64-NEXT:    sll $4, $5, 0
+; M64-NEXT:    xor $2, $7, $8
+; M64-NEXT:    sll $3, $5, 0
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    andi $4, $4, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    negu $5, $4
-; M64-NEXT:    addiu $4, $4, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $8, 0
-; M64-NEXT:    and $1, $1, $3
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    sll $6, $10, 0
-; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $2, 0
 ; M64-NEXT:    andi $3, $3, 1
-; M64-NEXT:    and $1, $5, $1
-; M64-NEXT:    sll $5, $9, 0
-; M64-NEXT:    addiu $2, $3, -1
+; M64-NEXT:    negu $1, $1
 ; M64-NEXT:    negu $3, $3
-; M64-NEXT:    and $4, $4, $5
-; M64-NEXT:    and $2, $2, $6
-; M64-NEXT:    or $1, $1, $4
-; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $8, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $9, 0
+; M64-NEXT:    xor $1, $1, $2
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    andi $2, $3, 1
+; M64-NEXT:    sll $3, $10, 0
+; M64-NEXT:    xor $1, $1, $3
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $3, $1
   %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
   %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
   %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
@@ -406,16 +374,17 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c,
 define i64 @test_ctselect_i64_smin_zero(i64 %x) {
 ; M32-LABEL: test_ctselect_i64_smin_zero:
 ; M32:       # %bb.0:
-; M32-NEXT:    sra $1, $5, 31
-; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    slti $1, $5, 0
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    and $2, $4, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    and $3, $1, $5
+; M32-NEXT:    and $3, $5, $1
 ;
 ; M64-LABEL: test_ctselect_i64_smin_zero:
 ; M64:       # %bb.0:
 ; M64-NEXT:    dsra $1, $4, 63
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    and $2, $1, $4
+; M64-NEXT:    and $2, $4, $1
   %cmp = icmp slt i64 %x, 0
   %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
   ret i64 %result
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
index 6222f6052e12f..302e06b0a7335 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
@@ -6,21 +6,19 @@
 define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -30,26 +28,24 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -63,21 +59,19 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v8i16:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.h $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.h $w1, $w1, 15
-; MIPS64-MSA-NEXT:    srai.h $w1, $w1, 15
-; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
+; MIPS64-MSA-NEXT:    fill.h $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.h $w2, $w2, 15
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.h $w2, $w2, 15
+; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
+; MIPS64-MSA-NEXT:    shf.h $w1, $w1, 27
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -87,28 +81,26 @@ define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
-; MIPS32-MSA-NEXT:    fill.h $w1, $4
-; MIPS32-MSA-NEXT:    ldi.b $w0, -1
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    slli.h $w1, $w1, 15
-; MIPS32-MSA-NEXT:    srai.h $w1, $w1, 15
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    fill.h $w2, $4
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.h $w2, $w2, 15
+; MIPS32-MSA-NEXT:    srai.h $w2, $w2, 15
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
+; MIPS32-MSA-NEXT:    shf.h $w1, $w1, 177
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
@@ -123,22 +115,21 @@ define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
 define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v16i8:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w0[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w1[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
 ; MIPS64-MSA-NEXT:    fill.b $w2, $1
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $6
-; MIPS64-MSA-NEXT:    insert.d $w1[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
 ; MIPS64-MSA-NEXT:    slli.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
-; MIPS64-MSA-NEXT:    shf.b $w1, $w1, 27
 ; MIPS64-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    shf.b $w1, $w1, 27
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
-; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS64-MSA-NEXT:    xori.b $w2, $w2, 255
-; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
@@ -147,29 +138,28 @@ define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v16i8:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    insert.w $w0[0], $6
-; MIPS32-MSA-NEXT:    lw $1, 16($sp)
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.b $w2, $4
-; MIPS32-MSA-NEXT:    insert.w $w0[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.b $w2, $w2, 7
 ; MIPS32-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 28($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $1
-; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 36($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS32-MSA-NEXT:    xori.b $w2, $w2, 255
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
 ; MIPS32-MSA-NEXT:    shf.b $w1, $w1, 27
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
@@ -184,18 +174,16 @@ define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
 define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v2i64:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    fill.d $w2, $4
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
-; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
@@ -214,31 +202,28 @@ define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
 ; MIPS32-MSA-NEXT:    and $sp, $sp, $1
 ; MIPS32-MSA-NEXT:    lw $2, 56($fp)
 ; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    sw $4, 12($sp)
 ; MIPS32-MSA-NEXT:    sw $4, 4($sp)
-; MIPS32-MSA-NEXT:    ldi.b $w0, -1
-; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
-; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    ld.d $w2, 0($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 64($fp)
-; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 68($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 48($fp)
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 52($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
@@ -257,21 +242,19 @@ define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
 define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4f32:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -281,26 +264,24 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $5
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
   %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b)
@@ -311,18 +292,16 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v2f64:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    fill.d $w2, $4
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
-; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
@@ -341,31 +320,28 @@ define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double>
 ; MIPS32-MSA-NEXT:    and $sp, $sp, $1
 ; MIPS32-MSA-NEXT:    lw $2, 56($fp)
 ; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    sw $5, 12($sp)
 ; MIPS32-MSA-NEXT:    sw $5, 4($sp)
-; MIPS32-MSA-NEXT:    ldi.b $w0, -1
-; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
-; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    ld.d $w2, 0($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 64($fp)
-; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 68($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 48($fp)
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 52($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    st.d $w0, 0($4)
 ; MIPS32-MSA-NEXT:    move $sp, $fp
 ; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
@@ -381,16 +357,14 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_aligned_load:
 ; MIPS64-MSA:       # %bb.0:
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS64-MSA-NEXT:    ldi.b $w2, -1
-; MIPS64-MSA-NEXT:    fill.w $w0, $1
-; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -398,16 +372,14 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_aligned_load:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS32-MSA-NEXT:    ldi.b $w2, -1
-; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -424,16 +396,14 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
 ; MIPS64-MSA:       # %bb.0:
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS64-MSA-NEXT:    ldi.b $w2, -1
-; MIPS64-MSA-NEXT:    fill.w $w0, $1
-; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -441,16 +411,14 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS32-MSA-NEXT:    ldi.b $w2, -1
-; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -466,21 +434,19 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_store:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    st.w $w0, 0($9)
 ;
@@ -488,27 +454,25 @@ define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 40($sp)
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($1)
   %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
@@ -521,31 +485,28 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a,
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_chain:
 ; MIPS64-MSA:       # %bb.0:
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $6
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    sll $1, $5, 0
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $9
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $7
 ; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS64-MSA-NEXT:    xor.v $w3, $w2, $w1
-; MIPS64-MSA-NEXT:    and.v $w0, $w3, $w0
-; MIPS64-MSA-NEXT:    insert.d $w3[0], $6
-; MIPS64-MSA-NEXT:    insert.d $w3[1], $7
-; MIPS64-MSA-NEXT:    shf.w $w3, $w3, 177
-; MIPS64-MSA-NEXT:    and.v $w2, $w2, $w3
-; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $10
 ; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $11
 ; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $10
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $11
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -555,41 +516,38 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a,
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    lw $2, 40($sp)
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w3, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w3, $w0
-; MIPS32-MSA-NEXT:    insert.w $w3[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w3[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w3[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
-; MIPS32-MSA-NEXT:    insert.w $w3[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 44($sp)
-; MIPS32-MSA-NEXT:    and.v $w2, $w2, $w3
-; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $2
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 48($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 52($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -607,20 +565,18 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w3, $1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.w $w3, $w3, 31
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
-; MIPS64-MSA-NEXT:    srai.w $w3, $w3, 31
 ; MIPS64-MSA-NEXT:    fadd.w $w2, $w1, $w0
 ; MIPS64-MSA-NEXT:    fsub.w $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
-; MIPS64-MSA-NEXT:    xor.v $w1, $w3, $w1
-; MIPS64-MSA-NEXT:    and.v $w2, $w3, $w2
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -631,11 +587,8 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w3, $5
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w3, $w3, 31
-; MIPS32-MSA-NEXT:    srai.w $w3, $w3, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -647,11 +600,12 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
 ; MIPS32-MSA-NEXT:    fadd.w $w2, $w1, $w0
 ; MIPS32-MSA-NEXT:    fsub.w $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
-; MIPS32-MSA-NEXT:    xor.v $w1, $w3, $w1
-; MIPS32-MSA-NEXT:    and.v $w2, $w3, $w2
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w0
+; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
   %sum = fadd <4 x float> %x, %y
@@ -664,36 +618,32 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_mixed:
 ; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
+; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ld.w $w0, 0($5)
-; MIPS64-MSA-NEXT:    ldi.b $w2, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    addvi.w $w0, $w0, 1
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS64-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    addvi.w $w0, $w0, 2
+; MIPS64-MSA-NEXT:    addvi.w $w1, $w1, 1
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    st.w $w0, 0($7)
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_mixed:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    ld.w $w0, 0($5)
-; MIPS32-MSA-NEXT:    fill.w $w1, $4
-; MIPS32-MSA-NEXT:    ldi.b $w2, -1
-; MIPS32-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS32-MSA-NEXT:    addvi.w $w0, $w0, 1
-; MIPS32-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS32-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
+; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    addvi.w $w0, $w0, 2
+; MIPS32-MSA-NEXT:    addvi.w $w1, $w1, 1
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($7)
   %a = load <4 x i32>, ptr %p1, align 16
@@ -709,21 +659,19 @@ define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_args:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -733,26 +681,24 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -766,21 +712,19 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
 define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_multi_use:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    addv.w $w0, $w0, $w0
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
@@ -791,26 +735,24 @@ define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    addv.w $w0, $w0, $w0
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback.ll b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
index d89d7fc698712..6a61412367f76 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
@@ -11,7 +11,7 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
 ; M32-NEXT:    negu $2, $2
 ; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $2, $1, $6
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i8:
 ; M64:       # %bb.0:
@@ -23,7 +23,7 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
 ; M64-NEXT:    and $1, $2, $1
 ; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    xor $2, $1, $2
+; M64-NEXT:    xor $2, $2, $1
   %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
   ret i8 %result
 }
@@ -36,7 +36,7 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
 ; M32-NEXT:    negu $2, $2
 ; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $2, $1, $6
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i16:
 ; M64:       # %bb.0:
@@ -48,7 +48,7 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
 ; M64-NEXT:    and $1, $2, $1
 ; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    xor $2, $1, $2
+; M64-NEXT:    xor $2, $2, $1
   %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
   ret i16 %result
 }
@@ -56,26 +56,24 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
 define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_i32:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i32:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    xor $2, $5, $6
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -88,22 +86,21 @@ define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
 ; M32-NEXT:    negu $3, $3
 ; M32-NEXT:    xor $2, $6, $1
 ; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    xor $2, $2, $1
+; M32-NEXT:    xor $2, $1, $2
 ; M32-NEXT:    lw $1, 20($sp)
 ; M32-NEXT:    xor $4, $7, $1
 ; M32-NEXT:    and $3, $4, $3
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $3, $3, $1
+; M32-NEXT:    xor $3, $1, $3
 ;
 ; M64-LABEL: test_ctselect_i64:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
   ret i64 %result
 }
@@ -111,23 +108,21 @@ define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
 define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
 ; M32-LABEL: test_ctselect_ptr:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_ptr:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
   ret ptr %result
 }
@@ -151,13 +146,12 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_const_false:
 ; M32:       # %bb.0:
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $zero, $5
+; M32-NEXT:    move $2, $5
 ;
 ; M64-LABEL: test_ctselect_const_false:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $zero, $1
+; M64-NEXT:    sll $2, $5, 0
   %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -166,29 +160,27 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_eq:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltu $1, $zero, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    sltiu $2, $2, 1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_eq:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    sltu $1, $zero, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    sltiu $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp eq i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -197,29 +189,27 @@ define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_ne:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltiu $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    sltu $2, $zero, $2
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_ne:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    sltiu $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    sltu $1, $zero, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp ne i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -228,29 +218,25 @@ define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_slt:
 ; M32:       # %bb.0:
-; M32-NEXT:    slt $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    slt $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_slt:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    slt $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    xori $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp slt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -259,29 +245,25 @@ define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_ult:
 ; M32:       # %bb.0:
-; M32-NEXT:    sltu $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    sltu $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_ult:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    sltu $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    xori $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp ult i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -291,28 +273,26 @@ define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
 ; M32-LABEL: test_ctselect_load:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $2, 0($6)
 ; M32-NEXT:    lw $3, 0($5)
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    lw $3, 0($6)
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    xor $3, $3, $2
+; M32-NEXT:    and $1, $3, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_load:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    lw $3, 0($5)
-; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    sll $3, $4, 0
+; M64-NEXT:    lw $1, 0($6)
+; M64-NEXT:    lw $2, 0($5)
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    xor $2, $2, $1
+; M64-NEXT:    negu $3, $3
 ; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    lw $3, 0($6)
-; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %a = load i32, ptr %p1
   %b = load i32, ptr %p2
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
@@ -323,41 +303,37 @@ define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
 define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
 ; M32-LABEL: test_ctselect_nested:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $5, 1
+; M32-NEXT:    andi $2, $5, 1
+; M32-NEXT:    xor $1, $6, $7
 ; M32-NEXT:    andi $3, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    negu $4, $3
-; M32-NEXT:    and $2, $2, $6
-; M32-NEXT:    and $1, $1, $7
-; M32-NEXT:    or $1, $2, $1
-; M32-NEXT:    addiu $2, $3, -1
-; M32-NEXT:    lw $3, 16($sp)
-; M32-NEXT:    and $1, $4, $1
-; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    lw $2, 16($sp)
+; M32-NEXT:    xor $1, $7, $1
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $3
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $2
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_nested:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    sll $4, $4, 0
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    sll $3, $4, 0
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    andi $4, $4, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    negu $5, $4
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $7, 0
-; M64-NEXT:    and $1, $1, $3
-; M64-NEXT:    addiu $3, $4, -1
-; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    negu $3, $3
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
+; M64-NEXT:    xor $1, $2, $1
 ; M64-NEXT:    sll $2, $8, 0
-; M64-NEXT:    and $1, $5, $1
-; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    xor $1, $1, $2
+; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $2, $1
   %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
   %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
   ret i32 %result
diff --git a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
index 6cfa07afdd51e..069100e2d2a79 100644
--- a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
@@ -38,26 +38,24 @@ define i32 @test_constant_fold() {
 define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
 ; M32-LABEL: test_protected_no_branch:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_protected_no_branch:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    xor $2, $5, $6
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
 }

>From bce028978e479c7446be9378a19f33fdb628d44d Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Fri, 22 May 2026 12:18:41 -0400
Subject: [PATCH 4/4] [MIPS] Regen ctselect tests, drop MSA cases unsupported
 by new legalizer

The core PR's new CT_SELECT expansion normalizes the scalar mask to the
vector element type. On MIPS that element type is not always a legal
scalar register (i16, i8 on any MIPS; i64/double on mips32), triggering
"Unexpected illegal type!" in LegalizeOp.

Drop <8 x i16>, <16 x i8>, <2 x i64>, <2 x double> from the MSA fallback
vector test pending a fix in lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
(see TODO at top of test). Regen CHECK lines for everything else.
---
 .../Mips/ctselect-fallback-edge-cases.ll      |  54 +--
 .../Mips/ctselect-fallback-patterns.ll        |  60 +--
 .../CodeGen/Mips/ctselect-fallback-vector.ll  | 406 ++++--------------
 llvm/test/CodeGen/Mips/ctselect-fallback.ll   |  53 +--
 4 files changed, 177 insertions(+), 396 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
index 401a742c27eae..65b6a69597d93 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
@@ -36,16 +36,18 @@ define i32 @test_ctselect_extremal_values(i1 %cond) {
 ; M32:       # %bb.0:
 ; M32-NEXT:    andi $1, $4, 1
 ; M32-NEXT:    lui $2, 32768
+; M32-NEXT:    negu $1, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    subu $2, $2, $1
+; M32-NEXT:    xor $2, $1, $2
 ;
 ; M64-LABEL: test_ctselect_extremal_values:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
 ; M64-NEXT:    lui $2, 32768
 ; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $1, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    subu $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
   ret i32 %result
 }
@@ -155,55 +157,55 @@ define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a,
 ; M32-NEXT:    andi $4, $6, 1
 ; M32-NEXT:    lw $6, 28($sp)
 ; M32-NEXT:    negu $3, $3
+; M32-NEXT:    negu $4, $4
 ; M32-NEXT:    xor $2, $2, $1
 ; M32-NEXT:    and $2, $2, $3
 ; M32-NEXT:    andi $3, $5, 1
-; M32-NEXT:    lw $5, 32($sp)
+; M32-NEXT:    andi $5, $7, 1
+; M32-NEXT:    lw $7, 24($sp)
 ; M32-NEXT:    xor $1, $1, $2
-; M32-NEXT:    lw $2, 24($sp)
 ; M32-NEXT:    negu $3, $3
-; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    lw $2, 32($sp)
+; M32-NEXT:    negu $5, $5
+; M32-NEXT:    xor $1, $1, $7
 ; M32-NEXT:    and $1, $1, $3
-; M32-NEXT:    andi $3, $7, 1
-; M32-NEXT:    xor $1, $2, $1
-; M32-NEXT:    negu $2, $4
-; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $1, $7, $1
 ; M32-NEXT:    xor $1, $1, $6
-; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    and $1, $1, $4
 ; M32-NEXT:    xor $1, $6, $1
-; M32-NEXT:    xor $1, $1, $5
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $5
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $2, $5, $1
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_deeply_nested:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
 ; M64-NEXT:    xor $2, $8, $9
-; M64-NEXT:    sll $5, $5, 0
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    sll $6, $11, 0
+; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    sll $5, $11, 0
 ; M64-NEXT:    sll $4, $7, 0
 ; M64-NEXT:    lw $7, 0($sp)
 ; M64-NEXT:    andi $1, $1, 1
 ; M64-NEXT:    sll $2, $2, 0
-; M64-NEXT:    andi $5, $5, 1
 ; M64-NEXT:    andi $3, $3, 1
 ; M64-NEXT:    andi $4, $4, 1
 ; M64-NEXT:    negu $1, $1
-; M64-NEXT:    negu $5, $5
+; M64-NEXT:    negu $3, $3
 ; M64-NEXT:    negu $4, $4
 ; M64-NEXT:    and $1, $2, $1
-; M64-NEXT:    sll $2, $9, 0
-; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $10, 0
-; M64-NEXT:    xor $1, $1, $2
-; M64-NEXT:    and $1, $1, $5
-; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    negu $2, $3
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sll $6, $9, 0
+; M64-NEXT:    xor $1, $6, $1
+; M64-NEXT:    sll $6, $10, 0
+; M64-NEXT:    andi $2, $2, 1
 ; M64-NEXT:    xor $1, $1, $6
-; M64-NEXT:    and $1, $1, $2
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    xor $1, $6, $1
+; M64-NEXT:    xor $1, $1, $5
+; M64-NEXT:    and $1, $1, $2
+; M64-NEXT:    xor $1, $5, $1
 ; M64-NEXT:    xor $1, $1, $7
 ; M64-NEXT:    and $1, $1, $4
 ; M64-NEXT:    jr $ra
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
index a1c5d524c6939..dd15e5d4e57a0 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
@@ -244,13 +244,17 @@ define i32 @test_ctselect_zero_extend(i32 %x) {
 define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_constant_folding_true:
 ; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    move $2, $4
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_constant_folding_true:
 ; M64:       # %bb.0:
+; M64-NEXT:    xor $1, $4, $5
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    sll $1, $1, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    xor $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -259,12 +263,13 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_constant_folding_false:
 ; M32:       # %bb.0:
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    move $2, $5
+; M32-NEXT:    xor $2, $5, $zero
 ;
 ; M64-LABEL: test_ctselect_constant_folding_false:
 ; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    xor $2, $1, $zero
   %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -274,12 +279,13 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
 ; M32-LABEL: test_ctselect_identical_operands:
 ; M32:       # %bb.0:
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    move $2, $5
+; M32-NEXT:    xor $2, $5, $zero
 ;
 ; M64-LABEL: test_ctselect_identical_operands:
 ; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    xor $2, $1, $zero
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
   ret i32 %result
 }
@@ -321,49 +327,49 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c,
 ; M32:       # %bb.0:
 ; M32-NEXT:    lw $1, 16($sp)
 ; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    andi $4, $6, 1
 ; M32-NEXT:    negu $3, $3
+; M32-NEXT:    negu $4, $4
 ; M32-NEXT:    xor $2, $7, $1
 ; M32-NEXT:    and $2, $2, $3
 ; M32-NEXT:    andi $3, $5, 1
+; M32-NEXT:    lw $5, 20($sp)
 ; M32-NEXT:    xor $1, $1, $2
-; M32-NEXT:    lw $2, 20($sp)
 ; M32-NEXT:    negu $3, $3
-; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    lw $2, 24($sp)
+; M32-NEXT:    xor $1, $1, $5
 ; M32-NEXT:    and $1, $1, $3
-; M32-NEXT:    lw $3, 24($sp)
-; M32-NEXT:    xor $1, $2, $1
-; M32-NEXT:    andi $2, $6, 1
-; M32-NEXT:    xor $1, $1, $3
-; M32-NEXT:    negu $2, $2
-; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    xor $1, $5, $1
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $4
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $2, $3, $1
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_chain:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
 ; M64-NEXT:    xor $2, $7, $8
 ; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    sll $4, $8, 0
+; M64-NEXT:    sll $5, $9, 0
 ; M64-NEXT:    andi $1, $1, 1
 ; M64-NEXT:    sll $2, $2, 0
-; M64-NEXT:    andi $3, $3, 1
 ; M64-NEXT:    negu $1, $1
-; M64-NEXT:    negu $3, $3
 ; M64-NEXT:    and $1, $2, $1
-; M64-NEXT:    sll $2, $8, 0
-; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $9, 0
-; M64-NEXT:    xor $1, $1, $2
-; M64-NEXT:    and $1, $1, $3
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    xor $1, $2, $1
 ; M64-NEXT:    andi $2, $3, 1
-; M64-NEXT:    sll $3, $10, 0
-; M64-NEXT:    xor $1, $1, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    xor $1, $4, $1
 ; M64-NEXT:    negu $2, $2
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    sll $4, $10, 0
+; M64-NEXT:    xor $1, $1, $5
+; M64-NEXT:    negu $3, $3
 ; M64-NEXT:    and $1, $1, $2
+; M64-NEXT:    xor $1, $5, $1
+; M64-NEXT:    xor $1, $1, $4
+; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    xor $2, $3, $1
+; M64-NEXT:    xor $2, $4, $1
   %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
   %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
   %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
index 302e06b0a7335..f7b6aac3e7f25 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
@@ -2,6 +2,13 @@
 ; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS64-MSA
 ; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS32-MSA
 
+; TODO: <8 x i16>, <16 x i8>, <2 x i64>, <2 x double> MSA cases crash in the
+; new legalizer with "Unexpected illegal type!" in LegalizeOp. The CT_SELECT
+; expansion normalizes the scalar mask to the vector element type
+; (i16/i8/i64/double here), which is not always a legal scalar register on
+; MIPS (i16 and i8 on any MIPS; i64 and double on mips32). Fix needed in
+; lib/CodeGen/SelectionDAG/LegalizeDAG.cpp.
+
 ; Test 32-bit integer vector (128 bits)
 define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32:
@@ -9,14 +16,14 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    negu $1, $1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
@@ -29,11 +36,8 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -43,6 +47,9 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
@@ -56,187 +63,10 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 }
 
 ; Test 16-bit integer vector (8 x i16 = 128-bit)
-define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
-; MIPS64-MSA-LABEL: test_ctselect_v8i16:
-; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
-; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.h $w2, $1
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.h $w2, $w2, 15
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.h $w2, $w2, 15
-; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
-; MIPS64-MSA-NEXT:    shf.h $w1, $w1, 27
-; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
-; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
-; MIPS64-MSA-NEXT:    jr $ra
-; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
-;
-; MIPS32-MSA-LABEL: test_ctselect_v8i16:
-; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    lw $2, 24($sp)
-; MIPS32-MSA-NEXT:    lw $1, 28($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.h $w2, $4
-; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.h $w2, $w2, 15
-; MIPS32-MSA-NEXT:    srai.h $w2, $w2, 15
-; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
-; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 36($sp)
-; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 20($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
-; MIPS32-MSA-NEXT:    shf.h $w1, $w1, 177
-; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
-; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
-; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
-; MIPS32-MSA-NEXT:    jr $ra
-; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
-  %result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b)
-  ret <8 x i16> %result
-}
 
 ; Test byte vector (16 x i8 = 128-bit)
-define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
-; MIPS64-MSA-LABEL: test_ctselect_v16i8:
-; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
-; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.b $w2, $1
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.b $w2, $w2, 7
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
-; MIPS64-MSA-NEXT:    srai.b $w2, $w2, 7
-; MIPS64-MSA-NEXT:    shf.b $w1, $w1, 27
-; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
-; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
-; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
-; MIPS64-MSA-NEXT:    jr $ra
-; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
-;
-; MIPS32-MSA-LABEL: test_ctselect_v16i8:
-; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    lw $2, 24($sp)
-; MIPS32-MSA-NEXT:    lw $1, 28($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.b $w2, $4
-; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.b $w2, $w2, 7
-; MIPS32-MSA-NEXT:    srai.b $w2, $w2, 7
-; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
-; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 36($sp)
-; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 20($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
-; MIPS32-MSA-NEXT:    shf.b $w1, $w1, 27
-; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
-; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
-; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
-; MIPS32-MSA-NEXT:    jr $ra
-; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
-  %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b)
-  ret <16 x i8> %result
-}
 
 ; Test 64-bit integer vector (2 x i64 = 128-bit)
-define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
-; MIPS64-MSA-LABEL: test_ctselect_v2i64:
-; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
-; MIPS64-MSA-NEXT:    fill.d $w2, $4
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
-; MIPS64-MSA-NEXT:    jr $ra
-; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
-;
-; MIPS32-MSA-LABEL: test_ctselect_v2i64:
-; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    addiu $sp, $sp, -32
-; MIPS32-MSA-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32-MSA-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32-MSA-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32-MSA-NEXT:    .cfi_offset 31, -4
-; MIPS32-MSA-NEXT:    .cfi_offset 30, -8
-; MIPS32-MSA-NEXT:    move $fp, $sp
-; MIPS32-MSA-NEXT:    .cfi_def_cfa_register 30
-; MIPS32-MSA-NEXT:    addiu $1, $zero, -16
-; MIPS32-MSA-NEXT:    and $sp, $sp, $1
-; MIPS32-MSA-NEXT:    lw $2, 56($fp)
-; MIPS32-MSA-NEXT:    lw $1, 60($fp)
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    sw $4, 12($sp)
-; MIPS32-MSA-NEXT:    sw $4, 4($sp)
-; MIPS32-MSA-NEXT:    ld.d $w2, 0($sp)
-; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
-; MIPS32-MSA-NEXT:    lw $1, 64($fp)
-; MIPS32-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 68($fp)
-; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 48($fp)
-; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 52($fp)
-; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS32-MSA-NEXT:    shf.w $w1, $w1, 177
-; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
-; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
-; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
-; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
-; MIPS32-MSA-NEXT:    move $sp, $fp
-; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32-MSA-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32-MSA-NEXT:    jr $ra
-; MIPS32-MSA-NEXT:    addiu $sp, $sp, 32
-  %result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b)
-  ret <2 x i64> %result
-}
 
 ; Test single-precision float vector (4 x float = 128-bit)
 define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
@@ -245,14 +75,14 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    negu $1, $1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
@@ -265,11 +95,8 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w2, $5
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -279,6 +106,9 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    andi $1, $5, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
@@ -289,68 +119,6 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 }
 
 ; Test double-precision float vector (2 x double = 128-bit)
-define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
-; MIPS64-MSA-LABEL: test_ctselect_v2f64:
-; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
-; MIPS64-MSA-NEXT:    fill.d $w2, $4
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
-; MIPS64-MSA-NEXT:    jr $ra
-; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
-;
-; MIPS32-MSA-LABEL: test_ctselect_v2f64:
-; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    addiu $sp, $sp, -32
-; MIPS32-MSA-NEXT:    .cfi_def_cfa_offset 32
-; MIPS32-MSA-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
-; MIPS32-MSA-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
-; MIPS32-MSA-NEXT:    .cfi_offset 31, -4
-; MIPS32-MSA-NEXT:    .cfi_offset 30, -8
-; MIPS32-MSA-NEXT:    move $fp, $sp
-; MIPS32-MSA-NEXT:    .cfi_def_cfa_register 30
-; MIPS32-MSA-NEXT:    addiu $1, $zero, -16
-; MIPS32-MSA-NEXT:    and $sp, $sp, $1
-; MIPS32-MSA-NEXT:    lw $2, 56($fp)
-; MIPS32-MSA-NEXT:    lw $1, 60($fp)
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    sw $5, 12($sp)
-; MIPS32-MSA-NEXT:    sw $5, 4($sp)
-; MIPS32-MSA-NEXT:    ld.d $w2, 0($sp)
-; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
-; MIPS32-MSA-NEXT:    lw $1, 64($fp)
-; MIPS32-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 68($fp)
-; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 48($fp)
-; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 52($fp)
-; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS32-MSA-NEXT:    shf.w $w1, $w1, 177
-; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS32-MSA-NEXT:    st.d $w0, 0($4)
-; MIPS32-MSA-NEXT:    move $sp, $fp
-; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
-; MIPS32-MSA-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
-; MIPS32-MSA-NEXT:    jr $ra
-; MIPS32-MSA-NEXT:    addiu $sp, $sp, 32
-  %result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b)
-  ret <2 x double> %result
-}
 
 ; Test with aligned loads (common case)
 define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
@@ -359,10 +127,10 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
 ; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
+; MIPS64-MSA-NEXT:    negu $1, $1
 ; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
@@ -372,12 +140,12 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_aligned_load:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
 ; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
@@ -398,10 +166,10 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
 ; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
+; MIPS64-MSA-NEXT:    negu $1, $1
 ; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
@@ -411,12 +179,12 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
 ; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
@@ -437,14 +205,14 @@ define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    negu $1, $1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -455,11 +223,8 @@ define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -469,8 +234,11 @@ define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 40($sp)
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    lw $1, 40($sp)
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
@@ -487,22 +255,22 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a,
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $6
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
-; MIPS64-MSA-NEXT:    sll $1, $5, 0
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $9
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $7
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    negu $1, $1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    sll $1, $5, 0
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
+; MIPS64-MSA-NEXT:    negu $1, $1
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $10
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $11
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
@@ -517,12 +285,10 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a,
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    lw $3, 40($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    lw $2, 40($sp)
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    lw $2, 44($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -532,19 +298,21 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a,
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 44($sp)
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    andi $1, $5, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $2
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $1
-; MIPS32-MSA-NEXT:    lw $1, 48($sp)
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 52($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $3
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $2
+; MIPS32-MSA-NEXT:    lw $2, 48($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $2
+; MIPS32-MSA-NEXT:    lw $2, 52($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
@@ -565,16 +333,16 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    negu $1, $1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    fadd.w $w2, $w1, $w0
 ; MIPS64-MSA-NEXT:    fsub.w $w0, $w1, $w0
 ; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w0
 ; MIPS64-MSA-NEXT:    fill.w $w2, $1
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
@@ -598,12 +366,12 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    andi $1, $5, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
 ; MIPS32-MSA-NEXT:    fadd.w $w2, $w1, $w0
 ; MIPS32-MSA-NEXT:    fsub.w $w0, $w1, $w0
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w0
-; MIPS32-MSA-NEXT:    fill.w $w2, $5
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
@@ -621,12 +389,12 @@ define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
+; MIPS64-MSA-NEXT:    negu $1, $1
 ; MIPS64-MSA-NEXT:    addvi.w $w0, $w0, 2
 ; MIPS64-MSA-NEXT:    addvi.w $w1, $w1, 1
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -636,11 +404,11 @@ define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
 ; MIPS32-MSA-NEXT:    addvi.w $w0, $w0, 2
 ; MIPS32-MSA-NEXT:    addvi.w $w1, $w1, 1
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
@@ -662,14 +430,14 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    negu $1, $1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
@@ -682,11 +450,8 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -696,6 +461,9 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
@@ -715,14 +483,14 @@ define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    andi $1, $1, 1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
-; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    negu $1, $1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    addv.w $w0, $w0, $w0
@@ -736,11 +504,8 @@ define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w2, $4
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -750,6 +515,9 @@ define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    andi $1, $4, 1
+; MIPS32-MSA-NEXT:    negu $1, $1
+; MIPS32-MSA-NEXT:    fill.w $w2, $1
 ; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback.ll b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
index 6a61412367f76..8e0fe458712f9 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
@@ -81,17 +81,17 @@ define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
 define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
 ; M32-LABEL: test_ctselect_i64:
 ; M32:       # %bb.0:
-; M32-NEXT:    lw $1, 16($sp)
 ; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    lw $1, 16($sp)
+; M32-NEXT:    lw $4, 20($sp)
 ; M32-NEXT:    negu $3, $3
 ; M32-NEXT:    xor $2, $6, $1
+; M32-NEXT:    xor $5, $7, $4
 ; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    and $3, $5, $3
 ; M32-NEXT:    xor $2, $1, $2
-; M32-NEXT:    lw $1, 20($sp)
-; M32-NEXT:    xor $4, $7, $1
-; M32-NEXT:    and $3, $4, $3
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $3, $1, $3
+; M32-NEXT:    xor $3, $4, $3
 ;
 ; M64-LABEL: test_ctselect_i64:
 ; M64:       # %bb.0:
@@ -131,13 +131,17 @@ define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
 define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_const_true:
 ; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    move $2, $4
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_const_true:
 ; M64:       # %bb.0:
+; M64-NEXT:    xor $1, $4, $5
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    sll $1, $1, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    xor $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -146,12 +150,13 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_const_false:
 ; M32:       # %bb.0:
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    move $2, $5
+; M32-NEXT:    xor $2, $5, $zero
 ;
 ; M64-LABEL: test_ctselect_const_false:
 ; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    xor $2, $1, $zero
   %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -305,35 +310,35 @@ define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
 ; M32:       # %bb.0:
 ; M32-NEXT:    andi $2, $5, 1
 ; M32-NEXT:    xor $1, $6, $7
-; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    lw $3, 16($sp)
 ; M32-NEXT:    negu $2, $2
-; M32-NEXT:    negu $3, $3
 ; M32-NEXT:    and $1, $1, $2
-; M32-NEXT:    lw $2, 16($sp)
+; M32-NEXT:    andi $2, $4, 1
 ; M32-NEXT:    xor $1, $7, $1
-; M32-NEXT:    xor $1, $1, $2
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    xor $1, $1, $3
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $2, $2, $1
+; M32-NEXT:    xor $2, $3, $1
 ;
 ; M64-LABEL: test_ctselect_nested:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    xor $2, $6, $7
-; M64-NEXT:    sll $3, $4, 0
+; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    andi $1, $1, 1
 ; M64-NEXT:    sll $2, $2, 0
-; M64-NEXT:    andi $3, $3, 1
 ; M64-NEXT:    negu $1, $1
-; M64-NEXT:    negu $3, $3
 ; M64-NEXT:    and $1, $2, $1
-; M64-NEXT:    sll $2, $7, 0
-; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $8, 0
-; M64-NEXT:    xor $1, $1, $2
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    andi $2, $2, 1
+; M64-NEXT:    xor $1, $3, $1
+; M64-NEXT:    sll $3, $8, 0
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    xor $1, $1, $3
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    xor $2, $2, $1
+; M64-NEXT:    xor $2, $3, $1
   %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
   %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
   ret i32 %result