[llvm-branch-commits] [llvm] [ConstantTime][MIPS] Add comprehensive tests for ct.select (PR #166705)

Julius Alexandre via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Sat Mar 7 13:40:40 PST 2026


https://github.com/wizardengineer updated https://github.com/llvm/llvm-project/pull/166705

>From 89b18c0965ccec867ad8e234c77ac7f83c7edfc5 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 11:01:26 -0500
Subject: [PATCH 1/2] [LLVM][MIPS] Add comprehensive tests for ct.select

---
 .../Mips/ctselect-fallback-edge-cases.ll      | 244 +++++
 .../Mips/ctselect-fallback-patterns.ll        | 426 +++++++++
 .../CodeGen/Mips/ctselect-fallback-vector.ll  | 830 ++++++++++++++++++
 llvm/test/CodeGen/Mips/ctselect-fallback.ll   | 371 ++++++++
 .../CodeGen/Mips/ctselect-side-effects.ll     | 183 ++++
 5 files changed, 2054 insertions(+)
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-fallback.ll
 create mode 100644 llvm/test/CodeGen/Mips/ctselect-side-effects.ll

diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
new file mode 100644
index 0000000000000..f1831a625d4a4
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
@@ -0,0 +1,244 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Portable edge case tests
+
+; Test with small integer types
+define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
+; M32-LABEL: test_ctselect_i1:
+; M32:       # %bb.0:
+; M32-NEXT:    xori $2, $4, 1
+; M32-NEXT:    and $1, $4, $5
+; M32-NEXT:    and $2, $2, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $2
+;
+; M64-LABEL: test_ctselect_i1:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $1, $6, 0
+; M64-NEXT:    xori $2, $2, 1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    and $2, $4, $5
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
+  ret i1 %result
+}
+
+; Test with extremal values
+define i32 @test_ctselect_extremal_values(i1 %cond) {
+; M32-LABEL: test_ctselect_extremal_values:
+; M32:       # %bb.0:
+; M32-NEXT:    lui $3, 32767
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    ori $3, $3, 65535
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    lui $3, 32768
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_extremal_values:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    lui $3, 32767
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    ori $3, $3, 65535
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    lui $3, 32768
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
+  ret i32 %result
+}
+
+; Test with null pointers
+define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
+; M32-LABEL: test_ctselect_null_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    and $2, $1, $5
+;
+; M64-LABEL: test_ctselect_null_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $1, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    and $2, $1, $5
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
+  ret ptr %result
+}
+
+; Test with function pointers
+define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
+; M32-LABEL: test_ctselect_function_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_function_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
+  ret ptr %result
+}
+
+; Test with condition from icmp on pointers
+define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
+; M32-LABEL: test_ctselect_ptr_cmp:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltu $1, $zero, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_ptr_cmp:
+; M64:       # %bb.0:
+; M64-NEXT:    xor $1, $4, $5
+; M64-NEXT:    daddiu $3, $zero, -1
+; M64-NEXT:    daddiu $2, $zero, -1
+; M64-NEXT:    movn $3, $zero, $1
+; M64-NEXT:    xor $2, $3, $2
+; M64-NEXT:    and $1, $3, $6
+; M64-NEXT:    and $2, $2, $7
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %cmp = icmp eq ptr %p1, %p2
+  %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Test with struct pointer types
+%struct.pair = type { i32, i32 }
+
+define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
+; M32-LABEL: test_ctselect_struct_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_struct_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Test with deeply nested conditions
+define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; M32-LABEL: test_ctselect_deeply_nested:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $3, 16($sp)
+; M32-NEXT:    lw $9, 32($sp)
+; M32-NEXT:    lw $8, 28($sp)
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    lw $3, 20($sp)
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    andi $3, $5, 1
+; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    andi $2, $6, 1
+; M32-NEXT:    andi $6, $7, 1
+; M32-NEXT:    negu $4, $3
+; M32-NEXT:    addiu $3, $3, -1
+; M32-NEXT:    addiu $7, $6, -1
+; M32-NEXT:    and $1, $4, $1
+; M32-NEXT:    addiu $5, $2, -1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    negu $6, $6
+; M32-NEXT:    and $4, $7, $9
+; M32-NEXT:    lw $7, 24($sp)
+; M32-NEXT:    and $5, $5, $8
+; M32-NEXT:    and $3, $3, $7
+; M32-NEXT:    or $1, $1, $3
+; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    or $1, $1, $5
+; M32-NEXT:    and $1, $6, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $4
+;
+; M64-LABEL: test_ctselect_deeply_nested:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $8, 0
+; M64-NEXT:    sll $4, $5, 0
+; M64-NEXT:    lw $8, 0($sp)
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    andi $4, $4, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    negu $5, $4
+; M64-NEXT:    addiu $4, $4, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $9, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $3, $11, 0
+; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sll $6, $7, 0
+; M64-NEXT:    andi $2, $2, 1
+; M64-NEXT:    and $1, $5, $1
+; M64-NEXT:    andi $6, $6, 1
+; M64-NEXT:    addiu $5, $2, -1
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    addiu $7, $6, -1
+; M64-NEXT:    negu $6, $6
+; M64-NEXT:    and $3, $5, $3
+; M64-NEXT:    sll $5, $10, 0
+; M64-NEXT:    and $7, $7, $8
+; M64-NEXT:    and $4, $4, $5
+; M64-NEXT:    or $1, $1, $4
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    or $1, $1, $3
+; M64-NEXT:    and $1, $6, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $7
+  %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+  %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+  %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+  %sel4 = call i32 @llvm.ct.select.i32(i1 %c4, i32 %sel3, i32 %e)
+  ret i32 %sel4
+}
+
+; Declare the intrinsics
+declare i1 @llvm.ct.select.i1(i1, i1, i1)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
new file mode 100644
index 0000000000000..2e65e586ce5fa
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
@@ -0,0 +1,426 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Test smin(x, 0) pattern
+define i32 @test_ctselect_smin_zero(i32 %x) {
+; M32-LABEL: test_ctselect_smin_zero:
+; M32:       # %bb.0:
+; M32-NEXT:    sra $1, $4, 31
+; M32-NEXT:    jr $ra
+; M32-NEXT:    and $2, $1, $4
+;
+; M64-LABEL: test_ctselect_smin_zero:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    jr $ra
+; M64-NEXT:    and $2, $2, $1
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+  ret i32 %result
+}
+
+; Test smax(x, 0) pattern
+define i32 @test_ctselect_smax_zero(i32 %x) {
+; M32-LABEL: test_ctselect_smax_zero:
+; M32:       # %bb.0:
+; M32-NEXT:    slti $1, $4, 1
+; M32-NEXT:    movn $4, $zero, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_ctselect_smax_zero:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    slti $1, $2, 1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    movn $2, $zero, $1
+  %cmp = icmp sgt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+  ret i32 %result
+}
+
+; Test generic smin pattern
+define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_smin_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    slt $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_smin_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    slt $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cmp = icmp slt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test generic smax pattern
+define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_smax_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    slt $1, $5, $4
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_smax_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    slt $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %cmp = icmp sgt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test umin pattern
+define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_umin_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    sltu $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_umin_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sltu $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cmp = icmp ult i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test umax pattern
+define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
+; M32-LABEL: test_ctselect_umax_generic:
+; M32:       # %bb.0:
+; M32-NEXT:    sltu $1, $5, $4
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_umax_generic:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:    sltu $3, $2, $1
+; M64-NEXT:    xori $3, $3, 1
+; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %cmp = icmp ugt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+  ret i32 %result
+}
+
+; Test abs pattern
+define i32 @test_ctselect_abs(i32 %x) {
+; M32-LABEL: test_ctselect_abs:
+; M32:       # %bb.0:
+; M32-NEXT:    negu $1, $4
+; M32-NEXT:    sra $2, $4, 31
+; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    not $2, $2
+; M32-NEXT:    and $2, $2, $4
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $2
+;
+; M64-LABEL: test_ctselect_abs:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    sra $3, $1, 31
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    not $3, $3
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %neg = sub i32 0, %x
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x)
+  ret i32 %result
+}
+
+; Test nabs pattern (negative abs)
+define i32 @test_ctselect_nabs(i32 %x) {
+; M32-LABEL: test_ctselect_nabs:
+; M32:       # %bb.0:
+; M32-NEXT:    sra $1, $4, 31
+; M32-NEXT:    negu $3, $4
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_nabs:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    and $3, $2, $1
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    not $2, $2
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $3, $1
+  %neg = sub i32 0, %x
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg)
+  ret i32 %result
+}
+
+; Test sign extension pattern
+define i32 @test_ctselect_sign_extend(i32 %x) {
+; M32-LABEL: test_ctselect_sign_extend:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    sra $2, $4, 31
+;
+; M64-LABEL: test_ctselect_sign_extend:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sra $2, $1, 31
+  %cmp = icmp slt i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
+  ret i32 %result
+}
+
+; Test zero extension pattern
+define i32 @test_ctselect_zero_extend(i32 %x) {
+; M32-LABEL: test_ctselect_zero_extend:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    sltu $2, $zero, $4
+;
+; M64-LABEL: test_ctselect_zero_extend:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sltu $2, $zero, $1
+  %cmp = icmp ne i32 %x, 0
+  %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 1, i32 0)
+  ret i32 %result
+}
+
+; Test constant folding with known condition
+define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_constant_folding_true:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_ctselect_constant_folding_true:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_constant_folding_false:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $zero, $5
+;
+; M64-LABEL: test_ctselect_constant_folding_false:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $zero, $1
+  %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test with identical operands
+define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
+; M32-LABEL: test_ctselect_identical_operands:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_identical_operands:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
+  ret i32 %result
+}
+
+; Test with inverted condition
+define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_inverted_condition:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltiu $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_inverted_condition:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sltiu $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cmp = icmp eq i32 %x, %y
+  %not_cmp = xor i1 %cmp, true
+  %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test chain of ct.select operations
+define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) {
+; M32-LABEL: test_ctselect_chain:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    andi $3, $5, 1
+; M32-NEXT:    lw $5, 16($sp)
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    negu $4, $3
+; M32-NEXT:    addiu $3, $3, -1
+; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    and $2, $2, $7
+; M32-NEXT:    lw $5, 24($sp)
+; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    andi $2, $6, 1
+; M32-NEXT:    and $1, $4, $1
+; M32-NEXT:    addiu $4, $2, -1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $4, $4, $5
+; M32-NEXT:    lw $5, 20($sp)
+; M32-NEXT:    and $3, $3, $5
+; M32-NEXT:    or $1, $1, $3
+; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $4
+;
+; M64-LABEL: test_ctselect_chain:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    sll $4, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    andi $4, $4, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    negu $5, $4
+; M64-NEXT:    addiu $4, $4, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $8, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    sll $6, $10, 0
+; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    and $1, $5, $1
+; M64-NEXT:    sll $5, $9, 0
+; M64-NEXT:    addiu $2, $3, -1
+; M64-NEXT:    negu $3, $3
+; M64-NEXT:    and $4, $4, $5
+; M64-NEXT:    and $2, $2, $6
+; M64-NEXT:    or $1, $1, $4
+; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+  %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+  %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+  ret i32 %sel3
+}
+
+; Test for 64-bit operations (supported on all 64-bit architectures)
+define i64 @test_ctselect_i64_smin_zero(i64 %x) {
+; M32-LABEL: test_ctselect_i64_smin_zero:
+; M32:       # %bb.0:
+; M32-NEXT:    sra $1, $5, 31
+; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    jr $ra
+; M32-NEXT:    and $3, $1, $5
+;
+; M64-LABEL: test_ctselect_i64_smin_zero:
+; M64:       # %bb.0:
+; M64-NEXT:    dsra $1, $4, 63
+; M64-NEXT:    jr $ra
+; M64-NEXT:    and $2, $1, $4
+  %cmp = icmp slt i64 %x, 0
+  %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
+  ret i64 %result
+}
+
+; Declare the intrinsics
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
new file mode 100644
index 0000000000000..6222f6052e12f
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
@@ -0,0 +1,830 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mips64-unknown-linux-gnu -mcpu=mips64r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS64-MSA
+; RUN: llc < %s -mtriple=mips-unknown-linux-gnu -mcpu=mips32r6 -mattr=+msa -O3 | FileCheck %s --check-prefix=MIPS32-MSA
+
+; Test 32-bit integer vector (128 bits)
+define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test 16-bit integer vector (8 x i16 = 128-bit)
+define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v8i16:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.h $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.h $w1, $w1, 15
+; MIPS64-MSA-NEXT:    srai.h $w1, $w1, 15
+; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v8i16:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.h $w1, $4
+; MIPS32-MSA-NEXT:    ldi.b $w0, -1
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    slli.h $w1, $w1, 15
+; MIPS32-MSA-NEXT:    srai.h $w1, $w1, 15
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b)
+  ret <8 x i16> %result
+}
+
+; Test byte vector (16 x i8 = 128-bit)
+define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v16i8:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    fill.b $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $6
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $8
+; MIPS64-MSA-NEXT:    slli.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS64-MSA-NEXT:    shf.b $w1, $w1, 27
+; MIPS64-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    xori.b $w2, $w2, 255
+; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v16i8:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $6
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    fill.b $w2, $4
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $2
+; MIPS32-MSA-NEXT:    slli.b $w2, $w2, 7
+; MIPS32-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xori.b $w2, $w2, 255
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    shf.b $w1, $w1, 27
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b)
+  ret <16 x i8> %result
+}
+
+; Test 64-bit integer vector (2 x i64 = 128-bit)
+define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v2i64:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v2i64:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, -32
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32-MSA-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    .cfi_offset 31, -4
+; MIPS32-MSA-NEXT:    .cfi_offset 30, -8
+; MIPS32-MSA-NEXT:    move $fp, $sp
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_register 30
+; MIPS32-MSA-NEXT:    addiu $1, $zero, -16
+; MIPS32-MSA-NEXT:    and $sp, $sp, $1
+; MIPS32-MSA-NEXT:    lw $2, 56($fp)
+; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    sw $4, 12($sp)
+; MIPS32-MSA-NEXT:    sw $4, 4($sp)
+; MIPS32-MSA-NEXT:    ldi.b $w0, -1
+; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 64($fp)
+; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 68($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 48($fp)
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 52($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+; MIPS32-MSA-NEXT:    move $sp, $fp
+; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, 32
+  %result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b)
+  ret <2 x i64> %result
+}
+
+; Test single-precision float vector (4 x float = 128-bit)
+define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v4f32:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4f32:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
+  %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b)
+  ret <4 x float> %result
+}
+
+; Test double-precision float vector (2 x double = 128-bit)
+define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v2f64:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v2f64:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, -32
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_offset 32
+; MIPS32-MSA-NEXT:    sw $ra, 28($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    sw $fp, 24($sp) # 4-byte Folded Spill
+; MIPS32-MSA-NEXT:    .cfi_offset 31, -4
+; MIPS32-MSA-NEXT:    .cfi_offset 30, -8
+; MIPS32-MSA-NEXT:    move $fp, $sp
+; MIPS32-MSA-NEXT:    .cfi_def_cfa_register 30
+; MIPS32-MSA-NEXT:    addiu $1, $zero, -16
+; MIPS32-MSA-NEXT:    and $sp, $sp, $1
+; MIPS32-MSA-NEXT:    lw $2, 56($fp)
+; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    sw $5, 12($sp)
+; MIPS32-MSA-NEXT:    sw $5, 4($sp)
+; MIPS32-MSA-NEXT:    ldi.b $w0, -1
+; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 64($fp)
+; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 68($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 48($fp)
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 52($fp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    st.d $w0, 0($4)
+; MIPS32-MSA-NEXT:    move $sp, $fp
+; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    lw $ra, 28($sp) # 4-byte Folded Reload
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    addiu $sp, $sp, 32
+  %result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b)
+  ret <2 x double> %result
+}
+
+; Test with aligned loads (common case)
+define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_aligned_load:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS64-MSA-NEXT:    ldi.b $w2, -1
+; MIPS64-MSA-NEXT:    fill.w $w0, $1
+; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_aligned_load:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS32-MSA-NEXT:    ldi.b $w2, -1
+; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %a = load <4 x i32>, ptr %p1, align 16
+  %b = load <4 x i32>, ptr %p2, align 16
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with unaligned loads (stress test)
+define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS64-MSA-NEXT:    ldi.b $w2, -1
+; MIPS64-MSA-NEXT:    fill.w $w0, $1
+; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS32-MSA-NEXT:    ldi.b $w2, -1
+; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %a = load <4 x i32>, ptr %p1, align 4
+  %b = load <4 x i32>, ptr %p2, align 4
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with stores to verify result handling
+define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_store:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    st.w $w0, 0($9)
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_store:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 40($sp)
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($1)
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  store <4 x i32> %result, ptr %out, align 16
+  ret void
+}
+
+; Test chained selects (multiple conditions)
+define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_chain:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $8
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    sll $1, $5, 0
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $9
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    xor.v $w3, $w2, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w3, $w0
+; MIPS64-MSA-NEXT:    insert.d $w3[0], $6
+; MIPS64-MSA-NEXT:    insert.d $w3[1], $7
+; MIPS64-MSA-NEXT:    shf.w $w3, $w3, 177
+; MIPS64-MSA-NEXT:    and.v $w2, $w2, $w3
+; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $10
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $11
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_chain:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    lw $2, 40($sp)
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w3, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w3, $w0
+; MIPS32-MSA-NEXT:    insert.w $w3[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w3[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w3[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w3[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 44($sp)
+; MIPS32-MSA-NEXT:    and.v $w2, $w2, $w3
+; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 48($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 52($sp)
+; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %tmp = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond1, <4 x i32> %a, <4 x i32> %b)
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond2, <4 x i32> %tmp, <4 x i32> %c)
+  ret <4 x i32> %result
+}
+
+; Test with arithmetic operations (ensure float vectors work with FP ops)
+define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 x float> %y) {
+; MIPS64-MSA-LABEL: test_ctselect_v4f32_arithmetic:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    fill.w $w3, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w3, $w3, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    srai.w $w3, $w3, 31
+; MIPS64-MSA-NEXT:    fadd.w $w2, $w1, $w0
+; MIPS64-MSA-NEXT:    fsub.w $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    ldi.b $w1, -1
+; MIPS64-MSA-NEXT:    xor.v $w1, $w3, $w1
+; MIPS64-MSA-NEXT:    and.v $w2, $w3, $w2
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4f32_arithmetic:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    fill.w $w3, $5
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.w $w3, $w3, 31
+; MIPS32-MSA-NEXT:    srai.w $w3, $w3, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    fadd.w $w2, $w1, $w0
+; MIPS32-MSA-NEXT:    fsub.w $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w3, $w1
+; MIPS32-MSA-NEXT:    and.v $w2, $w3, $w2
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
+  %sum = fadd <4 x float> %x, %y
+  %diff = fsub <4 x float> %x, %y
+  %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %sum, <4 x float> %diff)
+  ret <4 x float> %result
+}
+
+; Test with mixed operations (load, compute, select, store)
+define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_mixed:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($5)
+; MIPS64-MSA-NEXT:    ldi.b $w2, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    addvi.w $w0, $w0, 1
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS64-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    st.w $w0, 0($7)
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_mixed:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($5)
+; MIPS32-MSA-NEXT:    fill.w $w1, $4
+; MIPS32-MSA-NEXT:    ldi.b $w2, -1
+; MIPS32-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS32-MSA-NEXT:    addvi.w $w0, $w0, 1
+; MIPS32-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
+; MIPS32-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    st.w $w0, 0($7)
+  %a = load <4 x i32>, ptr %p1, align 16
+  %b = load <4 x i32>, ptr %p2, align 16
+  %a_plus_1 = add <4 x i32> %a, <i32 1, i32 1, i32 1, i32 1>
+  %b_plus_2 = add <4 x i32> %b, <i32 2, i32 2, i32 2, i32 2>
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a_plus_1, <4 x i32> %b_plus_2)
+  store <4 x i32> %result, ptr %out, align 16
+  ret void
+}
+
+; Test with function arguments directly (no loads)
+define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_args:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_args:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  ret <4 x i32> %result
+}
+
+; Test with multiple uses of result
+define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; MIPS64-MSA-LABEL: test_ctselect_v4i32_multi_use:
+; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ldi.b $w0, -1
+; MIPS64-MSA-NEXT:    fill.w $w1, $1
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
+; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
+; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
+; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    addv.w $w0, $w0, $w0
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
+; MIPS64-MSA-NEXT:    jr $ra
+; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
+;
+; MIPS32-MSA-LABEL: test_ctselect_v4i32_multi_use:
+; MIPS32-MSA:       # %bb.0:
+; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ldi.b $w1, -1
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
+; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
+; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    addv.w $w0, $w0, $w0
+; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
+; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
+; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
+; MIPS32-MSA-NEXT:    jr $ra
+; MIPS32-MSA-NEXT:    copy_s.w $5, $w0[3]
+  %sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+  %add = add <4 x i32> %sel, %sel  ; Use result twice
+  ret <4 x i32> %add
+}
+
+declare <4 x i32> @llvm.ct.select.v4i32(i1, <4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.ct.select.v8i16(i1, <8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.ct.select.v16i8(i1, <16 x i8>, <16 x i8>)
+declare <2 x i64> @llvm.ct.select.v2i64(i1, <2 x i64>, <2 x i64>)
+declare <4 x float> @llvm.ct.select.v4f32(i1, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.ct.select.v2f64(i1, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback.ll b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
new file mode 100644
index 0000000000000..d89d7fc698712
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
@@ -0,0 +1,371 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Test basic ct.select functionality for scalar types
+define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
+; M32-LABEL: test_ctselect_i8:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    jr $ra
+; M32-NEXT:    xor $2, $1, $6
+;
+; M64-LABEL: test_ctselect_i8:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    xor $2, $5, $6
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    xor $2, $1, $2
+  %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
+  ret i8 %result
+}
+
+define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
+; M32-LABEL: test_ctselect_i16:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    jr $ra
+; M32-NEXT:    xor $2, $1, $6
+;
+; M64-LABEL: test_ctselect_i16:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    xor $2, $5, $6
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    xor $2, $1, $2
+  %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
+  ret i16 %result
+}
+
+define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_i32:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_i32:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
+; M32-LABEL: test_ctselect_i64:
+; M32:       # %bb.0:
+; M32-NEXT:    lw $1, 16($sp)
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $2, $6, $1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    xor $2, $2, $1
+; M32-NEXT:    lw $1, 20($sp)
+; M32-NEXT:    xor $4, $7, $1
+; M32-NEXT:    and $3, $4, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    xor $3, $3, $1
+;
+; M64-LABEL: test_ctselect_i64:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
+  ret i64 %result
+}
+
+define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
+; M32-LABEL: test_ctselect_ptr:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_ptr:
+; M64:       # %bb.0:
+; M64-NEXT:    andi $1, $4, 1
+; M64-NEXT:    dnegu $2, $1
+; M64-NEXT:    daddiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $5
+; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+  ret ptr %result
+}
+
+; Test with constant conditions
+define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_const_true:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_ctselect_const_true:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_const_false:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $zero, $5
+;
+; M64-LABEL: test_ctselect_const_false:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $zero, $1
+  %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test with comparison conditions
+define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_eq:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltu $1, $zero, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_eq:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sltu $1, $zero, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp eq i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_ne:
+; M32:       # %bb.0:
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    sltiu $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_ne:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sltiu $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp ne i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_slt:
+; M32:       # %bb.0:
+; M32-NEXT:    slt $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_slt:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    slt $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    xori $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp slt i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
+; M32-LABEL: test_ctselect_icmp_ult:
+; M32:       # %bb.0:
+; M32-NEXT:    sltu $1, $4, $5
+; M32-NEXT:    xori $1, $1, 1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $1, $6
+; M32-NEXT:    not $1, $1
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_icmp_ult:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    sltu $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    xori $1, $1, 1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $1, $2
+; M64-NEXT:    not $1, $1
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %cond = icmp ult i32 %x, %y
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test with memory operands
+define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
+; M32-LABEL: test_ctselect_load:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $3, 0($5)
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    lw $3, 0($6)
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_ctselect_load:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    lw $3, 0($5)
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    lw $3, 0($6)
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %a = load i32, ptr %p1
+  %b = load i32, ptr %p2
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test nested ctselect calls
+define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
+; M32-LABEL: test_ctselect_nested:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $5, 1
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    negu $4, $3
+; M32-NEXT:    and $2, $2, $6
+; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    addiu $2, $3, -1
+; M32-NEXT:    lw $3, 16($sp)
+; M32-NEXT:    and $1, $4, $1
+; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $1, $2
+;
+; M64-LABEL: test_ctselect_nested:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    sll $4, $4, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    andi $4, $4, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    negu $5, $4
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $7, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    addiu $3, $4, -1
+; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $8, 0
+; M64-NEXT:    and $1, $5, $1
+; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $1, $2
+  %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
+  %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
+  ret i32 %result
+}
+
+; Declare the intrinsics
+declare i8 @llvm.ct.select.i8(i1, i8, i8)
+declare i16 @llvm.ct.select.i16(i1, i16, i16)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
new file mode 100644
index 0000000000000..6cfa07afdd51e
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
@@ -0,0 +1,183 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=mipsel-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M32
+; RUN: llc < %s -mtriple=mips64el-unknown-linux-gnu -O3 | FileCheck %s --check-prefix=M64
+
+; Test 1: Basic optimizations should still work
+define i32 @test_basic_opts(i32 %x) {
+; M32-LABEL: test_basic_opts:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_basic_opts:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %a = or i32 %x, 0
+  %b = and i32 %a, -1
+  %c = xor i32 %b, 0
+  ret i32 %c
+}
+
+; Test 2: Constant folding should work
+define i32 @test_constant_fold() {
+; M32-LABEL: test_constant_fold:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_constant_fold:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %a = xor i32 -1, -1    ; Should fold to 0
+  ret i32 %a
+}
+
+; Test 3: Protected pattern should NOT have branches
+define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_protected_no_branch:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $2, $1
+; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    and $2, $2, $5
+; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    jr $ra
+; M32-NEXT:    or $2, $2, $1
+;
+; M64-LABEL: test_protected_no_branch:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    negu $2, $1
+; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    and $2, $2, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    jr $ra
+; M64-NEXT:    or $2, $2, $1
+  %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+  ret i32 %result
+}
+
+; Test 4: Explicit branch should still generate branches
+define i32 @test_explicit_branch(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_explicit_branch:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    beqz $1, $BB3_2
+; M32-NEXT:    nop
+; M32-NEXT:  # %bb.1: # %true
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $5
+; M32-NEXT:  $BB3_2: # %false
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $6
+;
+; M64-LABEL: test_explicit_branch:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    beqz $1, .LBB3_2
+; M64-NEXT:    nop
+; M64-NEXT:  # %bb.1: # %true
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $5, 0
+; M64-NEXT:  .LBB3_2: # %false
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $6, 0
+  br i1 %cond, label %true, label %false
+true:
+  ret i32 %a
+false:
+  ret i32 %b
+}
+
+; Test 5: Regular select (not ct.select) - backend is free to use movn/branches
+define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) {
+; M32-LABEL: test_regular_select:
+; M32:       # %bb.0:
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    movn $6, $5, $1
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $6
+;
+; M64-LABEL: test_regular_select:
+; M64:       # %bb.0:
+; M64-NEXT:    sll $3, $4, 0
+; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    jr $ra
+; M64-NEXT:    movn $2, $1, $3
+  %result = select i1 %cond, i32 %a, i32 %b
+  ret i32 %result
+}
+
+; Test if XOR with all-ones still gets optimized
+define i32 @test_xor_all_ones() {
+; M32-LABEL: test_xor_all_ones:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_xor_all_ones:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %xor1 = xor i32 -1, -1  ; Should optimize to 0
+  ret i32 %xor1
+}
+
+define i32 @test_xor_same_value(i32 %x) {
+; M32-LABEL: test_xor_same_value:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_xor_same_value:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %xor2 = xor i32 %x, %x  ; Should optimize to 0
+  ret i32 %xor2
+}
+
+define i32 @test_normal_ops(i32 %x) {
+; M32-LABEL: test_normal_ops:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    move $2, $4
+;
+; M64-LABEL: test_normal_ops:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    sll $2, $4, 0
+  %or1 = or i32 %x, 0      ; Should optimize to %x
+  %and1 = and i32 %or1, -1  ; Should optimize to %x
+  %xor1 = xor i32 %and1, 0  ; Should optimize to %x
+  ret i32 %xor1
+}
+
+; Verify that XOR constant folding is not suppressed by ct.select hardening
+define i32 @test_xor_with_const_operands() {
+; M32-LABEL: test_xor_with_const_operands:
+; M32:       # %bb.0:
+; M32-NEXT:    jr $ra
+; M32-NEXT:    addiu $2, $zero, 0
+;
+; M64-LABEL: test_xor_with_const_operands:
+; M64:       # %bb.0:
+; M64-NEXT:    jr $ra
+; M64-NEXT:    addiu $2, $zero, 0
+  %a = xor i32 -1, -1
+  %b = xor i32 0, 0
+  %c = xor i32 42, 42
+  %result = or i32 %a, %b
+  %final = or i32 %result, %c
+  ret i32 %final  ; Should optimize to 0
+}
+
+declare i32 @llvm.ct.select.i32(i1, i32, i32)

>From 9d45e799ab7c4dea36c4c8df5ed757a92886c814 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Sat, 7 Mar 2026 15:38:15 -0500
Subject: [PATCH 2/2] [LLVM][MIPS] Regenerate ct.select test CHECK lines

Update CHECK lines to match the new constant-time AND/OR/XOR expansion
from the CT_SELECT legalization fix.
---
 .../Mips/ctselect-fallback-edge-cases.ll      | 223 +++----
 .../Mips/ctselect-fallback-patterns.ll        | 297 ++++-----
 .../CodeGen/Mips/ctselect-fallback-vector.ll  | 570 ++++++++----------
 llvm/test/CodeGen/Mips/ctselect-fallback.ll   | 258 ++++----
 .../CodeGen/Mips/ctselect-side-effects.ll     |  24 +-
 5 files changed, 617 insertions(+), 755 deletions(-)

diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
index f1831a625d4a4..401a742c27eae 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-edge-cases.ll
@@ -8,22 +8,24 @@
 define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
 ; M32-LABEL: test_ctselect_i1:
 ; M32:       # %bb.0:
-; M32-NEXT:    xori $2, $4, 1
-; M32-NEXT:    and $1, $4, $5
-; M32-NEXT:    and $2, $2, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $2
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i1:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $1, $6, 0
-; M64-NEXT:    xori $2, $2, 1
-; M64-NEXT:    and $1, $2, $1
-; M64-NEXT:    and $2, $4, $5
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    xor $2, $5, $6
+; M64-NEXT:    andi $1, $1, 1
 ; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
   ret i1 %result
 }
@@ -32,30 +34,18 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
 define i32 @test_ctselect_extremal_values(i1 %cond) {
 ; M32-LABEL: test_ctselect_extremal_values:
 ; M32:       # %bb.0:
-; M32-NEXT:    lui $3, 32767
 ; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    ori $3, $3, 65535
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    lui $3, 32768
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    lui $2, 32768
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    subu $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_extremal_values:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    lui $3, 32767
+; M64-NEXT:    lui $2, 32768
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    ori $3, $3, 65535
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    lui $3, 32768
-; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    subu $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
   ret i32 %result
 }
@@ -67,14 +57,14 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
 ; M32-NEXT:    andi $1, $4, 1
 ; M32-NEXT:    negu $1, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    and $2, $1, $5
+; M32-NEXT:    and $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_null_ptr:
 ; M64:       # %bb.0:
 ; M64-NEXT:    andi $1, $4, 1
 ; M64-NEXT:    dnegu $1, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    and $2, $1, $5
+; M64-NEXT:    and $2, $5, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
   ret ptr %result
 }
@@ -83,23 +73,21 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
 define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
 ; M32-LABEL: test_ctselect_function_ptr:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_function_ptr:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
   ret ptr %result
 }
@@ -108,26 +96,25 @@ define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
 define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
 ; M32-LABEL: test_ctselect_ptr_cmp:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltu $1, $zero, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    sltiu $2, $2, 1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_ptr_cmp:
 ; M64:       # %bb.0:
-; M64-NEXT:    xor $1, $4, $5
-; M64-NEXT:    daddiu $3, $zero, -1
-; M64-NEXT:    daddiu $2, $zero, -1
-; M64-NEXT:    movn $3, $zero, $1
-; M64-NEXT:    xor $2, $3, $2
-; M64-NEXT:    and $1, $3, $6
-; M64-NEXT:    and $2, $2, $7
+; M64-NEXT:    xor $2, $4, $5
+; M64-NEXT:    xor $1, $6, $7
+; M64-NEXT:    sltiu $2, $2, 1
+; M64-NEXT:    dsll $2, $2, 32
+; M64-NEXT:    dsrl $2, $2, 32
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $7, $1
   %cmp = icmp eq ptr %p1, %p2
   %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
   ret ptr %result
@@ -139,23 +126,21 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
 define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
 ; M32-LABEL: test_ctselect_struct_ptr:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_struct_ptr:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
   ret ptr %result
 }
@@ -164,73 +149,65 @@ define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
 define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
 ; M32-LABEL: test_ctselect_deeply_nested:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    lw $3, 16($sp)
-; M32-NEXT:    lw $9, 32($sp)
-; M32-NEXT:    lw $8, 28($sp)
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
+; M32-NEXT:    lw $1, 20($sp)
+; M32-NEXT:    lw $2, 16($sp)
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    andi $4, $6, 1
+; M32-NEXT:    lw $6, 28($sp)
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $2, $2, $1
 ; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    lw $3, 20($sp)
-; M32-NEXT:    and $1, $1, $3
 ; M32-NEXT:    andi $3, $5, 1
-; M32-NEXT:    or $1, $2, $1
-; M32-NEXT:    andi $2, $6, 1
-; M32-NEXT:    andi $6, $7, 1
-; M32-NEXT:    negu $4, $3
-; M32-NEXT:    addiu $3, $3, -1
-; M32-NEXT:    addiu $7, $6, -1
-; M32-NEXT:    and $1, $4, $1
-; M32-NEXT:    addiu $5, $2, -1
-; M32-NEXT:    negu $2, $2
-; M32-NEXT:    negu $6, $6
-; M32-NEXT:    and $4, $7, $9
-; M32-NEXT:    lw $7, 24($sp)
-; M32-NEXT:    and $5, $5, $8
-; M32-NEXT:    and $3, $3, $7
-; M32-NEXT:    or $1, $1, $3
-; M32-NEXT:    and $1, $2, $1
-; M32-NEXT:    or $1, $1, $5
-; M32-NEXT:    and $1, $6, $1
+; M32-NEXT:    lw $5, 32($sp)
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    lw $2, 24($sp)
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    andi $3, $7, 1
+; M32-NEXT:    xor $1, $2, $1
+; M32-NEXT:    negu $2, $4
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $1, $1, $6
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    xor $1, $6, $1
+; M32-NEXT:    xor $1, $1, $5
+; M32-NEXT:    and $1, $1, $3
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $4
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_deeply_nested:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $8, 0
-; M64-NEXT:    sll $4, $5, 0
-; M64-NEXT:    lw $8, 0($sp)
+; M64-NEXT:    xor $2, $8, $9
+; M64-NEXT:    sll $5, $5, 0
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    sll $6, $11, 0
+; M64-NEXT:    sll $4, $7, 0
+; M64-NEXT:    lw $7, 0($sp)
 ; M64-NEXT:    andi $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    andi $5, $5, 1
+; M64-NEXT:    andi $3, $3, 1
 ; M64-NEXT:    andi $4, $4, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    negu $5, $4
-; M64-NEXT:    addiu $4, $4, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $9, 0
-; M64-NEXT:    and $1, $1, $3
-; M64-NEXT:    sll $3, $11, 0
-; M64-NEXT:    or $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    sll $6, $7, 0
-; M64-NEXT:    andi $2, $2, 1
-; M64-NEXT:    and $1, $5, $1
-; M64-NEXT:    andi $6, $6, 1
-; M64-NEXT:    addiu $5, $2, -1
-; M64-NEXT:    negu $2, $2
-; M64-NEXT:    addiu $7, $6, -1
-; M64-NEXT:    negu $6, $6
-; M64-NEXT:    and $3, $5, $3
-; M64-NEXT:    sll $5, $10, 0
-; M64-NEXT:    and $7, $7, $8
-; M64-NEXT:    and $4, $4, $5
-; M64-NEXT:    or $1, $1, $4
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    negu $5, $5
+; M64-NEXT:    negu $4, $4
 ; M64-NEXT:    and $1, $2, $1
-; M64-NEXT:    or $1, $1, $3
-; M64-NEXT:    and $1, $6, $1
+; M64-NEXT:    sll $2, $9, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $10, 0
+; M64-NEXT:    xor $1, $1, $2
+; M64-NEXT:    and $1, $1, $5
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    negu $2, $3
+; M64-NEXT:    xor $1, $1, $6
+; M64-NEXT:    and $1, $1, $2
+; M64-NEXT:    xor $1, $6, $1
+; M64-NEXT:    xor $1, $1, $7
+; M64-NEXT:    and $1, $1, $4
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $7
+; M64-NEXT:    xor $2, $7, $1
   %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
   %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
   %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
index 2e65e586ce5fa..a1c5d524c6939 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-patterns.ll
@@ -6,16 +6,18 @@
 define i32 @test_ctselect_smin_zero(i32 %x) {
 ; M32-LABEL: test_ctselect_smin_zero:
 ; M32:       # %bb.0:
-; M32-NEXT:    sra $1, $4, 31
+; M32-NEXT:    slti $1, $4, 0
+; M32-NEXT:    negu $1, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    and $2, $4, $1
 ;
 ; M64-LABEL: test_ctselect_smin_zero:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    slti $2, $1, 0
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    and $2, $2, $1
+; M64-NEXT:    and $2, $1, $2
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
   ret i32 %result
@@ -25,17 +27,18 @@ define i32 @test_ctselect_smin_zero(i32 %x) {
 define i32 @test_ctselect_smax_zero(i32 %x) {
 ; M32-LABEL: test_ctselect_smax_zero:
 ; M32:       # %bb.0:
-; M32-NEXT:    slti $1, $4, 1
-; M32-NEXT:    movn $4, $zero, $1
+; M32-NEXT:    slt $1, $zero, $4
+; M32-NEXT:    negu $1, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    move $2, $4
+; M32-NEXT:    and $2, $4, $1
 ;
 ; M64-LABEL: test_ctselect_smax_zero:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    slti $1, $2, 1
+; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    slt $2, $zero, $1
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    movn $2, $zero, $1
+; M64-NEXT:    and $2, $1, $2
   %cmp = icmp sgt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
   ret i32 %result
@@ -45,27 +48,23 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
 define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_smin_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    slt $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    slt $2, $4, $5
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_smin_generic:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    slt $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    slt $2, $2, $1
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
-; M64-NEXT:    not $3, $3
-; M64-NEXT:    and $1, $3, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp slt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -75,27 +74,23 @@ define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_smax_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    slt $1, $5, $4
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    slt $2, $5, $4
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_smax_generic:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $2, $5, 0
-; M64-NEXT:    slt $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
-; M64-NEXT:    and $1, $3, $1
-; M64-NEXT:    not $3, $3
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    slt $2, $1, $2
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp sgt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -105,27 +100,23 @@ define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_umin_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    sltu $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    sltu $2, $4, $5
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_umin_generic:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sltu $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    sltu $2, $2, $1
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
-; M64-NEXT:    not $3, $3
-; M64-NEXT:    and $1, $3, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp ult i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -135,27 +126,23 @@ define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
 ; M32-LABEL: test_ctselect_umax_generic:
 ; M32:       # %bb.0:
-; M32-NEXT:    sltu $1, $5, $4
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $5
+; M32-NEXT:    sltu $2, $5, $4
+; M32-NEXT:    xor $1, $4, $5
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $5, $1
 ;
 ; M64-LABEL: test_ctselect_umax_generic:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $2, $5, 0
-; M64-NEXT:    sltu $3, $2, $1
-; M64-NEXT:    xori $3, $3, 1
-; M64-NEXT:    addiu $3, $3, -1
-; M64-NEXT:    and $1, $3, $1
-; M64-NEXT:    not $3, $3
+; M64-NEXT:    sll $1, $5, 0
+; M64-NEXT:    sll $2, $4, 0
+; M64-NEXT:    xor $3, $2, $1
+; M64-NEXT:    sltu $2, $1, $2
+; M64-NEXT:    negu $2, $2
 ; M64-NEXT:    and $2, $3, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $1, $2
   %cmp = icmp ugt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
   ret i32 %result
@@ -165,24 +152,24 @@ define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
 define i32 @test_ctselect_abs(i32 %x) {
 ; M32-LABEL: test_ctselect_abs:
 ; M32:       # %bb.0:
-; M32-NEXT:    negu $1, $4
-; M32-NEXT:    sra $2, $4, 31
+; M32-NEXT:    slti $1, $4, 0
+; M32-NEXT:    negu $2, $4
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    xor $2, $2, $4
 ; M32-NEXT:    and $1, $2, $1
-; M32-NEXT:    not $2, $2
-; M32-NEXT:    and $2, $2, $4
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $2
+; M32-NEXT:    xor $2, $4, $1
 ;
 ; M64-LABEL: test_ctselect_abs:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    sra $3, $1, 31
+; M64-NEXT:    slti $2, $1, 0
+; M64-NEXT:    negu $3, $1
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    xor $3, $3, $1
 ; M64-NEXT:    and $2, $3, $2
-; M64-NEXT:    not $3, $3
-; M64-NEXT:    and $1, $3, $1
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %neg = sub i32 0, %x
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x)
@@ -193,24 +180,24 @@ define i32 @test_ctselect_abs(i32 %x) {
 define i32 @test_ctselect_nabs(i32 %x) {
 ; M32-LABEL: test_ctselect_nabs:
 ; M32:       # %bb.0:
-; M32-NEXT:    sra $1, $4, 31
-; M32-NEXT:    negu $3, $4
-; M32-NEXT:    and $2, $1, $4
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    slti $1, $4, 0
+; M32-NEXT:    negu $2, $4
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    xor $3, $4, $2
+; M32-NEXT:    and $1, $3, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_nabs:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sra $2, $1, 31
-; M64-NEXT:    and $3, $2, $1
-; M64-NEXT:    negu $1, $1
-; M64-NEXT:    not $2, $2
-; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    slti $2, $1, 0
+; M64-NEXT:    negu $3, $1
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    xor $1, $1, $3
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $3, $1
+; M64-NEXT:    xor $2, $3, $1
   %neg = sub i32 0, %x
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg)
@@ -221,14 +208,16 @@ define i32 @test_ctselect_nabs(i32 %x) {
 define i32 @test_ctselect_sign_extend(i32 %x) {
 ; M32-LABEL: test_ctselect_sign_extend:
 ; M32:       # %bb.0:
+; M32-NEXT:    slti $1, $4, 0
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    sra $2, $4, 31
+; M32-NEXT:    negu $2, $1
 ;
 ; M64-LABEL: test_ctselect_sign_extend:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
+; M64-NEXT:    slti $1, $1, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    sra $2, $1, 31
+; M64-NEXT:    negu $2, $1
   %cmp = icmp slt i32 %x, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
   ret i32 %result
@@ -270,13 +259,12 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_constant_folding_false:
 ; M32:       # %bb.0:
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $zero, $5
+; M32-NEXT:    move $2, $5
 ;
 ; M64-LABEL: test_ctselect_constant_folding_false:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $zero, $1
+; M64-NEXT:    sll $2, $5, 0
   %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -285,25 +273,13 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
 define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
 ; M32-LABEL: test_ctselect_identical_operands:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $5
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    move $2, $5
 ;
 ; M64-LABEL: test_ctselect_identical_operands:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $5, 0
-; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    sll $2, $5, 0
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
   ret i32 %result
 }
@@ -312,29 +288,27 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
 define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_inverted_condition:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltiu $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $7, $6
+; M32-NEXT:    sltiu $2, $2, 1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_inverted_condition:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
+; M64-NEXT:    xor $2, $7, $6
 ; M64-NEXT:    sltiu $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cmp = icmp eq i32 %x, %y
   %not_cmp = xor i1 %cmp, true
   %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b)
@@ -345,57 +319,51 @@ define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) {
 ; M32-LABEL: test_ctselect_chain:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $1, 16($sp)
+; M32-NEXT:    andi $3, $4, 1
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $2, $7, $1
+; M32-NEXT:    and $2, $2, $3
 ; M32-NEXT:    andi $3, $5, 1
-; M32-NEXT:    lw $5, 16($sp)
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    negu $4, $3
-; M32-NEXT:    addiu $3, $3, -1
-; M32-NEXT:    and $1, $1, $5
-; M32-NEXT:    and $2, $2, $7
-; M32-NEXT:    lw $5, 24($sp)
-; M32-NEXT:    or $1, $2, $1
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    lw $2, 20($sp)
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    lw $3, 24($sp)
+; M32-NEXT:    xor $1, $2, $1
 ; M32-NEXT:    andi $2, $6, 1
-; M32-NEXT:    and $1, $4, $1
-; M32-NEXT:    addiu $4, $2, -1
+; M32-NEXT:    xor $1, $1, $3
 ; M32-NEXT:    negu $2, $2
-; M32-NEXT:    and $4, $4, $5
-; M32-NEXT:    lw $5, 20($sp)
-; M32-NEXT:    and $3, $3, $5
-; M32-NEXT:    or $1, $1, $3
-; M32-NEXT:    and $1, $2, $1
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $4
+; M32-NEXT:    xor $2, $3, $1
 ;
 ; M64-LABEL: test_ctselect_chain:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $7, 0
-; M64-NEXT:    sll $4, $5, 0
+; M64-NEXT:    xor $2, $7, $8
+; M64-NEXT:    sll $3, $5, 0
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    andi $4, $4, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    negu $5, $4
-; M64-NEXT:    addiu $4, $4, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $8, 0
-; M64-NEXT:    and $1, $1, $3
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    sll $6, $10, 0
-; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $2, 0
 ; M64-NEXT:    andi $3, $3, 1
-; M64-NEXT:    and $1, $5, $1
-; M64-NEXT:    sll $5, $9, 0
-; M64-NEXT:    addiu $2, $3, -1
+; M64-NEXT:    negu $1, $1
 ; M64-NEXT:    negu $3, $3
-; M64-NEXT:    and $4, $4, $5
-; M64-NEXT:    and $2, $2, $6
-; M64-NEXT:    or $1, $1, $4
-; M64-NEXT:    and $1, $3, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $8, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    sll $2, $9, 0
+; M64-NEXT:    xor $1, $1, $2
+; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $3, $6, 0
+; M64-NEXT:    xor $1, $2, $1
+; M64-NEXT:    andi $2, $3, 1
+; M64-NEXT:    sll $3, $10, 0
+; M64-NEXT:    xor $1, $1, $3
+; M64-NEXT:    negu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $3, $1
   %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
   %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
   %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
@@ -406,16 +374,17 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c,
 define i64 @test_ctselect_i64_smin_zero(i64 %x) {
 ; M32-LABEL: test_ctselect_i64_smin_zero:
 ; M32:       # %bb.0:
-; M32-NEXT:    sra $1, $5, 31
-; M32-NEXT:    and $2, $1, $4
+; M32-NEXT:    slti $1, $5, 0
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    and $2, $4, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    and $3, $1, $5
+; M32-NEXT:    and $3, $5, $1
 ;
 ; M64-LABEL: test_ctselect_i64_smin_zero:
 ; M64:       # %bb.0:
 ; M64-NEXT:    dsra $1, $4, 63
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    and $2, $1, $4
+; M64-NEXT:    and $2, $4, $1
   %cmp = icmp slt i64 %x, 0
   %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
   ret i64 %result
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
index 6222f6052e12f..302e06b0a7335 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback-vector.ll
@@ -6,21 +6,19 @@
 define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -30,26 +28,24 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -63,21 +59,19 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v8i16:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.h $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.h $w1, $w1, 15
-; MIPS64-MSA-NEXT:    srai.h $w1, $w1, 15
-; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.h $w2, $w2, 27
+; MIPS64-MSA-NEXT:    fill.h $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.h $w2, $w2, 15
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.h $w2, $w2, 15
+; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
+; MIPS64-MSA-NEXT:    shf.h $w1, $w1, 27
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.h $w0, $w0, 27
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -87,28 +81,26 @@ define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
-; MIPS32-MSA-NEXT:    fill.h $w1, $4
-; MIPS32-MSA-NEXT:    ldi.b $w0, -1
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    slli.h $w1, $w1, 15
-; MIPS32-MSA-NEXT:    srai.h $w1, $w1, 15
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
+; MIPS32-MSA-NEXT:    fill.h $w2, $4
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.h $w2, $w2, 15
+; MIPS32-MSA-NEXT:    srai.h $w2, $w2, 15
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    shf.h $w2, $w2, 177
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
+; MIPS32-MSA-NEXT:    shf.h $w1, $w1, 177
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    shf.h $w0, $w0, 177
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
@@ -123,22 +115,21 @@ define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
 define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v16i8:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w0[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w1[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
 ; MIPS64-MSA-NEXT:    fill.b $w2, $1
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $6
-; MIPS64-MSA-NEXT:    insert.d $w1[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
 ; MIPS64-MSA-NEXT:    slli.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
-; MIPS64-MSA-NEXT:    shf.b $w1, $w1, 27
 ; MIPS64-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS64-MSA-NEXT:    shf.b $w1, $w1, 27
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
-; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS64-MSA-NEXT:    xori.b $w2, $w2, 255
-; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.b $w0, $w0, 27
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
@@ -147,29 +138,28 @@ define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v16i8:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    insert.w $w0[0], $6
-; MIPS32-MSA-NEXT:    lw $1, 16($sp)
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
+; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.b $w2, $4
-; MIPS32-MSA-NEXT:    insert.w $w0[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.b $w2, $w2, 7
 ; MIPS32-MSA-NEXT:    srai.b $w2, $w2, 7
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
+; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 20($sp)
+; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
-; MIPS32-MSA-NEXT:    lw $1, 28($sp)
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $1
-; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
+; MIPS32-MSA-NEXT:    lw $1, 16($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
-; MIPS32-MSA-NEXT:    lw $1, 36($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS32-MSA-NEXT:    xori.b $w2, $w2, 255
+; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
 ; MIPS32-MSA-NEXT:    shf.b $w1, $w1, 27
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    shf.b $w0, $w0, 27
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
@@ -184,18 +174,16 @@ define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
 define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v2i64:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    fill.d $w2, $4
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
-; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
@@ -214,31 +202,28 @@ define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
 ; MIPS32-MSA-NEXT:    and $sp, $sp, $1
 ; MIPS32-MSA-NEXT:    lw $2, 56($fp)
 ; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    sw $4, 12($sp)
 ; MIPS32-MSA-NEXT:    sw $4, 4($sp)
-; MIPS32-MSA-NEXT:    ldi.b $w0, -1
-; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
-; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    ld.d $w2, 0($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 64($fp)
-; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 68($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 48($fp)
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 52($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
@@ -257,21 +242,19 @@ define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
 define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4f32:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -281,26 +264,24 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $5
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
   %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b)
@@ -311,18 +292,16 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
 define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v2f64:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    fill.d $w2, $4
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
-; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
-; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
-; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
+; MIPS64-MSA-NEXT:    fill.d $w2, $4
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    copy_s.d $3, $w0[1]
@@ -341,31 +320,28 @@ define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double>
 ; MIPS32-MSA-NEXT:    and $sp, $sp, $1
 ; MIPS32-MSA-NEXT:    lw $2, 56($fp)
 ; MIPS32-MSA-NEXT:    lw $1, 60($fp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    sw $5, 12($sp)
 ; MIPS32-MSA-NEXT:    sw $5, 4($sp)
-; MIPS32-MSA-NEXT:    ldi.b $w0, -1
-; MIPS32-MSA-NEXT:    ld.d $w1, 0($sp)
-; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    slli.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    ld.d $w2, 0($sp)
+; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
+; MIPS32-MSA-NEXT:    slli.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 64($fp)
-; MIPS32-MSA-NEXT:    srai.d $w1, $w1, 63
-; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    srai.d $w2, $w2, 63
+; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 68($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 48($fp)
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 52($fp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS32-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    st.d $w0, 0($4)
 ; MIPS32-MSA-NEXT:    move $sp, $fp
 ; MIPS32-MSA-NEXT:    lw $fp, 24($sp) # 4-byte Folded Reload
@@ -381,16 +357,14 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_aligned_load:
 ; MIPS64-MSA:       # %bb.0:
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS64-MSA-NEXT:    ldi.b $w2, -1
-; MIPS64-MSA-NEXT:    fill.w $w0, $1
-; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -398,16 +372,14 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_aligned_load:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS32-MSA-NEXT:    ldi.b $w2, -1
-; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -424,16 +396,14 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
 ; MIPS64-MSA:       # %bb.0:
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS64-MSA-NEXT:    ldi.b $w2, -1
-; MIPS64-MSA-NEXT:    fill.w $w0, $1
-; MIPS64-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS64-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -441,16 +411,14 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_unaligned_load:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    fill.w $w0, $4
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
 ; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
-; MIPS32-MSA-NEXT:    ldi.b $w2, -1
-; MIPS32-MSA-NEXT:    slli.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    srai.w $w0, $w0, 31
-; MIPS32-MSA-NEXT:    and.v $w1, $w0, $w1
-; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -466,21 +434,19 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
 define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_store:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    st.w $w0, 0($9)
 ;
@@ -488,27 +454,25 @@ define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 40($sp)
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($1)
   %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
@@ -521,31 +485,28 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a,
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_chain:
 ; MIPS64-MSA:       # %bb.0:
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $6
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS64-MSA-NEXT:    fill.w $w2, $1
 ; MIPS64-MSA-NEXT:    sll $1, $5, 0
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $9
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $7
 ; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
-; MIPS64-MSA-NEXT:    xor.v $w3, $w2, $w1
-; MIPS64-MSA-NEXT:    and.v $w0, $w3, $w0
-; MIPS64-MSA-NEXT:    insert.d $w3[0], $6
-; MIPS64-MSA-NEXT:    insert.d $w3[1], $7
-; MIPS64-MSA-NEXT:    shf.w $w3, $w3, 177
-; MIPS64-MSA-NEXT:    and.v $w2, $w2, $w3
-; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $10
 ; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $11
 ; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
-; MIPS64-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $10
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $11
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -555,41 +516,38 @@ define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a,
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    lw $2, 40($sp)
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w3, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w3, $w0
-; MIPS32-MSA-NEXT:    insert.w $w3[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w3[1], $7
-; MIPS32-MSA-NEXT:    insert.w $w3[2], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
-; MIPS32-MSA-NEXT:    insert.w $w3[3], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 44($sp)
-; MIPS32-MSA-NEXT:    and.v $w2, $w2, $w3
-; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
 ; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $2
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
-; MIPS32-MSA-NEXT:    and.v $w0, $w2, $w0
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    insert.w $w2[0], $2
-; MIPS32-MSA-NEXT:    insert.w $w2[1], $1
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 48($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[2], $1
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 52($sp)
-; MIPS32-MSA-NEXT:    insert.w $w2[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    and.v $w0, $w0, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w1, $w0
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -607,20 +565,18 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
 ; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    fill.w $w3, $1
 ; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
 ; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
-; MIPS64-MSA-NEXT:    slli.w $w3, $w3, 31
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
-; MIPS64-MSA-NEXT:    srai.w $w3, $w3, 31
 ; MIPS64-MSA-NEXT:    fadd.w $w2, $w1, $w0
 ; MIPS64-MSA-NEXT:    fsub.w $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    ldi.b $w1, -1
-; MIPS64-MSA-NEXT:    xor.v $w1, $w3, $w1
-; MIPS64-MSA-NEXT:    and.v $w2, $w3, $w2
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS64-MSA-NEXT:    xor.v $w1, $w2, $w0
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -631,11 +587,8 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    fill.w $w3, $5
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
 ; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
-; MIPS32-MSA-NEXT:    slli.w $w3, $w3, 31
-; MIPS32-MSA-NEXT:    srai.w $w3, $w3, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
@@ -647,11 +600,12 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
 ; MIPS32-MSA-NEXT:    fadd.w $w2, $w1, $w0
 ; MIPS32-MSA-NEXT:    fsub.w $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
-; MIPS32-MSA-NEXT:    xor.v $w1, $w3, $w1
-; MIPS32-MSA-NEXT:    and.v $w2, $w3, $w2
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    or.v $w0, $w2, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w0
+; MIPS32-MSA-NEXT:    fill.w $w2, $5
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($4)
   %sum = fadd <4 x float> %x, %y
@@ -664,36 +618,32 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
 define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_mixed:
 ; MIPS64-MSA:       # %bb.0:
+; MIPS64-MSA-NEXT:    ld.w $w0, 0($6)
+; MIPS64-MSA-NEXT:    ld.w $w1, 0($5)
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ld.w $w0, 0($5)
-; MIPS64-MSA-NEXT:    ldi.b $w2, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    addvi.w $w0, $w0, 1
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS64-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    addvi.w $w0, $w0, 2
+; MIPS64-MSA-NEXT:    addvi.w $w1, $w1, 1
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    jr $ra
 ; MIPS64-MSA-NEXT:    st.w $w0, 0($7)
 ;
 ; MIPS32-MSA-LABEL: test_ctselect_v4i32_mixed:
 ; MIPS32-MSA:       # %bb.0:
-; MIPS32-MSA-NEXT:    ld.w $w0, 0($5)
-; MIPS32-MSA-NEXT:    fill.w $w1, $4
-; MIPS32-MSA-NEXT:    ldi.b $w2, -1
-; MIPS32-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS32-MSA-NEXT:    addvi.w $w0, $w0, 1
-; MIPS32-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    ld.w $w2, 0($6)
-; MIPS32-MSA-NEXT:    addvi.w $w2, $w2, 2
+; MIPS32-MSA-NEXT:    ld.w $w0, 0($6)
+; MIPS32-MSA-NEXT:    ld.w $w1, 0($5)
+; MIPS32-MSA-NEXT:    fill.w $w2, $4
+; MIPS32-MSA-NEXT:    addvi.w $w0, $w0, 2
+; MIPS32-MSA-NEXT:    addvi.w $w1, $w1, 1
+; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
 ; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS32-MSA-NEXT:    or.v $w0, $w0, $w1
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    jr $ra
 ; MIPS32-MSA-NEXT:    st.w $w0, 0($7)
   %a = load <4 x i32>, ptr %p1, align 16
@@ -709,21 +659,19 @@ define void @test_ctselect_v4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
 define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_args:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
 ; MIPS64-MSA-NEXT:    jr $ra
@@ -733,26 +681,24 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
 ; MIPS32-MSA-NEXT:    copy_s.w $4, $w0[2]
@@ -766,21 +712,19 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
 define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
 ; MIPS64-MSA-LABEL: test_ctselect_v4i32_multi_use:
 ; MIPS64-MSA:       # %bb.0:
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w0[0], $7
+; MIPS64-MSA-NEXT:    insert.d $w1[0], $5
 ; MIPS64-MSA-NEXT:    sll $1, $4, 0
-; MIPS64-MSA-NEXT:    ldi.b $w0, -1
-; MIPS64-MSA-NEXT:    fill.w $w1, $1
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $8
-; MIPS64-MSA-NEXT:    slli.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    srai.w $w1, $w1, 31
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
-; MIPS64-MSA-NEXT:    xor.v $w0, $w1, $w0
-; MIPS64-MSA-NEXT:    and.v $w0, $w0, $w2
-; MIPS64-MSA-NEXT:    insert.d $w2[0], $5
-; MIPS64-MSA-NEXT:    insert.d $w2[1], $6
-; MIPS64-MSA-NEXT:    shf.w $w2, $w2, 177
+; MIPS64-MSA-NEXT:    fill.w $w2, $1
+; MIPS64-MSA-NEXT:    insert.d $w0[1], $8
+; MIPS64-MSA-NEXT:    insert.d $w1[1], $6
+; MIPS64-MSA-NEXT:    slli.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS64-MSA-NEXT:    srai.w $w2, $w2, 31
+; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
+; MIPS64-MSA-NEXT:    shf.w $w1, $w1, 177
 ; MIPS64-MSA-NEXT:    and.v $w1, $w1, $w2
-; MIPS64-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS64-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS64-MSA-NEXT:    addv.w $w0, $w0, $w0
 ; MIPS64-MSA-NEXT:    shf.w $w0, $w0, 177
 ; MIPS64-MSA-NEXT:    copy_s.d $2, $w0[0]
@@ -791,26 +735,24 @@ define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32
 ; MIPS32-MSA:       # %bb.0:
 ; MIPS32-MSA-NEXT:    lw $2, 24($sp)
 ; MIPS32-MSA-NEXT:    lw $1, 28($sp)
+; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
 ; MIPS32-MSA-NEXT:    fill.w $w2, $4
-; MIPS32-MSA-NEXT:    ldi.b $w1, -1
 ; MIPS32-MSA-NEXT:    insert.w $w0[0], $2
+; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    slli.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    srai.w $w2, $w2, 31
 ; MIPS32-MSA-NEXT:    insert.w $w0[1], $1
 ; MIPS32-MSA-NEXT:    lw $1, 32($sp)
-; MIPS32-MSA-NEXT:    xor.v $w1, $w2, $w1
 ; MIPS32-MSA-NEXT:    insert.w $w0[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 36($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w0[3], $1
 ; MIPS32-MSA-NEXT:    lw $1, 16($sp)
-; MIPS32-MSA-NEXT:    and.v $w0, $w1, $w0
-; MIPS32-MSA-NEXT:    insert.w $w1[0], $6
-; MIPS32-MSA-NEXT:    insert.w $w1[1], $7
 ; MIPS32-MSA-NEXT:    insert.w $w1[2], $1
 ; MIPS32-MSA-NEXT:    lw $1, 20($sp)
 ; MIPS32-MSA-NEXT:    insert.w $w1[3], $1
-; MIPS32-MSA-NEXT:    and.v $w1, $w2, $w1
-; MIPS32-MSA-NEXT:    or.v $w0, $w1, $w0
+; MIPS32-MSA-NEXT:    xor.v $w1, $w1, $w0
+; MIPS32-MSA-NEXT:    and.v $w1, $w1, $w2
+; MIPS32-MSA-NEXT:    xor.v $w0, $w0, $w1
 ; MIPS32-MSA-NEXT:    addv.w $w0, $w0, $w0
 ; MIPS32-MSA-NEXT:    copy_s.w $2, $w0[0]
 ; MIPS32-MSA-NEXT:    copy_s.w $3, $w0[1]
diff --git a/llvm/test/CodeGen/Mips/ctselect-fallback.ll b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
index d89d7fc698712..6a61412367f76 100644
--- a/llvm/test/CodeGen/Mips/ctselect-fallback.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-fallback.ll
@@ -11,7 +11,7 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
 ; M32-NEXT:    negu $2, $2
 ; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $2, $1, $6
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i8:
 ; M64:       # %bb.0:
@@ -23,7 +23,7 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
 ; M64-NEXT:    and $1, $2, $1
 ; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    xor $2, $1, $2
+; M64-NEXT:    xor $2, $2, $1
   %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
   ret i8 %result
 }
@@ -36,7 +36,7 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
 ; M32-NEXT:    negu $2, $2
 ; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $2, $1, $6
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i16:
 ; M64:       # %bb.0:
@@ -48,7 +48,7 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
 ; M64-NEXT:    and $1, $2, $1
 ; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    xor $2, $1, $2
+; M64-NEXT:    xor $2, $2, $1
   %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
   ret i16 %result
 }
@@ -56,26 +56,24 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
 define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_i32:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_i32:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    xor $2, $5, $6
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -88,22 +86,21 @@ define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
 ; M32-NEXT:    negu $3, $3
 ; M32-NEXT:    xor $2, $6, $1
 ; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    xor $2, $2, $1
+; M32-NEXT:    xor $2, $1, $2
 ; M32-NEXT:    lw $1, 20($sp)
 ; M32-NEXT:    xor $4, $7, $1
 ; M32-NEXT:    and $3, $4, $3
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    xor $3, $3, $1
+; M32-NEXT:    xor $3, $1, $3
 ;
 ; M64-LABEL: test_ctselect_i64:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
   ret i64 %result
 }
@@ -111,23 +108,21 @@ define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
 define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
 ; M32-LABEL: test_ctselect_ptr:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_ctselect_ptr:
 ; M64:       # %bb.0:
-; M64-NEXT:    andi $1, $4, 1
-; M64-NEXT:    dnegu $2, $1
-; M64-NEXT:    daddiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $5
-; M64-NEXT:    and $1, $1, $6
+; M64-NEXT:    andi $2, $4, 1
+; M64-NEXT:    xor $1, $5, $6
+; M64-NEXT:    dnegu $2, $2
+; M64-NEXT:    and $1, $1, $2
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $6, $1
   %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
   ret ptr %result
 }
@@ -151,13 +146,12 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_const_false:
 ; M32:       # %bb.0:
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $zero, $5
+; M32-NEXT:    move $2, $5
 ;
 ; M64-LABEL: test_ctselect_const_false:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $zero, $1
+; M64-NEXT:    sll $2, $5, 0
   %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
   ret i32 %result
 }
@@ -166,29 +160,27 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_eq:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltu $1, $zero, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    sltiu $2, $2, 1
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_eq:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    sltu $1, $zero, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    sltiu $1, $1, 1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp eq i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -197,29 +189,27 @@ define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_ne:
 ; M32:       # %bb.0:
-; M32-NEXT:    xor $1, $4, $5
-; M32-NEXT:    sltiu $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    xor $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    sltu $2, $zero, $2
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_ne:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    xor $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    sltiu $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    sltu $1, $zero, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp ne i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -228,29 +218,25 @@ define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_slt:
 ; M32:       # %bb.0:
-; M32-NEXT:    slt $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    slt $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_slt:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    slt $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    xori $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp slt i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -259,29 +245,25 @@ define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
 ; M32-LABEL: test_ctselect_icmp_ult:
 ; M32:       # %bb.0:
-; M32-NEXT:    sltu $1, $4, $5
-; M32-NEXT:    xori $1, $1, 1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $1, $6
-; M32-NEXT:    not $1, $1
-; M32-NEXT:    and $1, $1, $7
+; M32-NEXT:    sltu $2, $4, $5
+; M32-NEXT:    xor $1, $6, $7
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $7, $1
 ;
 ; M64-LABEL: test_ctselect_icmp_ult:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
 ; M64-NEXT:    sll $2, $4, 0
-; M64-NEXT:    sll $3, $7, 0
 ; M64-NEXT:    sltu $1, $2, $1
-; M64-NEXT:    sll $2, $6, 0
-; M64-NEXT:    xori $1, $1, 1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $1, $2
-; M64-NEXT:    not $1, $1
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %cond = icmp ult i32 %x, %y
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
@@ -291,28 +273,26 @@ define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
 define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
 ; M32-LABEL: test_ctselect_load:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    lw $2, 0($6)
 ; M32-NEXT:    lw $3, 0($5)
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $3
-; M32-NEXT:    lw $3, 0($6)
-; M32-NEXT:    and $1, $1, $3
+; M32-NEXT:    andi $1, $4, 1
+; M32-NEXT:    negu $1, $1
+; M32-NEXT:    xor $3, $3, $2
+; M32-NEXT:    and $1, $3, $1
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_load:
 ; M64:       # %bb.0:
-; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    lw $3, 0($5)
-; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
+; M64-NEXT:    sll $3, $4, 0
+; M64-NEXT:    lw $1, 0($6)
+; M64-NEXT:    lw $2, 0($5)
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    xor $2, $2, $1
+; M64-NEXT:    negu $3, $3
 ; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    lw $3, 0($6)
-; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $1, $2
   %a = load i32, ptr %p1
   %b = load i32, ptr %p2
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
@@ -323,41 +303,37 @@ define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
 define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
 ; M32-LABEL: test_ctselect_nested:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $5, 1
+; M32-NEXT:    andi $2, $5, 1
+; M32-NEXT:    xor $1, $6, $7
 ; M32-NEXT:    andi $3, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    negu $4, $3
-; M32-NEXT:    and $2, $2, $6
-; M32-NEXT:    and $1, $1, $7
-; M32-NEXT:    or $1, $2, $1
-; M32-NEXT:    addiu $2, $3, -1
-; M32-NEXT:    lw $3, 16($sp)
-; M32-NEXT:    and $1, $4, $1
-; M32-NEXT:    and $2, $2, $3
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    negu $3, $3
+; M32-NEXT:    and $1, $1, $2
+; M32-NEXT:    lw $2, 16($sp)
+; M32-NEXT:    xor $1, $7, $1
+; M32-NEXT:    xor $1, $1, $2
+; M32-NEXT:    and $1, $1, $3
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $1, $2
+; M32-NEXT:    xor $2, $2, $1
 ;
 ; M64-LABEL: test_ctselect_nested:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $5, 0
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    sll $4, $4, 0
+; M64-NEXT:    xor $2, $6, $7
+; M64-NEXT:    sll $3, $4, 0
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    andi $4, $4, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    negu $5, $4
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $7, 0
-; M64-NEXT:    and $1, $1, $3
-; M64-NEXT:    addiu $3, $4, -1
-; M64-NEXT:    or $1, $2, $1
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    andi $3, $3, 1
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    negu $3, $3
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $7, 0
+; M64-NEXT:    xor $1, $2, $1
 ; M64-NEXT:    sll $2, $8, 0
-; M64-NEXT:    and $1, $5, $1
-; M64-NEXT:    and $2, $3, $2
+; M64-NEXT:    xor $1, $1, $2
+; M64-NEXT:    and $1, $1, $3
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $1, $2
+; M64-NEXT:    xor $2, $2, $1
   %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
   %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
   ret i32 %result
diff --git a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
index 6cfa07afdd51e..069100e2d2a79 100644
--- a/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
+++ b/llvm/test/CodeGen/Mips/ctselect-side-effects.ll
@@ -38,26 +38,24 @@ define i32 @test_constant_fold() {
 define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
 ; M32-LABEL: test_protected_no_branch:
 ; M32:       # %bb.0:
-; M32-NEXT:    andi $1, $4, 1
-; M32-NEXT:    negu $2, $1
-; M32-NEXT:    addiu $1, $1, -1
-; M32-NEXT:    and $2, $2, $5
-; M32-NEXT:    and $1, $1, $6
+; M32-NEXT:    andi $2, $4, 1
+; M32-NEXT:    xor $1, $5, $6
+; M32-NEXT:    negu $2, $2
+; M32-NEXT:    and $1, $1, $2
 ; M32-NEXT:    jr $ra
-; M32-NEXT:    or $2, $2, $1
+; M32-NEXT:    xor $2, $6, $1
 ;
 ; M64-LABEL: test_protected_no_branch:
 ; M64:       # %bb.0:
 ; M64-NEXT:    sll $1, $4, 0
-; M64-NEXT:    sll $3, $5, 0
+; M64-NEXT:    xor $2, $5, $6
 ; M64-NEXT:    andi $1, $1, 1
-; M64-NEXT:    negu $2, $1
-; M64-NEXT:    addiu $1, $1, -1
-; M64-NEXT:    and $2, $2, $3
-; M64-NEXT:    sll $3, $6, 0
-; M64-NEXT:    and $1, $1, $3
+; M64-NEXT:    sll $2, $2, 0
+; M64-NEXT:    negu $1, $1
+; M64-NEXT:    and $1, $2, $1
+; M64-NEXT:    sll $2, $6, 0
 ; M64-NEXT:    jr $ra
-; M64-NEXT:    or $2, $2, $1
+; M64-NEXT:    xor $2, $2, $1
   %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
   ret i32 %result
 }



More information about the llvm-branch-commits mailing list