[llvm-branch-commits] [llvm] [ConstantTime][WebAssembly] Add comprehensive tests for ct.select (PR #166709)
Julius Alexandre via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Mar 7 13:34:27 PST 2026
https://github.com/wizardengineer updated https://github.com/llvm/llvm-project/pull/166709
>From b4f4ffbb61aad480a45bfa8c6c4e166e8848320c Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 11:03:23 -0500
Subject: [PATCH 1/2] [ConstantTime][WebAssembly] Add comprehensive tests for
ct.select
---
.../ctselect-fallback-edge-cases.ll | 376 +++++++++
.../WebAssembly/ctselect-fallback-patterns.ll | 641 ++++++++++++++++
.../WebAssembly/ctselect-fallback-vector.ll | 714 ++++++++++++++++++
.../CodeGen/WebAssembly/ctselect-fallback.ll | 552 ++++++++++++++
.../WebAssembly/ctselect-side-effects.ll | 226 ++++++
5 files changed, 2509 insertions(+)
create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
create mode 100644 llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
new file mode 100644
index 0000000000000..b0f7f2807debd
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
@@ -0,0 +1,376 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64
+
+; Test with the smallest integer type (i1 value operands)
+define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
+; W32-LABEL: test_ctselect_i1:
+; W32: .functype test_ctselect_i1 (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i1:
+; W64: .functype test_ctselect_i1 (i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
+ ret i1 %result
+}
+
+; Test with extremal constant operands (INT32_MAX when true, INT32_MIN when false)
+define i32 @test_ctselect_extremal_values(i1 %cond) {
+; W32-LABEL: test_ctselect_extremal_values:
+; W32: .functype test_ctselect_extremal_values (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.const 2147483647
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: i32.const -2147483648
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_extremal_values:
+; W64: .functype test_ctselect_extremal_values (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.const 2147483647
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: i32.const -2147483648
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
+ ret i32 %result
+}
+
+; Test with a null pointer as the false operand
+define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
+; W32-LABEL: test_ctselect_null_ptr:
+; W32: .functype test_ctselect_null_ptr (i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_null_ptr:
+; W64: .functype test_ctselect_null_ptr (i32, i64) -> (i64)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i64.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i64.extend_i32_u
+; W64-NEXT: i64.const 1
+; W64-NEXT: i64.and
+; W64-NEXT: i64.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i64.and
+; W64-NEXT: # fallthrough-return
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
+ ret ptr %result
+}
+
+; Test with function pointers (plain opaque ptr operands in the IR)
+define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
+; W32-LABEL: test_ctselect_function_ptr:
+; W32: .functype test_ctselect_function_ptr (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_function_ptr:
+; W64: .functype test_ctselect_function_ptr (i32, i64, i64) -> (i64)
+; W64-NEXT: .local i64
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i64.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i64.extend_i32_u
+; W64-NEXT: i64.const 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.tee 3
+; W64-NEXT: i64.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.get 3
+; W64-NEXT: i64.const -1
+; W64-NEXT: i64.add
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.and
+; W64-NEXT: i64.or
+; W64-NEXT: # fallthrough-return
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
+ ret ptr %result
+}
+
+; Test with condition from icmp on pointers
+define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
+; W32-LABEL: test_ctselect_ptr_cmp:
+; W32: .functype test_ctselect_ptr_cmp (i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.eq
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_ptr_cmp:
+; W64: .functype test_ctselect_ptr_cmp (i64, i64, i64, i64) -> (i64)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i64.const -1
+; W64-NEXT: i64.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i64.eq
+; W64-NEXT: i64.select
+; W64-NEXT: local.tee 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i64.const -1
+; W64-NEXT: i64.xor
+; W64-NEXT: local.get 3
+; W64-NEXT: i64.and
+; W64-NEXT: i64.or
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp eq ptr %p1, %p2
+ %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
+ ret ptr %result
+}
+
+; Test with struct pointer types (operands are opaque ptr; %struct.pair below is declared but not referenced)
+%struct.pair = type { i32, i32 }
+
+define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
+; W32-LABEL: test_ctselect_struct_ptr:
+; W32: .functype test_ctselect_struct_ptr (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_struct_ptr:
+; W64: .functype test_ctselect_struct_ptr (i32, i64, i64) -> (i64)
+; W64-NEXT: .local i64
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i64.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i64.extend_i32_u
+; W64-NEXT: i64.const 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.tee 3
+; W64-NEXT: i64.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.get 3
+; W64-NEXT: i64.const -1
+; W64-NEXT: i64.add
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.and
+; W64-NEXT: i64.or
+; W64-NEXT: # fallthrough-return
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+ ret ptr %result
+}
+
+; Test with a chain of four dependent ct.select calls (each result is the true operand of the next)
+define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; W32-LABEL: test_ctselect_deeply_nested:
+; W32: .functype test_ctselect_deeply_nested (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 3
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 2
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 1
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 4
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 5
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 6
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 7
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 8
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_deeply_nested:
+; W64: .functype test_ctselect_deeply_nested (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 3
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 2
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 1
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: local.get 4
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 5
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 6
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 7
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 8
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+ %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+ %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+ %sel4 = call i32 @llvm.ct.select.i32(i1 %c4, i32 %sel3, i32 %e)
+ ret i32 %sel4
+}
+
+; Declare the intrinsics
+declare i1 @llvm.ct.select.i1(i1, i1, i1)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
new file mode 100644
index 0000000000000..040ee44addb69
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
@@ -0,0 +1,641 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64
+
+; Test smin(x, 0) pattern
+define i32 @test_ctselect_smin_zero(i32 %x) {
+; W32-LABEL: test_ctselect_smin_zero:
+; W32: .functype test_ctselect_smin_zero (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 31
+; W32-NEXT: i32.shr_s
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.and
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smin_zero:
+; W64: .functype test_ctselect_smin_zero (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 31
+; W64-NEXT: i32.shr_s
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.and
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+ ret i32 %result
+}
+
+; Test smax(x, 0) pattern
+define i32 @test_ctselect_smax_zero(i32 %x) {
+; W32-LABEL: test_ctselect_smax_zero:
+; W32: .functype test_ctselect_smax_zero (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 0
+; W32-NEXT: i32.gt_s
+; W32-NEXT: i32.select
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smax_zero:
+; W64: .functype test_ctselect_smax_zero (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 0
+; W64-NEXT: i32.gt_s
+; W64-NEXT: i32.select
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp sgt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+ ret i32 %result
+}
+
+; Test generic smin pattern
+define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_smin_generic:
+; W32: .functype test_ctselect_smin_generic (i32, i32) -> (i32)
+; W32-NEXT: .local i32
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.lt_s
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 2
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smin_generic:
+; W64: .functype test_ctselect_smin_generic (i32, i32) -> (i32)
+; W64-NEXT: .local i32
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.lt_s
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 2
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp slt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test generic smax pattern
+define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_smax_generic:
+; W32: .functype test_ctselect_smax_generic (i32, i32) -> (i32)
+; W32-NEXT: .local i32
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.gt_s
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 2
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_smax_generic:
+; W64: .functype test_ctselect_smax_generic (i32, i32) -> (i32)
+; W64-NEXT: .local i32
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.gt_s
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 2
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp sgt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test generic umin pattern
+define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_umin_generic:
+; W32: .functype test_ctselect_umin_generic (i32, i32) -> (i32)
+; W32-NEXT: .local i32
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.lt_u
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 2
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_umin_generic:
+; W64: .functype test_ctselect_umin_generic (i32, i32) -> (i32)
+; W64-NEXT: .local i32
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.lt_u
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 2
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp ult i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test generic umax pattern
+define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
+; W32-LABEL: test_ctselect_umax_generic:
+; W32: .functype test_ctselect_umax_generic (i32, i32) -> (i32)
+; W32-NEXT: .local i32
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.gt_u
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 2
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_umax_generic:
+; W64: .functype test_ctselect_umax_generic (i32, i32) -> (i32)
+; W64-NEXT: .local i32
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.gt_u
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 2
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp ugt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test abs pattern
+define i32 @test_ctselect_abs(i32 %x) {
+; W32-LABEL: test_ctselect_abs:
+; W32: .functype test_ctselect_abs (i32) -> (i32)
+; W32-NEXT: .local i32
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 31
+; W32-NEXT: i32.shr_s
+; W32-NEXT: local.tee 1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_abs:
+; W64: .functype test_ctselect_abs (i32) -> (i32)
+; W64-NEXT: .local i32
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 31
+; W64-NEXT: i32.shr_s
+; W64-NEXT: local.tee 1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %neg = sub i32 0, %x
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x)
+ ret i32 %result
+}
+
+; Test nabs pattern (negative abs)
+define i32 @test_ctselect_nabs(i32 %x) {
+; W32-LABEL: test_ctselect_nabs:
+; W32: .functype test_ctselect_nabs (i32) -> (i32)
+; W32-NEXT: .local i32
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 31
+; W32-NEXT: i32.shr_s
+; W32-NEXT: local.tee 1
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_nabs:
+; W64: .functype test_ctselect_nabs (i32) -> (i32)
+; W64-NEXT: .local i32
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 31
+; W64-NEXT: i32.shr_s
+; W64-NEXT: local.tee 1
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %neg = sub i32 0, %x
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg)
+ ret i32 %result
+}
+
+; Test sign-mask pattern: select -1 if x < 0, else 0 (expected to fold to an arithmetic shift right by 31)
+define i32 @test_ctselect_sign_extend(i32 %x) {
+; W32-LABEL: test_ctselect_sign_extend:
+; W32: .functype test_ctselect_sign_extend (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 31
+; W32-NEXT: i32.shr_s
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_sign_extend:
+; W64: .functype test_ctselect_sign_extend (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 31
+; W64-NEXT: i32.shr_s
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
+ ret i32 %result
+}
+
+; Test boolean-to-integer pattern: select 1 if x != 0, else 0 (expected to fold to i32.ne)
+define i32 @test_ctselect_zero_extend(i32 %x) {
+; W32-LABEL: test_ctselect_zero_extend:
+; W32: .functype test_ctselect_zero_extend (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 0
+; W32-NEXT: i32.ne
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_zero_extend:
+; W64: .functype test_ctselect_zero_extend (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 0
+; W64-NEXT: i32.ne
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp ne i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 1, i32 0)
+ ret i32 %result
+}
+
+; Test constant folding with a constant condition (true here, false in the next function)
+define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_constant_folding_true:
+; W32: .functype test_ctselect_constant_folding_true (i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_constant_folding_true:
+; W64: .functype test_ctselect_constant_folding_true (i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_constant_folding_false:
+; W32: .functype test_ctselect_constant_folding_false (i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_constant_folding_false:
+; W64: .functype test_ctselect_constant_folding_false (i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test with identical operands
+define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
+; W32-LABEL: test_ctselect_identical_operands:
+; W32: .functype test_ctselect_identical_operands (i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_identical_operands:
+; W64: .functype test_ctselect_identical_operands (i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
+ ret i32 %result
+}
+
+; Test with inverted condition
+define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_inverted_condition:
+; W32: .functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.ne
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_inverted_condition:
+; W64: .functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.ne
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp eq i32 %x, %y
+ %not_cmp = xor i1 %cmp, true
+ %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test chain of ct.select operations
+define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) {
+; W32-LABEL: test_ctselect_chain:
+; W32: .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 2
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 1
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 4
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 5
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 6
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_chain:
+; W64: .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 2
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 1
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 4
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 5
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 6
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+ %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+ %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+ ret i32 %sel3
+}
+
+; Test for 64-bit operations (i64 is a native value type on both wasm32 and wasm64)
+define i64 @test_ctselect_i64_smin_zero(i64 %x) {
+; W32-LABEL: test_ctselect_i64_smin_zero:
+; W32: .functype test_ctselect_i64_smin_zero (i64) -> (i64)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: i64.const 63
+; W32-NEXT: i64.shr_s
+; W32-NEXT: local.get 0
+; W32-NEXT: i64.and
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i64_smin_zero:
+; W64: .functype test_ctselect_i64_smin_zero (i64) -> (i64)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: i64.const 63
+; W64-NEXT: i64.shr_s
+; W64-NEXT: local.get 0
+; W64-NEXT: i64.and
+; W64-NEXT: # fallthrough-return
+ %cmp = icmp slt i64 %x, 0
+ %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
+ ret i64 %result
+}
+
+; Declare the intrinsics
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
new file mode 100644
index 0000000000000..75e38e1856a03
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
@@ -0,0 +1,714 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -mattr=+simd128 | FileCheck %s --check-prefix=WASM32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -mattr=+simd128 | FileCheck %s --check-prefix=WASM64
+
+; Test 32-bit integer vector (4 x i32 = 128-bit)
+define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; WASM32-LABEL: test_ctselect_v4i32:
+; WASM32: .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32:
+; WASM64: .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %result
+}
+
+; Test 16-bit integer vector (8 x i16 = 128-bit)
+define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
+; WASM32-LABEL: test_ctselect_v8i16:
+; WASM32: .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i16x8.splat
+; WASM32-NEXT: i32.const 15
+; WASM32-NEXT: i16x8.shl
+; WASM32-NEXT: i32.const 15
+; WASM32-NEXT: i16x8.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v8i16:
+; WASM64: .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i16x8.splat
+; WASM64-NEXT: i32.const 15
+; WASM64-NEXT: i16x8.shl
+; WASM64-NEXT: i32.const 15
+; WASM64-NEXT: i16x8.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b)
+ ret <8 x i16> %result
+}
+
+; Test byte vector (16 x i8 = 128-bit)
+define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
+; WASM32-LABEL: test_ctselect_v16i8:
+; WASM32: .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i8x16.splat
+; WASM32-NEXT: i32.const 7
+; WASM32-NEXT: i8x16.shl
+; WASM32-NEXT: i32.const 7
+; WASM32-NEXT: i8x16.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v16i8:
+; WASM64: .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i8x16.splat
+; WASM64-NEXT: i32.const 7
+; WASM64-NEXT: i8x16.shl
+; WASM64-NEXT: i32.const 7
+; WASM64-NEXT: i8x16.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %result
+}
+
+; Test 64-bit integer vector (2 x i64 = 128-bit)
+define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
+; WASM32-LABEL: test_ctselect_v2i64:
+; WASM32: .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 63
+; WASM32-NEXT: i64x2.shl
+; WASM32-NEXT: i32.const 63
+; WASM32-NEXT: i64x2.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v2i64:
+; WASM64: .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 63
+; WASM64-NEXT: i64x2.shl
+; WASM64-NEXT: i32.const 63
+; WASM64-NEXT: i64x2.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b)
+ ret <2 x i64> %result
+}
+
+; Test single-precision float vector (4 x float = 128-bit)
+define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
+; WASM32-LABEL: test_ctselect_v4f32:
+; WASM32: .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4f32:
+; WASM64: .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b)
+ ret <4 x float> %result
+}
+
+; Test double-precision float vector (2 x double = 128-bit)
+define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
+; WASM32-LABEL: test_ctselect_v2f64:
+; WASM32: .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 63
+; WASM32-NEXT: i64x2.shl
+; WASM32-NEXT: i32.const 63
+; WASM32-NEXT: i64x2.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v2f64:
+; WASM64: .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 63
+; WASM64-NEXT: i64x2.shl
+; WASM64-NEXT: i32.const 63
+; WASM64-NEXT: i64x2.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b)
+ ret <2 x double> %result
+}
+
+; Test with aligned loads (common case)
+define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
+; WASM32-LABEL: test_ctselect_v4i32_aligned_load:
+; WASM32: .functype test_ctselect_v4i32_aligned_load (i32, i32, i32) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.load 0
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.load 0
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_aligned_load:
+; WASM64: .functype test_ctselect_v4i32_aligned_load (i32, i64, i64) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.load 0
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.load 0
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %a = load <4 x i32>, ptr %p1, align 16
+ %b = load <4 x i32>, ptr %p2, align 16
+ %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %result
+}
+
+; Test with under-aligned loads (align 4 instead of the natural 16-byte alignment)
+define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2) {
+; WASM32-LABEL: test_ctselect_v4i32_unaligned_load:
+; WASM32: .functype test_ctselect_v4i32_unaligned_load (i32, i32, i32) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.load 0:p2align=2
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.load 0:p2align=2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_unaligned_load:
+; WASM64: .functype test_ctselect_v4i32_unaligned_load (i32, i64, i64) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.load 0:p2align=2
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.load 0:p2align=2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %a = load <4 x i32>, ptr %p1, align 4
+ %b = load <4 x i32>, ptr %p2, align 4
+ %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %result
+}
+
+; Test with stores to verify result handling
+define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) {
+; WASM32-LABEL: test_ctselect_v4i32_store:
+; WASM32: .functype test_ctselect_v4i32_store (i32, v128, v128, i32) -> ()
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 4
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 4
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.store 0
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_store:
+; WASM64: .functype test_ctselect_v4i32_store (i32, v128, v128, i64) -> ()
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 4
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 4
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.store 0
+; WASM64-NEXT: # fallthrough-return
+ %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+ store <4 x i32> %result, ptr %out, align 16
+ ret void
+}
+
+; Test chained selects (multiple conditions)
+define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; WASM32-LABEL: test_ctselect_v4i32_chain:
+; WASM32: .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128, v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 5
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 6
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 6
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 4
+; WASM32-NEXT: local.get 5
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_chain:
+; WASM64: .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128, v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 5
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 6
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 6
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 4
+; WASM64-NEXT: local.get 5
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %tmp = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond1, <4 x i32> %a, <4 x i32> %b)
+ %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond2, <4 x i32> %tmp, <4 x i32> %c)
+ ret <4 x i32> %result
+}
+
+; Test with arithmetic operations (ensure float vectors work with FP ops)
+define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4 x float> %y) {
+; WASM32-LABEL: test_ctselect_v4f32_arithmetic:
+; WASM32: .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: f32x4.add
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: f32x4.sub
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4f32_arithmetic:
+; WASM64: .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: f32x4.add
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: f32x4.sub
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %sum = fadd <4 x float> %x, %y
+ %diff = fsub <4 x float> %x, %y
+ %result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %sum, <4 x float> %diff)
+ ret <4 x float> %result
+}
+
+; Test with a zero vector as the false operand (only the mask-and-%a half remains)
+define <4 x i32> @test_ctselect_v4i32_zeros(i1 %cond, <4 x i32> %a) {
+; WASM32-LABEL: test_ctselect_v4i32_zeros:
+; WASM32: .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128)
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_zeros:
+; WASM64: .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128)
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: # fallthrough-return
+ %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond,
+ <4 x i32> %a,
+ <4 x i32> zeroinitializer)
+ ret <4 x i32> %result
+}
+
+; Test with function arguments directly (no loads) on a nounwind function
+define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind {
+; WASM32-LABEL: test_ctselect_v4i32_args:
+; WASM32: .functype test_ctselect_v4i32_args (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_args:
+; WASM64: .functype test_ctselect_v4i32_args (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %result
+}
+
+; Test with multiple uses of result
+define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; WASM32-LABEL: test_ctselect_v4i32_multi_use:
+; WASM32: .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i32x4.splat
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shl
+; WASM32-NEXT: i32.const 31
+; WASM32-NEXT: i32x4.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: local.tee 1
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: i32x4.add
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v4i32_multi_use:
+; WASM64: .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i32x4.splat
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shl
+; WASM64-NEXT: i32.const 31
+; WASM64-NEXT: i32x4.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: local.tee 1
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: i32x4.add
+; WASM64-NEXT: # fallthrough-return
+ %sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
+ %add = add <4 x i32> %sel, %sel ; Use result twice
+ ret <4 x i32> %add
+}
+
+; Test byte vector with bitwise operations (xor and and) as the select operands
+define <16 x i8> @test_ctselect_v16i8_ops(i1 %cond, <16 x i8> %x, <16 x i8> %y) {
+; WASM32-LABEL: test_ctselect_v16i8_ops:
+; WASM32: .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128)
+; WASM32-NEXT: .local v128
+; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 0
+; WASM32-NEXT: i8x16.splat
+; WASM32-NEXT: i32.const 7
+; WASM32-NEXT: i8x16.shl
+; WASM32-NEXT: i32.const 7
+; WASM32-NEXT: i8x16.shr_s
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.andnot
+; WASM32-NEXT: v128.or
+; WASM32-NEXT: # fallthrough-return
+;
+; WASM64-LABEL: test_ctselect_v16i8_ops:
+; WASM64: .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128)
+; WASM64-NEXT: .local v128
+; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 0
+; WASM64-NEXT: i8x16.splat
+; WASM64-NEXT: i32.const 7
+; WASM64-NEXT: i8x16.shl
+; WASM64-NEXT: i32.const 7
+; WASM64-NEXT: i8x16.shr_s
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.andnot
+; WASM64-NEXT: v128.or
+; WASM64-NEXT: # fallthrough-return
+ %xor = xor <16 x i8> %x, %y
+ %and = and <16 x i8> %x, %y
+ %result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %xor, <16 x i8> %and)
+ ret <16 x i8> %result
+}
+
+declare <4 x i32> @llvm.ct.select.v4i32(i1, <4 x i32>, <4 x i32>)
+declare <8 x i16> @llvm.ct.select.v8i16(i1, <8 x i16>, <8 x i16>)
+declare <16 x i8> @llvm.ct.select.v16i8(i1, <16 x i8>, <16 x i8>)
+declare <2 x i64> @llvm.ct.select.v2i64(i1, <2 x i64>, <2 x i64>)
+declare <4 x float> @llvm.ct.select.v4f32(i1, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.ct.select.v2f64(i1, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
new file mode 100644
index 0000000000000..bd318960536df
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
@@ -0,0 +1,552 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64
+
+; Test basic ct.select functionality for scalar types
+define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
+; W32-LABEL: test_ctselect_i8:
+; W32: .functype test_ctselect_i8 (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i8:
+; W64: .functype test_ctselect_i8 (i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
+; W64-NEXT: # fallthrough-return
+ %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
+ ret i8 %result
+}
+
+define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
+; W32-LABEL: test_ctselect_i16:
+; W32: .functype test_ctselect_i16 (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i16:
+; W64: .functype test_ctselect_i16 (i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
+; W64-NEXT: # fallthrough-return
+ %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
+ ret i16 %result
+}
+
+define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_i32:
+; W32: .functype test_ctselect_i32 (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i32:
+; W64: .functype test_ctselect_i32 (i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
+; W32-LABEL: test_ctselect_i64:
+; W32: .functype test_ctselect_i64 (i32, i64, i64) -> (i64)
+; W32-NEXT: .local i64
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i64.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i64.extend_i32_u
+; W32-NEXT: i64.const 1
+; W32-NEXT: i64.and
+; W32-NEXT: local.tee 3
+; W32-NEXT: i64.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i64.and
+; W32-NEXT: local.get 3
+; W32-NEXT: i64.const -1
+; W32-NEXT: i64.add
+; W32-NEXT: local.get 2
+; W32-NEXT: i64.and
+; W32-NEXT: i64.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_i64:
+; W64: .functype test_ctselect_i64 (i32, i64, i64) -> (i64)
+; W64-NEXT: .local i64
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i64.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i64.extend_i32_u
+; W64-NEXT: i64.const 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.tee 3
+; W64-NEXT: i64.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.get 3
+; W64-NEXT: i64.const -1
+; W64-NEXT: i64.add
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.and
+; W64-NEXT: i64.or
+; W64-NEXT: # fallthrough-return
+ %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
+ ret i64 %result
+}
+
+define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
+; W32-LABEL: test_ctselect_ptr:
+; W32: .functype test_ctselect_ptr (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_ptr:
+; W64: .functype test_ctselect_ptr (i32, i64, i64) -> (i64)
+; W64-NEXT: .local i64
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i64.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i64.extend_i32_u
+; W64-NEXT: i64.const 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.tee 3
+; W64-NEXT: i64.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i64.and
+; W64-NEXT: local.get 3
+; W64-NEXT: i64.const -1
+; W64-NEXT: i64.add
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.and
+; W64-NEXT: i64.or
+; W64-NEXT: # fallthrough-return
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+ ret ptr %result
+}
+
+; Test with constant conditions
+define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_const_true:
+; W32: .functype test_ctselect_const_true (i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_const_true:
+; W64: .functype test_ctselect_const_true (i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_const_false:
+; W32: .functype test_ctselect_const_false (i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_const_false:
+; W64: .functype test_ctselect_const_false (i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test with comparison conditions
+define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
+; W32-LABEL: test_ctselect_icmp_eq:
+; W32: .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.eq
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_eq:
+; W64: .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.eq
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cond = icmp eq i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) { ; ct.select condition is the i1 result of icmp ne %x, %y
+; W32-LABEL: test_ctselect_icmp_ne:
+; W32: .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.ne
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_ne:
+; W64: .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.ne
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cond = icmp ne i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) { ; ct.select condition is a signed less-than compare (icmp slt -> i32.lt_s)
+; W32-LABEL: test_ctselect_icmp_slt:
+; W32: .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.lt_s
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_slt:
+; W64: .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.lt_s
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cond = icmp slt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) { ; ct.select condition is an unsigned less-than compare (icmp ult -> i32.lt_u)
+; W32-LABEL: test_ctselect_icmp_ult:
+; W32: .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.lt_u
+; W32-NEXT: i32.select
+; W32-NEXT: local.tee 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_icmp_ult:
+; W64: .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.lt_u
+; W64-NEXT: i32.select
+; W64-NEXT: local.tee 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %cond = icmp ult i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test with memory operands: both values are loaded before the ct.select, so both i32.load instructions are expected in the output
+define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
+; W32-LABEL: test_ctselect_load:
+; W32: .functype test_ctselect_load (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.load 0
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.load 0
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_load:
+; W64: .functype test_ctselect_load (i32, i64, i64) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.load 0
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.load 0
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %a = load i32, ptr %p1
+ %b = load i32, ptr %p2
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test nested ctselect calls: the inner ct.select result is the first value operand of the outer ct.select
+define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
+; W32-LABEL: test_ctselect_nested:
+; W32: .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 1
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 4
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_ctselect_nested:
+; W64: .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 1
+; W64-NEXT: i32.sub
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 4
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
+ %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
+ ret i32 %result
+}
+
+; Declare the intrinsics
+declare i8 @llvm.ct.select.i8(i1, i8, i8)
+declare i16 @llvm.ct.select.i16(i1, i16, i16)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
new file mode 100644
index 0000000000000..5b20e892c64d2
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
@@ -0,0 +1,226 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=wasm32-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W32
+; RUN: llc < %s -mtriple=wasm64-unknown-unknown -O3 -filetype=asm | FileCheck %s --check-prefix=W64
+
+; Test 1: Basic optimizations should still work (the identity ops fold away and only the argument is returned)
+define i32 @test_basic_opts(i32 %x) {
+; W32-LABEL: test_basic_opts:
+; W32: .functype test_basic_opts (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_basic_opts:
+; W64: .functype test_basic_opts (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: # fallthrough-return
+ %a = or i32 %x, 0 ; x | 0 == x: should be eliminated
+ %b = and i32 %a, -1 ; x & -1 == x: should be eliminated
+ %c = xor i32 %b, 0 ; x ^ 0 == x: should be eliminated
+ ret i32 %c
+}
+
+; Test 2: Constant folding should work (the whole body collapses to i32.const 0)
+define i32 @test_constant_fold() {
+; W32-LABEL: test_constant_fold:
+; W32: .functype test_constant_fold () -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_constant_fold:
+; W64: .functype test_constant_fold () -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: # fallthrough-return
+ %a = xor i32 -1, -1 ; -1 ^ -1: should fold to 0
+ ret i32 %a
+}
+
+; Test 3: Protected pattern (ct.select) should NOT have branches: the checked body is straight-line, with no block/br_if
+define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_protected_no_branch:
+; W32: .functype test_protected_no_branch (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.tee 0
+; W32-NEXT: i32.sub
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.and
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const -1
+; W32-NEXT: i32.add
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.and
+; W32-NEXT: i32.or
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_protected_no_branch:
+; W64: .functype test_protected_no_branch (i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.tee 0
+; W64-NEXT: i32.sub
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.and
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const -1
+; W64-NEXT: i32.add
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.and
+; W64-NEXT: i32.or
+; W64-NEXT: # fallthrough-return
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test 4: Explicit branch should still generate branches (ordinary control flow is not affected by ct.select handling)
+define i32 @test_explicit_branch(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_explicit_branch:
+; W32: .functype test_explicit_branch (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: block
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.eqz
+; W32-NEXT: br_if 0 # 0: down to label0
+; W32-NEXT: # %bb.1: # %true
+; W32-NEXT: local.get 1
+; W32-NEXT: return
+; W32-NEXT: .LBB3_2: # %false
+; W32-NEXT: end_block # label0:
+; W32-NEXT: local.get 2
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_explicit_branch:
+; W64: .functype test_explicit_branch (i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: block
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.eqz
+; W64-NEXT: br_if 0 # 0: down to label0
+; W64-NEXT: # %bb.1: # %true
+; W64-NEXT: local.get 1
+; W64-NEXT: return
+; W64-NEXT: .LBB3_2: # %false
+; W64-NEXT: end_block # label0:
+; W64-NEXT: local.get 2
+; W64-NEXT: # fallthrough-return
+ br i1 %cond, label %true, label %false ; real conditional branch: checked above as block/br_if
+true:
+ ret i32 %a
+false:
+ ret i32 %b
+}
+
+; Test 5: Regular select (not ct.select) - the backend is free to use its normal lowering (here a single i32.select)
+define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) {
+; W32-LABEL: test_regular_select:
+; W32: .functype test_regular_select (i32, i32, i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.select
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_regular_select:
+; W64: .functype test_regular_select (i32, i32, i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 1
+; W64-NEXT: i32.and
+; W64-NEXT: i32.select
+; W64-NEXT: # fallthrough-return
+ %result = select i1 %cond, i32 %a, i32 %b
+ ret i32 %result
+}
+
+; Test that XOR of two all-ones constants still gets optimized (same IR body as test_constant_fold)
+define i32 @test_xor_all_ones() {
+; W32-LABEL: test_xor_all_ones:
+; W32: .functype test_xor_all_ones () -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_xor_all_ones:
+; W64: .functype test_xor_all_ones () -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: # fallthrough-return
+ %xor1 = xor i32 -1, -1 ; -1 ^ -1: should optimize to 0
+ ret i32 %xor1
+}
+
+define i32 @test_xor_same_value(i32 %x) {
+; W32-LABEL: test_xor_same_value:
+; W32: .functype test_xor_same_value (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_xor_same_value:
+; W64: .functype test_xor_same_value (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: # fallthrough-return
+ %xor2 = xor i32 %x, %x ; x ^ x == 0 for any x: should optimize to 0 without reading the argument
+ ret i32 %xor2
+}
+
+define i32 @test_normal_ops(i32 %x) {
+; W32-LABEL: test_normal_ops:
+; W32: .functype test_normal_ops (i32) -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_normal_ops:
+; W64: .functype test_normal_ops (i32) -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 0
+; W64-NEXT: # fallthrough-return
+ %or1 = or i32 %x, 0 ; x | 0 == x (same identity chain as test_basic_opts)
+ %and1 = and i32 %or1, -1 ; x & -1 == x
+ %xor1 = xor i32 %and1, 0 ; x ^ 0 == x, so only the argument should remain
+ ret i32 %xor1
+}
+
+; XORs of constant operands combined with ORs must still constant-fold to 0 (simulates a concern raised in review)
+define i32 @test_xor_with_const_operands() {
+; W32-LABEL: test_xor_with_const_operands:
+; W32: .functype test_xor_with_const_operands () -> (i32)
+; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
+; W32-NEXT: # fallthrough-return
+;
+; W64-LABEL: test_xor_with_const_operands:
+; W64: .functype test_xor_with_const_operands () -> (i32)
+; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
+; W64-NEXT: # fallthrough-return
+ %a = xor i32 -1, -1
+ %b = xor i32 0, 0
+ %c = xor i32 42, 42
+ %result = or i32 %a, %b
+ %final = or i32 %result, %c
+ ret i32 %final ; each xor is c ^ c == 0, and 0 | 0 | 0 == 0: should optimize to 0
+}
+
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+
>From edf4afdb42c6b82b883cb73330421e6cb874e423 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Sat, 7 Mar 2026 15:39:12 -0500
Subject: [PATCH 2/2] [LLVM][WebAssembly] Regenerate ct.select test CHECK lines
Update CHECK lines to match the new constant-time AND/OR/XOR expansion
from the CT_SELECT legalization fix.
---
.../ctselect-fallback-edge-cases.ll | 253 ++++-------
.../WebAssembly/ctselect-fallback-patterns.ll | 350 ++++++---------
.../WebAssembly/ctselect-fallback-vector.ll | 422 ++++++++----------
.../CodeGen/WebAssembly/ctselect-fallback.ll | 302 +++++--------
.../WebAssembly/ctselect-side-effects.ll | 26 +-
5 files changed, 542 insertions(+), 811 deletions(-)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
index b0f7f2807debd..99bfae3b13ed0 100644
--- a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-edge-cases.ll
@@ -7,29 +7,33 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
; W32-LABEL: test_ctselect_i1:
; W32: .functype test_ctselect_i1 (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: local.get 0
+; W32-NEXT: local.get 2
; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
+; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
-; W32-NEXT: i32.xor
-; W32-NEXT: local.get 2
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_i1:
; W64: .functype test_ctselect_i1 (i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: local.get 0
+; W64-NEXT: local.get 2
; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
+; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
-; W64-NEXT: i32.xor
-; W64-NEXT: local.get 2
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
ret i1 %result
@@ -40,39 +44,21 @@ define i32 @test_ctselect_extremal_values(i1 %cond) {
; W32-LABEL: test_ctselect_extremal_values:
; W32: .functype test_ctselect_extremal_values (i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const 0
+; W32-NEXT: i32.const -2147483648
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: i32.const 2147483647
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: i32.const -2147483648
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_extremal_values:
; W64: .functype test_ctselect_extremal_values (i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const 0
+; W64-NEXT: i32.const -2147483648
; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
; W64-NEXT: i32.sub
-; W64-NEXT: i32.const 2147483647
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: i32.const -2147483648
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
ret i32 %result
@@ -83,25 +69,25 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
; W32-LABEL: test_ctselect_null_ptr:
; W32: .functype test_ctselect_null_ptr (i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 1
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
; W32-NEXT: i32.and
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_null_ptr:
; W64: .functype test_ctselect_null_ptr (i32, i64) -> (i64)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 1
; W64-NEXT: i64.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i64.extend_i32_u
; W64-NEXT: i64.const 1
; W64-NEXT: i64.and
; W64-NEXT: i64.sub
-; W64-NEXT: local.get 1
; W64-NEXT: i64.and
; W64-NEXT: # fallthrough-return
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
@@ -113,41 +99,34 @@ define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
; W32-LABEL: test_ctselect_function_ptr:
; W32: .functype test_ctselect_function_ptr (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 2
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_function_ptr:
; W64: .functype test_ctselect_function_ptr (i32, i64, i64) -> (i64)
-; W64-NEXT: .local i64
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.xor
; W64-NEXT: i64.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i64.extend_i32_u
; W64-NEXT: i64.const 1
; W64-NEXT: i64.and
-; W64-NEXT: local.tee 3
; W64-NEXT: i64.sub
-; W64-NEXT: local.get 1
; W64-NEXT: i64.and
-; W64-NEXT: local.get 3
-; W64-NEXT: i64.const -1
-; W64-NEXT: i64.add
-; W64-NEXT: local.get 2
-; W64-NEXT: i64.and
-; W64-NEXT: i64.or
+; W64-NEXT: i64.xor
; W64-NEXT: # fallthrough-return
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
ret ptr %result
@@ -158,41 +137,34 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
; W32-LABEL: test_ctselect_ptr_cmp:
; W32: .functype test_ctselect_ptr_cmp (i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.eq
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 1
-; W32-NEXT: local.get 2
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_ptr_cmp:
; W64: .functype test_ctselect_ptr_cmp (i64, i64, i64, i64) -> (i64)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i64.const -1
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 3
+; W64-NEXT: i64.xor
; W64-NEXT: i64.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i64.eq
-; W64-NEXT: i64.select
-; W64-NEXT: local.tee 1
-; W64-NEXT: local.get 2
+; W64-NEXT: i64.extend_i32_u
+; W64-NEXT: i64.sub
; W64-NEXT: i64.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i64.const -1
; W64-NEXT: i64.xor
-; W64-NEXT: local.get 3
-; W64-NEXT: i64.and
-; W64-NEXT: i64.or
; W64-NEXT: # fallthrough-return
%cmp = icmp eq ptr %p1, %p2
%result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
@@ -206,41 +178,34 @@ define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
; W32-LABEL: test_ctselect_struct_ptr:
; W32: .functype test_ctselect_struct_ptr (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_struct_ptr:
; W64: .functype test_ctselect_struct_ptr (i32, i64, i64) -> (i64)
-; W64-NEXT: .local i64
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.xor
; W64-NEXT: i64.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i64.extend_i32_u
; W64-NEXT: i64.const 1
; W64-NEXT: i64.and
-; W64-NEXT: local.tee 3
; W64-NEXT: i64.sub
-; W64-NEXT: local.get 1
; W64-NEXT: i64.and
-; W64-NEXT: local.get 3
-; W64-NEXT: i64.const -1
-; W64-NEXT: i64.add
-; W64-NEXT: local.get 2
-; W64-NEXT: i64.and
-; W64-NEXT: i64.or
+; W64-NEXT: i64.xor
; W64-NEXT: # fallthrough-return
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
ret ptr %result
@@ -251,117 +216,93 @@ define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a,
; W32-LABEL: test_ctselect_deeply_nested:
; W32: .functype test_ctselect_deeply_nested (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 8
+; W32-NEXT: local.get 7
+; W32-NEXT: local.get 6
+; W32-NEXT: local.get 5
+; W32-NEXT: local.get 4
+; W32-NEXT: local.get 5
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 3
+; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 3
; W32-NEXT: i32.sub
-; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 2
-; W32-NEXT: i32.sub
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 6
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 1
; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 7
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 0
+; W32-NEXT: local.get 2
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 4
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 5
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 6
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 7
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: i32.and
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
+; W32-NEXT: i32.xor
; W32-NEXT: local.get 8
+; W32-NEXT: i32.xor
+; W32-NEXT: i32.const 0
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.const 1
+; W32-NEXT: i32.and
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_deeply_nested:
; W64: .functype test_ctselect_deeply_nested (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 8
+; W64-NEXT: local.get 7
+; W64-NEXT: local.get 6
+; W64-NEXT: local.get 5
+; W64-NEXT: local.get 4
+; W64-NEXT: local.get 5
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 3
+; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 3
; W64-NEXT: i32.sub
-; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 2
-; W64-NEXT: i32.sub
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 6
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 1
; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 7
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 0
+; W64-NEXT: local.get 2
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
; W64-NEXT: i32.sub
-; W64-NEXT: local.get 4
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 5
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 6
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 7
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: i32.and
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
+; W64-NEXT: i32.xor
; W64-NEXT: local.get 8
+; W64-NEXT: i32.xor
+; W64-NEXT: i32.const 0
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
%sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
index 040ee44addb69..3bd2bf6ed609e 100644
--- a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-patterns.ll
@@ -8,9 +8,11 @@ define i32 @test_ctselect_smin_zero(i32 %x) {
; W32: .functype test_ctselect_smin_zero (i32) -> (i32)
; W32-NEXT: # %bb.0:
; W32-NEXT: local.get 0
-; W32-NEXT: i32.const 31
-; W32-NEXT: i32.shr_s
+; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 0
+; W32-NEXT: i32.lt_s
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
; W32-NEXT: # fallthrough-return
;
@@ -18,9 +20,11 @@ define i32 @test_ctselect_smin_zero(i32 %x) {
; W64: .functype test_ctselect_smin_zero (i32) -> (i32)
; W64-NEXT: # %bb.0:
; W64-NEXT: local.get 0
-; W64-NEXT: i32.const 31
-; W64-NEXT: i32.shr_s
+; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 0
+; W64-NEXT: i32.lt_s
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
; W64-NEXT: # fallthrough-return
%cmp = icmp slt i32 %x, 0
@@ -38,7 +42,8 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 0
; W32-NEXT: i32.gt_s
-; W32-NEXT: i32.select
+; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_smax_zero:
@@ -49,7 +54,8 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
; W64-NEXT: local.get 0
; W64-NEXT: i32.const 0
; W64-NEXT: i32.gt_s
-; W64-NEXT: i32.select
+; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
; W64-NEXT: # fallthrough-return
%cmp = icmp sgt i32 %x, 0
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
@@ -60,44 +66,34 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
; W32-LABEL: test_ctselect_smin_generic:
; W32: .functype test_ctselect_smin_generic (i32, i32) -> (i32)
-; W32-NEXT: .local i32
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.lt_s
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 2
-; W32-NEXT: local.get 0
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_smin_generic:
; W64: .functype test_ctselect_smin_generic (i32, i32) -> (i32)
-; W64-NEXT: .local i32
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.lt_s
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 2
-; W64-NEXT: local.get 0
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cmp = icmp slt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -108,44 +104,34 @@ define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
; W32-LABEL: test_ctselect_smax_generic:
; W32: .functype test_ctselect_smax_generic (i32, i32) -> (i32)
-; W32-NEXT: .local i32
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.gt_s
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 2
-; W32-NEXT: local.get 0
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_smax_generic:
; W64: .functype test_ctselect_smax_generic (i32, i32) -> (i32)
-; W64-NEXT: .local i32
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.gt_s
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 2
-; W64-NEXT: local.get 0
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cmp = icmp sgt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -156,44 +142,34 @@ define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
; W32-LABEL: test_ctselect_umin_generic:
; W32: .functype test_ctselect_umin_generic (i32, i32) -> (i32)
-; W32-NEXT: .local i32
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.lt_u
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 2
-; W32-NEXT: local.get 0
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_umin_generic:
; W64: .functype test_ctselect_umin_generic (i32, i32) -> (i32)
-; W64-NEXT: .local i32
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.lt_u
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 2
-; W64-NEXT: local.get 0
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cmp = icmp ult i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -204,44 +180,34 @@ define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
; W32-LABEL: test_ctselect_umax_generic:
; W32: .functype test_ctselect_umax_generic (i32, i32) -> (i32)
-; W32-NEXT: .local i32
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.gt_u
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 2
-; W32-NEXT: local.get 0
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_umax_generic:
; W64: .functype test_ctselect_umax_generic (i32, i32) -> (i32)
-; W64-NEXT: .local i32
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.gt_u
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 2
-; W64-NEXT: local.get 0
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cmp = icmp ugt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -252,42 +218,38 @@ define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
define i32 @test_ctselect_abs(i32 %x) {
; W32-LABEL: test_ctselect_abs:
; W32: .functype test_ctselect_abs (i32) -> (i32)
-; W32-NEXT: .local i32
; W32-NEXT: # %bb.0:
; W32-NEXT: local.get 0
-; W32-NEXT: i32.const 31
-; W32-NEXT: i32.shr_s
-; W32-NEXT: local.tee 1
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.sub
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 0
; W32-NEXT: i32.xor
+; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 0
+; W32-NEXT: i32.lt_s
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_abs:
; W64: .functype test_ctselect_abs (i32) -> (i32)
-; W64-NEXT: .local i32
; W64-NEXT: # %bb.0:
; W64-NEXT: local.get 0
-; W64-NEXT: i32.const 31
-; W64-NEXT: i32.shr_s
-; W64-NEXT: local.tee 1
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i32.sub
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 0
; W64-NEXT: i32.xor
+; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 0
+; W64-NEXT: i32.lt_s
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%neg = sub i32 0, %x
%cmp = icmp slt i32 %x, 0
@@ -301,40 +263,40 @@ define i32 @test_ctselect_nabs(i32 %x) {
; W32: .functype test_ctselect_nabs (i32) -> (i32)
; W32-NEXT: .local i32
; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
-; W32-NEXT: i32.const 31
-; W32-NEXT: i32.shr_s
+; W32-NEXT: i32.sub
; W32-NEXT: local.tee 1
; W32-NEXT: local.get 0
-; W32-NEXT: i32.and
; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
+; W32-NEXT: i32.const 0
+; W32-NEXT: i32.lt_s
; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_nabs:
; W64: .functype test_ctselect_nabs (i32) -> (i32)
; W64-NEXT: .local i32
; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
-; W64-NEXT: i32.const 31
-; W64-NEXT: i32.shr_s
+; W64-NEXT: i32.sub
; W64-NEXT: local.tee 1
; W64-NEXT: local.get 0
-; W64-NEXT: i32.and
; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
+; W64-NEXT: i32.const 0
+; W64-NEXT: i32.lt_s
; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%neg = sub i32 0, %x
%cmp = icmp slt i32 %x, 0
@@ -347,17 +309,21 @@ define i32 @test_ctselect_sign_extend(i32 %x) {
; W32-LABEL: test_ctselect_sign_extend:
; W32: .functype test_ctselect_sign_extend (i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
-; W32-NEXT: i32.const 31
-; W32-NEXT: i32.shr_s
+; W32-NEXT: i32.const 0
+; W32-NEXT: i32.lt_s
+; W32-NEXT: i32.sub
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_sign_extend:
; W64: .functype test_ctselect_sign_extend (i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
-; W64-NEXT: i32.const 31
-; W64-NEXT: i32.shr_s
+; W64-NEXT: i32.const 0
+; W64-NEXT: i32.lt_s
+; W64-NEXT: i32.sub
; W64-NEXT: # fallthrough-return
%cmp = icmp slt i32 %x, 0
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
@@ -407,17 +373,13 @@ define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_constant_folding_false:
; W32: .functype test_ctselect_constant_folding_false (i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const 0
; W32-NEXT: local.get 1
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_constant_folding_false:
; W64: .functype test_ctselect_constant_folding_false (i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const 0
; W64-NEXT: local.get 1
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
ret i32 %result
@@ -428,39 +390,13 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
; W32-LABEL: test_ctselect_identical_operands:
; W32: .functype test_ctselect_identical_operands (i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const 1
-; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
-; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_identical_operands:
; W64: .functype test_ctselect_identical_operands (i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const 1
-; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
-; W64-NEXT: i32.sub
; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
ret i32 %result
@@ -471,41 +407,33 @@ define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_inverted_condition:
; W32: .functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
-; W32-NEXT: i32.ne
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 1
-; W32-NEXT: local.get 2
+; W32-NEXT: i32.eq
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_inverted_condition:
; W64: .functype test_ctselect_inverted_condition (i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
-; W64-NEXT: i32.ne
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 1
-; W64-NEXT: local.get 2
+; W64-NEXT: i32.eq
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cmp = icmp eq i32 %x, %y
%not_cmp = xor i1 %cmp, true
@@ -518,91 +446,73 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c,
; W32-LABEL: test_ctselect_chain:
; W32: .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 6
+; W32-NEXT: local.get 5
+; W32-NEXT: local.get 4
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 4
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 2
+; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 2
; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 5
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 1
; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 6
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 0
+; W32-NEXT: local.get 2
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 4
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 5
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 6
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_chain:
; W64: .functype test_ctselect_chain (i32, i32, i32, i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 6
+; W64-NEXT: local.get 5
+; W64-NEXT: local.get 4
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 4
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 2
+; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 2
; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 5
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 1
; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 6
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 0
+; W64-NEXT: local.get 2
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
; W64-NEXT: i32.sub
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 4
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 5
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 6
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
%sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
@@ -616,9 +526,9 @@ define i64 @test_ctselect_i64_smin_zero(i64 %x) {
; W32: .functype test_ctselect_i64_smin_zero (i64) -> (i64)
; W32-NEXT: # %bb.0:
; W32-NEXT: local.get 0
+; W32-NEXT: local.get 0
; W32-NEXT: i64.const 63
; W32-NEXT: i64.shr_s
-; W32-NEXT: local.get 0
; W32-NEXT: i64.and
; W32-NEXT: # fallthrough-return
;
@@ -626,9 +536,9 @@ define i64 @test_ctselect_i64_smin_zero(i64 %x) {
; W64: .functype test_ctselect_i64_smin_zero (i64) -> (i64)
; W64-NEXT: # %bb.0:
; W64-NEXT: local.get 0
+; W64-NEXT: local.get 0
; W64-NEXT: i64.const 63
; W64-NEXT: i64.shr_s
-; W64-NEXT: local.get 0
; W64-NEXT: i64.and
; W64-NEXT: # fallthrough-return
%cmp = icmp slt i64 %x, 0
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
index 75e38e1856a03..a19e19b579d0f 100644
--- a/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback-vector.ll
@@ -6,40 +6,36 @@
define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
; WASM32-LABEL: test_ctselect_v4i32:
; WASM32: .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32:
; WASM64: .functype test_ctselect_v4i32 (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %result
@@ -49,40 +45,36 @@ define <4 x i32> @test_ctselect_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
; WASM32-LABEL: test_ctselect_v8i16:
; WASM32: .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i16x8.splat
; WASM32-NEXT: i32.const 15
; WASM32-NEXT: i16x8.shl
; WASM32-NEXT: i32.const 15
; WASM32-NEXT: i16x8.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v8i16:
; WASM64: .functype test_ctselect_v8i16 (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i16x8.splat
; WASM64-NEXT: i32.const 15
; WASM64-NEXT: i16x8.shl
; WASM64-NEXT: i32.const 15
; WASM64-NEXT: i16x8.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%result = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %result
@@ -92,40 +84,36 @@ define <8 x i16> @test_ctselect_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
; WASM32-LABEL: test_ctselect_v16i8:
; WASM32: .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i8x16.splat
; WASM32-NEXT: i32.const 7
; WASM32-NEXT: i8x16.shl
; WASM32-NEXT: i32.const 7
; WASM32-NEXT: i8x16.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v16i8:
; WASM64: .functype test_ctselect_v16i8 (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i8x16.splat
; WASM64-NEXT: i32.const 7
; WASM64-NEXT: i8x16.shl
; WASM64-NEXT: i32.const 7
; WASM64-NEXT: i8x16.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%result = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %result
@@ -135,40 +123,36 @@ define <16 x i8> @test_ctselect_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
; WASM32-LABEL: test_ctselect_v2i64:
; WASM32: .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 63
; WASM32-NEXT: i64x2.shl
; WASM32-NEXT: i32.const 63
; WASM32-NEXT: i64x2.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v2i64:
; WASM64: .functype test_ctselect_v2i64 (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 63
; WASM64-NEXT: i64x2.shl
; WASM64-NEXT: i32.const 63
; WASM64-NEXT: i64x2.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%result = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %result
@@ -178,40 +162,36 @@ define <2 x i64> @test_ctselect_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
; WASM32-LABEL: test_ctselect_v4f32:
; WASM32: .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4f32:
; WASM64: .functype test_ctselect_v4f32 (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%result = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b)
ret <4 x float> %result
@@ -221,40 +201,36 @@ define <4 x float> @test_ctselect_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b
define <2 x double> @test_ctselect_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
; WASM32-LABEL: test_ctselect_v2f64:
; WASM32: .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 63
; WASM32-NEXT: i64x2.shl
; WASM32-NEXT: i32.const 63
; WASM32-NEXT: i64x2.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v2f64:
; WASM64: .functype test_ctselect_v2f64 (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 63
; WASM64-NEXT: i64x2.shl
; WASM64-NEXT: i32.const 63
; WASM64-NEXT: i64x2.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%result = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b)
ret <2 x double> %result
@@ -266,42 +242,42 @@ define <4 x i32> @test_ctselect_v4i32_aligned_load(i1 %cond, ptr %p1, ptr %p2) {
; WASM32: .functype test_ctselect_v4i32_aligned_load (i32, i32, i32) -> (v128)
; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.load 0
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.load 0
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
-; WASM32-NEXT: v128.load 0
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: v128.load 0
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32_aligned_load:
; WASM64: .functype test_ctselect_v4i32_aligned_load (i32, i64, i64) -> (v128)
; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.load 0
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.load 0
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
-; WASM64-NEXT: v128.load 0
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: v128.load 0
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%a = load <4 x i32>, ptr %p1, align 16
%b = load <4 x i32>, ptr %p2, align 16
@@ -315,42 +291,42 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
; WASM32: .functype test_ctselect_v4i32_unaligned_load (i32, i32, i32) -> (v128)
; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.load 0:p2align=2
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: v128.load 0:p2align=2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
-; WASM32-NEXT: v128.load 0:p2align=2
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: v128.load 0:p2align=2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32_unaligned_load:
; WASM64: .functype test_ctselect_v4i32_unaligned_load (i32, i64, i64) -> (v128)
; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.load 0:p2align=2
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: v128.load 0:p2align=2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
-; WASM64-NEXT: v128.load 0:p2align=2
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: v128.load 0:p2align=2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%a = load <4 x i32>, ptr %p1, align 4
%b = load <4 x i32>, ptr %p2, align 4
@@ -362,43 +338,39 @@ define <4 x i32> @test_ctselect_v4i32_unaligned_load(i1 %cond, ptr %p1, ptr %p2)
define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr %out) {
; WASM32-LABEL: test_ctselect_v4i32_store:
; WASM32: .functype test_ctselect_v4i32_store (i32, v128, v128, i32) -> ()
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 4
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 4
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: v128.store 0
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32_store:
; WASM64: .functype test_ctselect_v4i32_store (i32, v128, v128, i64) -> ()
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 4
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 4
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: v128.store 0
; WASM64-NEXT: # fallthrough-return
%result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
@@ -410,64 +382,58 @@ define void @test_ctselect_v4i32_store(i1 %cond, <4 x i32> %a, <4 x i32> %b, ptr
define <4 x i32> @test_ctselect_v4i32_chain(i1 %cond1, i1 %cond2, <4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
; WASM32-LABEL: test_ctselect_v4i32_chain:
; WASM32: .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128, v128
; WASM32-NEXT: # %bb.0:
-; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 4
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: v128.xor
+; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 5
-; WASM32-NEXT: local.get 0
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: v128.xor
+; WASM32-NEXT: local.get 4
+; WASM32-NEXT: v128.xor
+; WASM32-NEXT: local.get 1
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 6
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: local.get 6
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 4
-; WASM32-NEXT: local.get 5
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32_chain:
; WASM64: .functype test_ctselect_v4i32_chain (i32, i32, v128, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128, v128
; WASM64-NEXT: # %bb.0:
-; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 4
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: v128.xor
+; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 5
-; WASM64-NEXT: local.get 0
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: v128.xor
+; WASM64-NEXT: local.get 4
+; WASM64-NEXT: v128.xor
+; WASM64-NEXT: local.get 1
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 6
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: local.get 6
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 4
-; WASM64-NEXT: local.get 5
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%tmp = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond1, <4 x i32> %a, <4 x i32> %b)
%result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond2, <4 x i32> %tmp, <4 x i32> %c)
@@ -480,46 +446,46 @@ define <4 x float> @test_ctselect_v4f32_arithmetic(i1 %cond, <4 x float> %x, <4
; WASM32: .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) -> (v128)
; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: f32x4.sub
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: f32x4.add
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: f32x4.add
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 1
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: f32x4.sub
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4f32_arithmetic:
; WASM64: .functype test_ctselect_v4f32_arithmetic (i32, v128, v128) -> (v128)
; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: f32x4.sub
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: f32x4.add
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: f32x4.add
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 1
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: f32x4.sub
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%sum = fadd <4 x float> %x, %y
%diff = fsub <4 x float> %x, %y
@@ -532,26 +498,26 @@ define <4 x i32> @test_ctselect_v4i32_zeros(i1 %cond, <4 x i32> %a) {
; WASM32-LABEL: test_ctselect_v4i32_zeros:
; WASM32: .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128)
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 1
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32_zeros:
; WASM64: .functype test_ctselect_v4i32_zeros (i32, v128) -> (v128)
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 1
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
; WASM64-NEXT: # fallthrough-return
%result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond,
@@ -564,40 +530,36 @@ define <4 x i32> @test_ctselect_v4i32_zeros(i1 %cond, <4 x i32> %a) {
define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b) nounwind {
; WASM32-LABEL: test_ctselect_v4i32_args:
; WASM32: .functype test_ctselect_v4i32_args (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32_args:
; WASM64: .functype test_ctselect_v4i32_args (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%result = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %result
@@ -607,45 +569,41 @@ define <4 x i32> @test_ctselect_v4i32_args(i1 %cond, <4 x i32> %a, <4 x i32> %b)
define <4 x i32> @test_ctselect_v4i32_multi_use(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
; WASM32-LABEL: test_ctselect_v4i32_multi_use:
; WASM32: .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> (v128)
-; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i32x4.splat
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shl
; WASM32-NEXT: i32.const 31
; WASM32-NEXT: i32x4.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: v128.and
+; WASM32-NEXT: v128.xor
+; WASM32-NEXT: local.tee 2
; WASM32-NEXT: local.get 2
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
-; WASM32-NEXT: local.tee 1
-; WASM32-NEXT: local.get 1
; WASM32-NEXT: i32x4.add
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v4i32_multi_use:
; WASM64: .functype test_ctselect_v4i32_multi_use (i32, v128, v128) -> (v128)
-; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i32x4.splat
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shl
; WASM64-NEXT: i32.const 31
; WASM64-NEXT: i32x4.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: v128.and
+; WASM64-NEXT: v128.xor
+; WASM64-NEXT: local.tee 2
; WASM64-NEXT: local.get 2
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
-; WASM64-NEXT: local.tee 1
-; WASM64-NEXT: local.get 1
; WASM64-NEXT: i32x4.add
; WASM64-NEXT: # fallthrough-return
%sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b)
@@ -659,46 +617,46 @@ define <16 x i8> @test_ctselect_v16i8_ops(i1 %cond, <16 x i8> %x, <16 x i8> %y)
; WASM32: .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128)
; WASM32-NEXT: .local v128
; WASM32-NEXT: # %bb.0:
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.and
+; WASM32-NEXT: local.tee 3
+; WASM32-NEXT: local.get 3
+; WASM32-NEXT: local.get 1
+; WASM32-NEXT: local.get 2
+; WASM32-NEXT: v128.xor
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: local.get 0
; WASM32-NEXT: i8x16.splat
; WASM32-NEXT: i32.const 7
; WASM32-NEXT: i8x16.shl
; WASM32-NEXT: i32.const 7
; WASM32-NEXT: i8x16.shr_s
-; WASM32-NEXT: local.tee 3
-; WASM32-NEXT: local.get 1
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: v128.xor
; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 1
-; WASM32-NEXT: local.get 2
-; WASM32-NEXT: v128.and
-; WASM32-NEXT: local.get 3
-; WASM32-NEXT: v128.andnot
-; WASM32-NEXT: v128.or
+; WASM32-NEXT: v128.xor
; WASM32-NEXT: # fallthrough-return
;
; WASM64-LABEL: test_ctselect_v16i8_ops:
; WASM64: .functype test_ctselect_v16i8_ops (i32, v128, v128) -> (v128)
; WASM64-NEXT: .local v128
; WASM64-NEXT: # %bb.0:
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.and
+; WASM64-NEXT: local.tee 3
+; WASM64-NEXT: local.get 3
+; WASM64-NEXT: local.get 1
+; WASM64-NEXT: local.get 2
+; WASM64-NEXT: v128.xor
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: local.get 0
; WASM64-NEXT: i8x16.splat
; WASM64-NEXT: i32.const 7
; WASM64-NEXT: i8x16.shl
; WASM64-NEXT: i32.const 7
; WASM64-NEXT: i8x16.shr_s
-; WASM64-NEXT: local.tee 3
-; WASM64-NEXT: local.get 1
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: v128.xor
; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 1
-; WASM64-NEXT: local.get 2
-; WASM64-NEXT: v128.and
-; WASM64-NEXT: local.get 3
-; WASM64-NEXT: v128.andnot
-; WASM64-NEXT: v128.or
+; WASM64-NEXT: v128.xor
; WASM64-NEXT: # fallthrough-return
%xor = xor <16 x i8> %x, %y
%and = and <16 x i8> %x, %y
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
index bd318960536df..29c3dc6e6aa02 100644
--- a/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-fallback.ll
@@ -7,6 +7,7 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
; W32-LABEL: test_ctselect_i8:
; W32: .functype test_ctselect_i8 (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
; W32-NEXT: local.get 1
; W32-NEXT: local.get 2
; W32-NEXT: i32.xor
@@ -16,13 +17,13 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
; W32-NEXT: i32.and
; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_i8:
; W64: .functype test_ctselect_i8 (i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
; W64-NEXT: local.get 1
; W64-NEXT: local.get 2
; W64-NEXT: i32.xor
@@ -32,7 +33,6 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
; W64-NEXT: i32.and
; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
@@ -43,6 +43,7 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
; W32-LABEL: test_ctselect_i16:
; W32: .functype test_ctselect_i16 (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
; W32-NEXT: local.get 1
; W32-NEXT: local.get 2
; W32-NEXT: i32.xor
@@ -52,13 +53,13 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
; W32-NEXT: i32.and
; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 2
; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_i16:
; W64: .functype test_ctselect_i16 (i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
; W64-NEXT: local.get 1
; W64-NEXT: local.get 2
; W64-NEXT: i32.xor
@@ -68,7 +69,6 @@ define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
; W64-NEXT: i32.and
; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 2
; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
@@ -79,39 +79,33 @@ define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_i32:
; W32: .functype test_ctselect_i32 (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_i32:
; W64: .functype test_ctselect_i32 (i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
; W64-NEXT: i32.sub
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 2
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
ret i32 %result
@@ -120,44 +114,36 @@ define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
; W32-LABEL: test_ctselect_i64:
; W32: .functype test_ctselect_i64 (i32, i64, i64) -> (i64)
-; W32-NEXT: .local i64
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i64.xor
; W32-NEXT: i64.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i64.extend_i32_u
; W32-NEXT: i64.const 1
; W32-NEXT: i64.and
-; W32-NEXT: local.tee 3
; W32-NEXT: i64.sub
-; W32-NEXT: local.get 1
-; W32-NEXT: i64.and
-; W32-NEXT: local.get 3
-; W32-NEXT: i64.const -1
-; W32-NEXT: i64.add
-; W32-NEXT: local.get 2
; W32-NEXT: i64.and
-; W32-NEXT: i64.or
+; W32-NEXT: i64.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_i64:
; W64: .functype test_ctselect_i64 (i32, i64, i64) -> (i64)
-; W64-NEXT: .local i64
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.xor
; W64-NEXT: i64.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i64.extend_i32_u
; W64-NEXT: i64.const 1
; W64-NEXT: i64.and
-; W64-NEXT: local.tee 3
; W64-NEXT: i64.sub
-; W64-NEXT: local.get 1
; W64-NEXT: i64.and
-; W64-NEXT: local.get 3
-; W64-NEXT: i64.const -1
-; W64-NEXT: i64.add
-; W64-NEXT: local.get 2
-; W64-NEXT: i64.and
-; W64-NEXT: i64.or
+; W64-NEXT: i64.xor
; W64-NEXT: # fallthrough-return
%result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
ret i64 %result
@@ -167,41 +153,34 @@ define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
; W32-LABEL: test_ctselect_ptr:
; W32: .functype test_ctselect_ptr (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 2
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_ptr:
; W64: .functype test_ctselect_ptr (i32, i64, i64) -> (i64)
-; W64-NEXT: .local i64
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i64.xor
; W64-NEXT: i64.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i64.extend_i32_u
; W64-NEXT: i64.const 1
; W64-NEXT: i64.and
-; W64-NEXT: local.tee 3
; W64-NEXT: i64.sub
-; W64-NEXT: local.get 1
-; W64-NEXT: i64.and
-; W64-NEXT: local.get 3
-; W64-NEXT: i64.const -1
-; W64-NEXT: i64.add
-; W64-NEXT: local.get 2
; W64-NEXT: i64.and
-; W64-NEXT: i64.or
+; W64-NEXT: i64.xor
; W64-NEXT: # fallthrough-return
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
ret ptr %result
@@ -228,17 +207,13 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_const_false:
; W32: .functype test_ctselect_const_false (i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const 0
; W32-NEXT: local.get 1
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_const_false:
; W64: .functype test_ctselect_const_false (i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const 0
; W64-NEXT: local.get 1
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
ret i32 %result
@@ -249,41 +224,33 @@ define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_icmp_eq:
; W32: .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.eq
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 1
-; W32-NEXT: local.get 2
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_icmp_eq:
; W64: .functype test_ctselect_icmp_eq (i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.eq
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 1
-; W64-NEXT: local.get 2
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cond = icmp eq i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
@@ -294,41 +261,33 @@ define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_icmp_ne:
; W32: .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.ne
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 1
-; W32-NEXT: local.get 2
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_icmp_ne:
; W64: .functype test_ctselect_icmp_ne (i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.ne
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 1
-; W64-NEXT: local.get 2
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cond = icmp ne i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
@@ -339,41 +298,33 @@ define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_icmp_slt:
; W32: .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.lt_s
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 1
-; W32-NEXT: local.get 2
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_icmp_slt:
; W64: .functype test_ctselect_icmp_slt (i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.lt_s
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 1
-; W64-NEXT: local.get 2
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cond = icmp slt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
@@ -384,41 +335,33 @@ define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
; W32-LABEL: test_ctselect_icmp_ult:
; W32: .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
-; W32-NEXT: i32.const -1
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: local.get 1
; W32-NEXT: i32.lt_u
-; W32-NEXT: i32.select
-; W32-NEXT: local.tee 1
-; W32-NEXT: local.get 2
+; W32-NEXT: i32.sub
; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
; W32-NEXT: i32.xor
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_icmp_ult:
; W64: .functype test_ctselect_icmp_ult (i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
-; W64-NEXT: i32.const -1
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: local.get 1
; W64-NEXT: i32.lt_u
-; W64-NEXT: i32.select
-; W64-NEXT: local.tee 1
-; W64-NEXT: local.get 2
+; W64-NEXT: i32.sub
; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
; W64-NEXT: i32.xor
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
; W64-NEXT: # fallthrough-return
%cond = icmp ult i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
@@ -430,43 +373,40 @@ define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
; W32-LABEL: test_ctselect_load:
; W32: .functype test_ctselect_load (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.load 0
+; W32-NEXT: local.tee 2
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 1
+; W32-NEXT: i32.load 0
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.load 0
; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.load 0
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_load:
; W64: .functype test_ctselect_load (i32, i64, i64) -> (i32)
+; W64-NEXT: .local i32
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.load 0
+; W64-NEXT: local.tee 3
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 1
+; W64-NEXT: i32.load 0
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
; W64-NEXT: i32.sub
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.load 0
; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.load 0
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%a = load i32, ptr %p1
%b = load i32, ptr %p2
@@ -479,65 +419,53 @@ define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
; W32-LABEL: test_ctselect_nested:
; W32: .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 4
+; W32-NEXT: local.get 3
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 3
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 0
+; W32-NEXT: local.get 1
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
+; W32-NEXT: i32.and
+; W32-NEXT: i32.xor
+; W32-NEXT: local.get 4
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
-; W32-NEXT: local.get 1
+; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 1
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 2
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 3
-; W32-NEXT: i32.and
-; W32-NEXT: i32.or
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 4
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_ctselect_nested:
; W64: .functype test_ctselect_nested (i32, i32, i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 4
+; W64-NEXT: local.get 3
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 3
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 0
+; W64-NEXT: local.get 1
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
; W64-NEXT: i32.sub
+; W64-NEXT: i32.and
+; W64-NEXT: i32.xor
+; W64-NEXT: local.get 4
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
-; W64-NEXT: local.get 1
+; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 1
; W64-NEXT: i32.sub
-; W64-NEXT: local.get 2
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 3
-; W64-NEXT: i32.and
-; W64-NEXT: i32.or
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 4
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
%result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
diff --git a/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
index 5b20e892c64d2..a4d9f4cd0bf15 100644
--- a/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
+++ b/llvm/test/CodeGen/WebAssembly/ctselect-side-effects.ll
@@ -43,39 +43,33 @@ define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
; W32-LABEL: test_protected_no_branch:
; W32: .functype test_protected_no_branch (i32, i32, i32) -> (i32)
; W32-NEXT: # %bb.0:
+; W32-NEXT: local.get 2
+; W32-NEXT: local.get 1
+; W32-NEXT: local.get 2
+; W32-NEXT: i32.xor
; W32-NEXT: i32.const 0
; W32-NEXT: local.get 0
; W32-NEXT: i32.const 1
; W32-NEXT: i32.and
-; W32-NEXT: local.tee 0
; W32-NEXT: i32.sub
-; W32-NEXT: local.get 1
-; W32-NEXT: i32.and
-; W32-NEXT: local.get 0
-; W32-NEXT: i32.const -1
-; W32-NEXT: i32.add
-; W32-NEXT: local.get 2
; W32-NEXT: i32.and
-; W32-NEXT: i32.or
+; W32-NEXT: i32.xor
; W32-NEXT: # fallthrough-return
;
; W64-LABEL: test_protected_no_branch:
; W64: .functype test_protected_no_branch (i32, i32, i32) -> (i32)
; W64-NEXT: # %bb.0:
+; W64-NEXT: local.get 2
+; W64-NEXT: local.get 1
+; W64-NEXT: local.get 2
+; W64-NEXT: i32.xor
; W64-NEXT: i32.const 0
; W64-NEXT: local.get 0
; W64-NEXT: i32.const 1
; W64-NEXT: i32.and
-; W64-NEXT: local.tee 0
; W64-NEXT: i32.sub
-; W64-NEXT: local.get 1
-; W64-NEXT: i32.and
-; W64-NEXT: local.get 0
-; W64-NEXT: i32.const -1
-; W64-NEXT: i32.add
-; W64-NEXT: local.get 2
; W64-NEXT: i32.and
-; W64-NEXT: i32.or
+; W64-NEXT: i32.xor
; W64-NEXT: # fallthrough-return
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
ret i32 %result
More information about the llvm-branch-commits
mailing list