[llvm-branch-commits] [clang] [llvm] [ConstantTime][RISCV] Add comprehensive tests for ct.select (PR #166708)
Julius Alexandre via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Mar 7 13:14:41 PST 2026
https://github.com/wizardengineer updated https://github.com/llvm/llvm-project/pull/166708
>From cea17086440ee6deb04d8b2461812dc86423434b Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Sat, 7 Mar 2026 15:36:09 -0500
Subject: [PATCH 1/4] [ConstantTime] Fix CT_SELECT expansion to preserve
constant-time guarantees
Create CT_SELECT nodes for scalar types regardless of target support, so
they survive DAGCombiner (visitCT_SELECT is conservative). Expand to
AND/OR/XOR during operation legalization after SETCC is lowered, preventing
the sext(setcc)->select fold chain that converts constant-time patterns
into data-dependent conditional moves (e.g. movn/movz on MIPS).
The mask uses SUB(0, AND(Cond, 1)) instead of SIGN_EXTEND because type
legalization already promoted i1 to the SetCC result type, making
SIGN_EXTEND a no-op for same-width types.
---
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 20 +-
.../SelectionDAG/SelectionDAGBuilder.cpp | 16 +-
llvm/test/CodeGen/RISCV/ctselect-fallback.ll | 22 +-
llvm/test/CodeGen/X86/ctselect.ll | 259 ++++++++++--------
4 files changed, 179 insertions(+), 138 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 81f992678626c..74446c8742e62 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4369,14 +4369,18 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Node->getFlags()));
} else {
assert(VT.isInteger());
- EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
- auto [Tmp2Lo, Tmp2Hi] = DAG.SplitScalar(Tmp2, dl, HalfVT, HalfVT);
- auto [Tmp3Lo, Tmp3Hi] = DAG.SplitScalar(Tmp3, dl, HalfVT, HalfVT);
- SDValue ResLo =
- DAG.getCTSelect(dl, HalfVT, Tmp1, Tmp2Lo, Tmp3Lo, Node->getFlags());
- SDValue ResHi =
- DAG.getCTSelect(dl, HalfVT, Tmp1, Tmp2Hi, Tmp3Hi, Node->getFlags());
- Tmp1 = DAG.getNode(ISD::BUILD_PAIR, dl, VT, ResLo, ResHi);
+ // Expand: Result = F ^ ((T ^ F) & Mask), Mask = 0 - (Cond & 1).
+ // SUB+AND creates the mask because i1 is already type-promoted;
+ // SIGN_EXTEND(i32, i32) would be a no-op leaving mask as 0/1.
+ SDValue Cond = Tmp1;
+ if (Cond.getValueType() != VT)
+ Cond = DAG.getNode(ISD::ANY_EXTEND, dl, VT, Cond);
+ SDValue Mask = DAG.getNode(
+ ISD::SUB, dl, VT, DAG.getConstant(0, dl, VT),
+ DAG.getNode(ISD::AND, dl, VT, Cond, DAG.getConstant(1, dl, VT)));
+ SDValue Diff = DAG.getNode(ISD::XOR, dl, VT, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(ISD::XOR, dl, VT, Tmp3,
+ DAG.getNode(ISD::AND, dl, VT, Diff, Mask));
Tmp1->setFlags(Node->getFlags());
}
Results.push_back(Tmp1);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 156d82e96b2ae..d9ceacc7c98f6 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6872,9 +6872,19 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
// assert if Cond type is Vector
assert(!CondVT.isVector() && "Vector type cond not supported yet");
- // Handle scalar types
- if (TLI.isOperationLegalOrCustom(ISD::CT_SELECT, VT) &&
- !CondVT.isVector()) {
+ // Create a CT_SELECT node for scalar types so it survives DAGCombiner
+ // (visitCT_SELECT is conservative) and expands to AND/OR/XOR during
+ // operation legalization, after SETCC is lowered. Unsupported vectors
+ // and floats with illegal integer equivalents (e.g. f64 on i386) use
+ // the inline fallback which runs before type legalization.
+ bool CreateNode =
+ TLI.isOperationLegalOrCustom(ISD::CT_SELECT, VT) ||
+ (!VT.isVector() &&
+ (!VT.isFloatingPoint() ||
+ TLI.isTypeLegal(
+ EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()))));
+
+ if (CreateNode) {
SDValue Result = DAG.getNode(ISD::CT_SELECT, DL, VT, Cond, A, B);
setValue(&I, Result);
return;
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback.ll
index d4617c7e75da7..ee8072703ee31 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback.ll
@@ -101,8 +101,6 @@ define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
;
; RV32-LABEL: test_ctselect_const_true:
; RV32: # %bb.0:
-; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
ret i32 %result
@@ -208,7 +206,7 @@ define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
define i32 @test_ctselect_nested_and_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
; RV64-LABEL: test_ctselect_nested_and_i1_to_i32:
; RV64: # %bb.0:
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: xor a2, a2, a3
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: srai a0, a0, 63
@@ -218,7 +216,7 @@ define i32 @test_ctselect_nested_and_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
;
; RV32-LABEL: test_ctselect_nested_and_i1_to_i32:
; RV32: # %bb.0:
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: xor a2, a2, a3
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai a0, a0, 31
@@ -265,8 +263,8 @@ define i32 @test_ctselect_nested_or_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
define i32 @test_ctselect_double_nested_and_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x, i32 %y) {
; RV64-LABEL: test_ctselect_double_nested_and_i1:
; RV64: # %bb.0:
-; RV64-NEXT: and a1, a2, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: xor a3, a3, a4
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: srai a0, a0, 63
@@ -276,8 +274,8 @@ define i32 @test_ctselect_double_nested_and_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x, i
;
; RV32-LABEL: test_ctselect_double_nested_and_i1:
; RV32: # %bb.0:
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: and a0, a0, a2
; RV32-NEXT: xor a3, a3, a4
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai a0, a0, 31
@@ -295,7 +293,7 @@ define i32 @test_ctselect_double_nested_and_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x, i
define i32 @test_ctselect_double_nested_mixed_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x, i32 %y, i32 %z) {
; RV64-LABEL: test_ctselect_double_nested_mixed_i1:
; RV64: # %bb.0:
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: xor a3, a3, a4
; RV64-NEXT: or a0, a0, a2
; RV64-NEXT: slli a0, a0, 63
@@ -309,7 +307,7 @@ define i32 @test_ctselect_double_nested_mixed_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x,
;
; RV32-LABEL: test_ctselect_double_nested_mixed_i1:
; RV32: # %bb.0:
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: xor a3, a3, a4
; RV32-NEXT: or a0, a0, a2
; RV32-NEXT: slli a0, a0, 31
@@ -382,7 +380,7 @@ define float @test_ctselect_f32_nan_inf(i1 %cond) {
; RV32-NEXT: srai a0, a0, 31
; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: lui a1, 522240
-; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: or a0, a0, a1
; RV32-NEXT: ret
%result = call float @llvm.ct.select.f32(i1 %cond, float 0x7FF8000000000000, float 0x7FF0000000000000)
ret float %result
@@ -398,7 +396,7 @@ define double @test_ctselect_f64_nan_inf(i1 %cond) {
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: li a1, 2047
; RV64-NEXT: slli a1, a1, 52
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: or a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_f64_nan_inf:
diff --git a/llvm/test/CodeGen/X86/ctselect.ll b/llvm/test/CodeGen/X86/ctselect.ll
index bf65e04721df1..e1abae80cef4f 100644
--- a/llvm/test/CodeGen/X86/ctselect.ll
+++ b/llvm/test/CodeGen/X86/ctselect.ll
@@ -9,8 +9,8 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
; X64-LABEL: test_ctselect_i8:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: xorl %edx, %esi
; X64-NEXT: andb $1, %al
+; X64-NEXT: xorl %edx, %esi
; X64-NEXT: negb %al
; X64-NEXT: andb %sil, %al
; X64-NEXT: xorb %dl, %al
@@ -20,10 +20,10 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
; X32-LABEL: test_ctselect_i8:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorb %cl, %dl
-; X32-NEXT: andb $1, %al
; X32-NEXT: negb %al
; X32-NEXT: andb %dl, %al
; X32-NEXT: xorb %cl, %al
@@ -32,10 +32,10 @@ define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
; X32-NOCMOV-LABEL: test_ctselect_i8:
; X32-NOCMOV: # %bb.0:
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: xorb %cl, %dl
-; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: negb %al
; X32-NOCMOV-NEXT: andb %dl, %al
; X32-NOCMOV-NEXT: xorb %cl, %al
@@ -58,10 +58,11 @@ define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
; X32-LABEL: test_ctselect_i32:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl %edx, %eax
; X32-NEXT: xorl %ecx, %eax
@@ -70,10 +71,11 @@ define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
; X32-NOCMOV-LABEL: test_ctselect_i32:
; X32-NOCMOV: # %bb.0:
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl %edx, %eax
; X32-NOCMOV-NEXT: xorl %ecx, %eax
@@ -95,45 +97,57 @@ define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
;
; X32-LABEL: test_ctselect_i64:
; X32: # %bb.0:
-; X32-NEXT: pushl %esi
+; X32-NEXT: pushl %edi
; X32-NEXT: .cfi_def_cfa_offset 8
-; X32-NEXT: .cfi_offset %esi, -8
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 12
+; X32-NEXT: .cfi_offset %esi, -12
+; X32-NEXT: .cfi_offset %edi, -8
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: andb $1, %dl
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: xorl %edx, %eax
-; X32-NEXT: andl $1, %esi
-; X32-NEXT: negl %esi
-; X32-NEXT: andl %esi, %eax
-; X32-NEXT: xorl %edx, %eax
+; X32-NEXT: xorl %esi, %eax
+; X32-NEXT: movzbl %dl, %edi
+; X32-NEXT: negl %edi
+; X32-NEXT: andl %edi, %eax
+; X32-NEXT: xorl %esi, %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: andl %esi, %edx
+; X32-NEXT: andl %edi, %edx
; X32-NEXT: xorl %ecx, %edx
; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %edi
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
;
; X32-NOCMOV-LABEL: test_ctselect_i64:
; X32-NOCMOV: # %bb.0:
-; X32-NOCMOV-NEXT: pushl %esi
+; X32-NOCMOV-NEXT: pushl %edi
; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
-; X32-NOCMOV-NEXT: .cfi_offset %esi, -8
-; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %esi
-; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: pushl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 12
+; X32-NOCMOV-NEXT: .cfi_offset %esi, -12
+; X32-NOCMOV-NEXT: .cfi_offset %edi, -8
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: andb $1, %dl
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOCMOV-NEXT: xorl %edx, %eax
-; X32-NOCMOV-NEXT: andl $1, %esi
-; X32-NOCMOV-NEXT: negl %esi
-; X32-NOCMOV-NEXT: andl %esi, %eax
-; X32-NOCMOV-NEXT: xorl %edx, %eax
+; X32-NOCMOV-NEXT: xorl %esi, %eax
+; X32-NOCMOV-NEXT: movzbl %dl, %edi
+; X32-NOCMOV-NEXT: negl %edi
+; X32-NOCMOV-NEXT: andl %edi, %eax
+; X32-NOCMOV-NEXT: xorl %esi, %eax
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: andl %esi, %edx
+; X32-NOCMOV-NEXT: andl %edi, %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
; X32-NOCMOV-NEXT: popl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: popl %edi
; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
; X32-NOCMOV-NEXT: retl
%result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
@@ -155,37 +169,47 @@ define float @test_ctselect_f32(i1 %cond, float %a, float %b) {
;
; X32-LABEL: test_ctselect_f32:
; X32: # %bb.0:
-; X32-NEXT: pushl %eax
-; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: fstps {{[0-9]+}}(%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: fstps (%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
-; X32-NEXT: andl %edx, %eax
-; X32-NEXT: xorl %ecx, %eax
-; X32-NEXT: movl %eax, (%esp)
-; X32-NEXT: flds (%esp)
-; X32-NEXT: popl %eax
+; X32-NEXT: movl (%esp), %edx
+; X32-NEXT: xorl %ecx, %edx
+; X32-NEXT: andl %eax, %edx
+; X32-NEXT: xorl %ecx, %edx
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: addl $12, %esp
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
;
; X32-NOCMOV-LABEL: test_ctselect_f32:
; X32-NOCMOV: # %bb.0:
-; X32-NOCMOV-NEXT: pushl %eax
-; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: subl $12, %esp
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 16
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: fstps {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: fstps (%esp)
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
-; X32-NOCMOV-NEXT: andl %edx, %eax
-; X32-NOCMOV-NEXT: xorl %ecx, %eax
-; X32-NOCMOV-NEXT: movl %eax, (%esp)
-; X32-NOCMOV-NEXT: flds (%esp)
-; X32-NOCMOV-NEXT: popl %eax
+; X32-NOCMOV-NEXT: movl (%esp), %edx
+; X32-NOCMOV-NEXT: xorl %ecx, %edx
+; X32-NOCMOV-NEXT: andl %eax, %edx
+; X32-NOCMOV-NEXT: xorl %ecx, %edx
+; X32-NOCMOV-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: addl $12, %esp
; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
; X32-NOCMOV-NEXT: retl
%result = call float @llvm.ct.select.f32(i1 %cond, float %a, float %b)
@@ -281,10 +305,11 @@ define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
; X32-LABEL: test_ctselect_ptr:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl %edx, %eax
; X32-NEXT: xorl %ecx, %eax
@@ -293,10 +318,11 @@ define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
; X32-NOCMOV-LABEL: test_ctselect_ptr:
; X32-NOCMOV: # %bb.0:
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl %edx, %eax
; X32-NOCMOV-NEXT: xorl %ecx, %eax
@@ -310,24 +336,16 @@ define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
; X64-LABEL: test_ctselect_const_true:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
-; X64-NEXT: xorl %esi, %eax
-; X64-NEXT: xorl %esi, %eax
; X64-NEXT: retq
;
; X32-LABEL: test_ctselect_const_true:
; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: xorl %ecx, %eax
-; X32-NEXT: xorl %ecx, %eax
; X32-NEXT: retl
;
; X32-NOCMOV-LABEL: test_ctselect_const_true:
; X32-NOCMOV: # %bb.0:
-; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X32-NOCMOV-NEXT: xorl %ecx, %eax
-; X32-NOCMOV-NEXT: xorl %ecx, %eax
; X32-NOCMOV-NEXT: retl
%result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
ret i32 %result
@@ -341,14 +359,12 @@ define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
;
; X32-LABEL: test_ctselect_const_false:
; X32: # %bb.0:
-; X32-NEXT: xorl %eax, %eax
-; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: retl
;
; X32-NOCMOV-LABEL: test_ctselect_const_false:
; X32-NOCMOV: # %bb.0:
-; X32-NOCMOV-NEXT: xorl %eax, %eax
-; X32-NOCMOV-NEXT: xorl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NOCMOV-NEXT: retl
%result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
ret i32 %result
@@ -443,19 +459,20 @@ define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
define float @test_ctselect_fcmp_oeq(float %x, float %y, float %a, float %b) {
; X64-LABEL: test_ctselect_fcmp_oeq:
; X64: # %bb.0:
-; X64-NEXT: movd %xmm3, %eax
; X64-NEXT: cmpeqss %xmm1, %xmm0
-; X64-NEXT: pxor %xmm3, %xmm2
-; X64-NEXT: pand %xmm0, %xmm2
-; X64-NEXT: movd %xmm2, %ecx
-; X64-NEXT: xorl %eax, %ecx
-; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: xorps %xmm3, %xmm2
+; X64-NEXT: andps %xmm2, %xmm0
+; X64-NEXT: xorps %xmm3, %xmm0
; X64-NEXT: retq
;
; X32-LABEL: test_ctselect_fcmp_oeq:
; X32: # %bb.0:
-; X32-NEXT: pushl %eax
-; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $12, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: fstps {{[0-9]+}}(%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: fstps (%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: flds {{[0-9]+}}(%esp)
; X32-NEXT: flds {{[0-9]+}}(%esp)
@@ -466,20 +483,24 @@ define float @test_ctselect_fcmp_oeq(float %x, float %y, float %a, float %b) {
; X32-NEXT: andb %cl, %dl
; X32-NEXT: movzbl %dl, %ecx
; X32-NEXT: negl %ecx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl (%esp), %edx
; X32-NEXT: xorl %eax, %edx
; X32-NEXT: andl %ecx, %edx
; X32-NEXT: xorl %eax, %edx
-; X32-NEXT: movl %edx, (%esp)
-; X32-NEXT: flds (%esp)
-; X32-NEXT: popl %eax
+; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: addl $12, %esp
; X32-NEXT: .cfi_def_cfa_offset 4
; X32-NEXT: retl
;
; X32-NOCMOV-LABEL: test_ctselect_fcmp_oeq:
; X32-NOCMOV: # %bb.0:
-; X32-NOCMOV-NEXT: pushl %eax
-; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: subl $12, %esp
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 16
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: fstps {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: fstps (%esp)
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
@@ -492,13 +513,13 @@ define float @test_ctselect_fcmp_oeq(float %x, float %y, float %a, float %b) {
; X32-NOCMOV-NEXT: andb %al, %dl
; X32-NOCMOV-NEXT: movzbl %dl, %eax
; X32-NOCMOV-NEXT: negl %eax
-; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: movl (%esp), %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
; X32-NOCMOV-NEXT: andl %eax, %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: movl %edx, (%esp)
-; X32-NOCMOV-NEXT: flds (%esp)
-; X32-NOCMOV-NEXT: popl %eax
+; X32-NOCMOV-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: addl $12, %esp
; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
; X32-NOCMOV-NEXT: retl
%cond = fcmp oeq float %x, %y
@@ -522,12 +543,13 @@ define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
; X32-LABEL: test_ctselect_load:
; X32: # %bb.0:
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl (%edx), %edx
; X32-NEXT: movl (%ecx), %ecx
; X32-NEXT: xorl %edx, %ecx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl %ecx, %eax
; X32-NEXT: xorl %edx, %eax
@@ -536,12 +558,13 @@ define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
; X32-NOCMOV-LABEL: test_ctselect_load:
; X32-NOCMOV: # %bb.0:
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: movl (%edx), %edx
; X32-NOCMOV-NEXT: movl (%ecx), %ecx
; X32-NOCMOV-NEXT: xorl %edx, %ecx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl %ecx, %eax
; X32-NOCMOV-NEXT: xorl %edx, %eax
@@ -578,17 +601,19 @@ define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
; X32-NEXT: .cfi_offset %esi, -12
; X32-NEXT: .cfi_offset %edi, -8
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andb $1, %al
+; X32-NEXT: movb {{[0-9]+}}(%esp), %ah
+; X32-NEXT: andb $1, %ah
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NEXT: xorl %edx, %edi
-; X32-NEXT: andl $1, %esi
-; X32-NEXT: negl %esi
-; X32-NEXT: andl %edi, %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: xorl %edx, %esi
+; X32-NEXT: movzbl %ah, %edi
+; X32-NEXT: negl %edi
+; X32-NEXT: andl %esi, %edi
; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: xorl %esi, %edx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: xorl %edi, %edx
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl %edx, %eax
; X32-NEXT: xorl %ecx, %eax
@@ -607,17 +632,19 @@ define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
; X32-NOCMOV-NEXT: .cfi_offset %esi, -12
; X32-NOCMOV-NEXT: .cfi_offset %edi, -8
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andb $1, %al
+; X32-NOCMOV-NEXT: movb {{[0-9]+}}(%esp), %ah
+; X32-NOCMOV-NEXT: andb $1, %ah
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %esi
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X32-NOCMOV-NEXT: xorl %edx, %edi
-; X32-NOCMOV-NEXT: andl $1, %esi
-; X32-NOCMOV-NEXT: negl %esi
-; X32-NOCMOV-NEXT: andl %edi, %esi
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NOCMOV-NEXT: xorl %edx, %esi
+; X32-NOCMOV-NEXT: movzbl %ah, %edi
+; X32-NOCMOV-NEXT: negl %edi
+; X32-NOCMOV-NEXT: andl %esi, %edi
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: xorl %esi, %edx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: xorl %edi, %edx
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl %edx, %eax
; X32-NOCMOV-NEXT: xorl %ecx, %eax
@@ -651,10 +678,10 @@ define i32 @test_ctselect_nested_and_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb {{[0-9]+}}(%esp), %al
-; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl %edx, %eax
; X32-NEXT: xorl %ecx, %eax
@@ -665,10 +692,10 @@ define i32 @test_ctselect_nested_and_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NOCMOV-NEXT: andb {{[0-9]+}}(%esp), %al
-; X32-NOCMOV-NEXT: movzbl %al, %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl %edx, %eax
; X32-NOCMOV-NEXT: xorl %ecx, %eax
@@ -699,10 +726,10 @@ define i32 @test_ctselect_nested_or_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: orb {{[0-9]+}}(%esp), %al
-; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl %edx, %eax
; X32-NEXT: xorl %ecx, %eax
@@ -713,10 +740,10 @@ define i32 @test_ctselect_nested_or_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NOCMOV-NEXT: orb {{[0-9]+}}(%esp), %al
-; X32-NOCMOV-NEXT: movzbl %al, %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl %edx, %eax
; X32-NOCMOV-NEXT: xorl %ecx, %eax
@@ -735,9 +762,9 @@ define i32 @test_ctselect_nested_or_i1_to_i32(i1 %c0, i1 %c1, i32 %x, i32 %y) {
define i32 @test_ctselect_double_nested_and_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x, i32 %y) {
; X64-LABEL: test_ctselect_double_nested_and_i1:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andl %esi, %eax
; X64-NEXT: andl %edx, %eax
-; X64-NEXT: andl %edi, %eax
; X64-NEXT: xorl %r8d, %ecx
; X64-NEXT: andl $1, %eax
; X64-NEXT: negl %eax
@@ -751,10 +778,10 @@ define i32 @test_ctselect_double_nested_and_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x, i
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: andb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb {{[0-9]+}}(%esp), %al
-; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: andb $1, %al
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: xorl %ecx, %edx
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl %edx, %eax
; X32-NEXT: xorl %ecx, %eax
@@ -766,10 +793,10 @@ define i32 @test_ctselect_double_nested_and_i1(i1 %c0, i1 %c1, i1 %c2, i32 %x, i
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NOCMOV-NEXT: andb {{[0-9]+}}(%esp), %al
; X32-NOCMOV-NEXT: andb {{[0-9]+}}(%esp), %al
-; X32-NOCMOV-NEXT: movzbl %al, %eax
+; X32-NOCMOV-NEXT: andb $1, %al
; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NOCMOV-NEXT: xorl %ecx, %edx
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl %edx, %eax
; X32-NOCMOV-NEXT: xorl %ecx, %eax
@@ -1403,7 +1430,7 @@ define float @test_ctselect_f32_nan_inf(i1 %cond) {
; X64-NEXT: andl $1, %edi
; X64-NEXT: negl %edi
; X64-NEXT: andl $4194304, %edi # imm = 0x400000
-; X64-NEXT: xorl $2139095040, %edi # imm = 0x7F800000
+; X64-NEXT: orl $2139095040, %edi # imm = 0x7F800000
; X64-NEXT: movd %edi, %xmm0
; X64-NEXT: retq
;
@@ -1412,10 +1439,11 @@ define float @test_ctselect_f32_nan_inf(i1 %cond) {
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: andb $1, %al
+; X32-NEXT: movzbl %al, %eax
; X32-NEXT: negl %eax
; X32-NEXT: andl $4194304, %eax # imm = 0x400000
-; X32-NEXT: xorl $2139095040, %eax # imm = 0x7F800000
+; X32-NEXT: orl $2139095040, %eax # imm = 0x7F800000
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: flds (%esp)
; X32-NEXT: popl %eax
@@ -1427,10 +1455,11 @@ define float @test_ctselect_f32_nan_inf(i1 %cond) {
; X32-NOCMOV-NEXT: pushl %eax
; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: andb $1, %al
+; X32-NOCMOV-NEXT: movzbl %al, %eax
; X32-NOCMOV-NEXT: negl %eax
; X32-NOCMOV-NEXT: andl $4194304, %eax # imm = 0x400000
-; X32-NOCMOV-NEXT: xorl $2139095040, %eax # imm = 0x7F800000
+; X32-NOCMOV-NEXT: orl $2139095040, %eax # imm = 0x7F800000
; X32-NOCMOV-NEXT: movl %eax, (%esp)
; X32-NOCMOV-NEXT: flds (%esp)
; X32-NOCMOV-NEXT: popl %eax
@@ -1449,7 +1478,7 @@ define double @test_ctselect_f64_nan_inf(i1 %cond) {
; X64-NEXT: movabsq $2251799813685248, %rax # imm = 0x8000000000000
; X64-NEXT: andq %rdi, %rax
; X64-NEXT: movabsq $9218868437227405312, %rcx # imm = 0x7FF0000000000000
-; X64-NEXT: xorq %rax, %rcx
+; X64-NEXT: orq %rax, %rcx
; X64-NEXT: movq %rcx, %xmm0
; X64-NEXT: retq
;
>From 362e0a6968ffe0f7a996a1527557b702ed5e3879 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 10:56:34 -0500
Subject: [PATCH 2/4] [ConstantTime][Clang] Add __builtin_ct_select for
constant-time selection
---
clang/docs/LanguageExtensions.rst | 44 ++
clang/include/clang/Basic/Builtins.td | 8 +
clang/lib/CodeGen/CGBuiltin.cpp | 13 +
clang/lib/Sema/SemaChecking.cpp | 64 ++
.../test/Sema/builtin-ct-select-edge-cases.c | 373 ++++++++++
clang/test/Sema/builtin-ct-select.c | 683 ++++++++++++++++++
6 files changed, 1185 insertions(+)
create mode 100644 clang/test/Sema/builtin-ct-select-edge-cases.c
create mode 100644 clang/test/Sema/builtin-ct-select.c
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 29328355c3e6f..f65d7377ae39b 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -7005,3 +7005,47 @@ Clang fails to reject some code that should be rejected. e.g.,
// own initializer rather than rejecting the code with an undeclared identifier
// diagnostic.
auto x = x;
+
+.. _langext-__builtin_ct_select:
+
+``__builtin_ct_select``
+-----------------------
+
+``__builtin_ct_select`` performs a constant-time conditional selection between
+two values. Unlike the ternary operator ``?:``, this builtin is designed to
+execute in constant time regardless of the condition value, making it suitable
+for cryptographic and security-sensitive code where timing side-channels must
+be avoided.
+
+**Syntax**:
+
+.. code-block:: c++
+
+ __builtin_ct_select(condition, true_value, false_value)
+
+**Examples**:
+
+.. code-block:: c++
+
+ // Select between two integers
+ int result = __builtin_ct_select(secret_bit, value_a, value_b);
+
+ // Select between two pointers
+ int *ptr = __builtin_ct_select(condition, ptr_a, ptr_b);
+
+ // Select between two floating-point values
+ double d = __builtin_ct_select(flag, 1.0, 2.0);
+
+**Description**:
+
+The first argument is an integer condition that is converted to a boolean
+(non-zero is true, zero is false). The second and third arguments must have
+the same scalar or vector type. The builtin returns the second argument if
+the condition is true, otherwise the third argument.
+
+The operation is guaranteed to be lowered to constant-time machine code that
+does not branch on the condition value, preventing timing-based side-channel
+attacks.
+
+Query for this feature with ``__has_builtin(__builtin_ct_select)``.
+
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index ed4ec10375e48..9501893714c25 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5452,3 +5452,11 @@ def CountedByRef : Builtin {
let Attributes = [NoThrow, CustomTypeChecking];
let Prototype = "int(...)";
}
+
+// Constant-time select builtin
+def CtSelect : Builtin {
+ let Spellings = ["__builtin_ct_select"];
+ let Attributes = [NoThrow, Const, UnevaluatedArguments,
+ ConstIgnoringExceptions, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 4e1b22f0a2241..9af3cd658703f 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6443,6 +6443,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
auto Str = CGM.GetAddrOfConstantCString(Name, "");
return RValue::get(Str.getPointer());
}
+ case Builtin::BI__builtin_ct_select: {
+ auto *Cond = EmitScalarExpr(E->getArg(0));
+ auto *A = EmitScalarExpr(E->getArg(1));
+ auto *B = EmitScalarExpr(E->getArg(2));
+
+ if (Cond->getType()->getIntegerBitWidth() != 1)
+ Cond = Builder.CreateICmpNE(
+ Cond, llvm::ConstantInt::get(Cond->getType(), 0), "cond.bool");
+
+ llvm::Function *Fn =
+ CGM.getIntrinsic(llvm::Intrinsic::ct_select, {A->getType()});
+ return RValue::get(Builder.CreateCall(Fn, {Cond, A, B}));
+ }
}
// If this is an alias for a lib function (e.g. __builtin_sin), emit
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 89171246d0bcb..cfa35191ff815 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3753,6 +3753,70 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
if (BuiltinCountedByRef(TheCall))
return ExprError();
break;
+
+ case Builtin::BI__builtin_ct_select: {
+ if (TheCall->getNumArgs() != 3) {
+ // Simple argument count check without complex diagnostics
+ if (TheCall->getNumArgs() < 3) {
+ return Diag(TheCall->getEndLoc(),
+ diag::err_typecheck_call_too_few_args_at_least)
+ << 0 << 3 << TheCall->getNumArgs() << 0
+ << TheCall->getCallee()->getSourceRange();
+ } else {
+ return Diag(TheCall->getEndLoc(),
+ diag::err_typecheck_call_too_many_args)
+ << 0 << 3 << TheCall->getNumArgs() << 0
+ << TheCall->getCallee()->getSourceRange();
+ }
+ }
+ auto *Cond = TheCall->getArg(0);
+ auto *A = TheCall->getArg(1);
+ auto *B = TheCall->getArg(2);
+
+ QualType CondTy = Cond->getType();
+ if (!CondTy->isIntegerType()) {
+ return Diag(Cond->getBeginLoc(), diag::err_typecheck_cond_expect_scalar)
+ << CondTy << Cond->getSourceRange();
+ }
+
+ ExprResult ARes = DefaultFunctionArrayLvalueConversion(A);
+ ExprResult BRes = DefaultFunctionArrayLvalueConversion(B);
+ if (ARes.isInvalid() || BRes.isInvalid())
+ return ExprError();
+
+ A = ARes.get();
+ B = BRes.get();
+ TheCall->setArg(1, A);
+ TheCall->setArg(2, B);
+
+ QualType ATy = A->getType();
+ QualType BTy = B->getType();
+
+ // Check that both operands have scalar or vector type.
+ if ((!ATy->isScalarType() && !ATy->isVectorType()) ||
+ (!BTy->isScalarType() && !BTy->isVectorType())) {
+ return Diag(A->getBeginLoc(),
+ diag::err_typecheck_cond_incompatible_operands)
+ << ATy << BTy << A->getSourceRange() << B->getSourceRange();
+ }
+
+ // Require both operands to have exactly the same type; no implicit conversion is performed.
+ if (!Context.hasSameType(ATy, BTy)) {
+ // Mismatched operand types are rejected with the incompatible-operands diagnostic.
+ return Diag(A->getBeginLoc(),
+ diag::err_typecheck_cond_incompatible_operands)
+ << ATy << BTy << A->getSourceRange() << B->getSourceRange();
+ }
+
+ QualType ResultTy = ATy;
+ ExprResult CondRes = PerformContextuallyConvertToBool(Cond);
+ if (CondRes.isInvalid())
+ return ExprError();
+
+ TheCall->setArg(0, CondRes.get());
+ TheCall->setType(ResultTy);
+ return TheCall;
+ }
}
if (getLangOpts().HLSL && HLSL().CheckBuiltinFunctionCall(BuiltinID, TheCall))
diff --git a/clang/test/Sema/builtin-ct-select-edge-cases.c b/clang/test/Sema/builtin-ct-select-edge-cases.c
new file mode 100644
index 0000000000000..167b19bf20663
--- /dev/null
+++ b/clang/test/Sema/builtin-ct-select-edge-cases.c
@@ -0,0 +1,373 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+// RUN: %clang_cc1 -fsyntax-only -verify %s -fexperimental-new-constant-interpreter
+
+// Test with various condition expressions
+int test_conditional_expressions(int x, int y, int a, int b) {
+ // Logical expressions
+ int result1 = __builtin_ct_select(x && y, a, b);
+ int result2 = __builtin_ct_select(x || y, a, b);
+ int result3 = __builtin_ct_select(!x, a, b);
+
+ // Comparison expressions
+ int result4 = __builtin_ct_select(x == y, a, b);
+ int result5 = __builtin_ct_select(x != y, a, b);
+ int result6 = __builtin_ct_select(x < y, a, b);
+ int result7 = __builtin_ct_select(x > y, a, b);
+ int result8 = __builtin_ct_select(x <= y, a, b);
+ int result9 = __builtin_ct_select(x >= y, a, b);
+
+ // Bitwise expressions
+ int result10 = __builtin_ct_select(x & y, a, b);
+ int result11 = __builtin_ct_select(x | y, a, b);
+ int result12 = __builtin_ct_select(x ^ y, a, b);
+ int result13 = __builtin_ct_select(~x, a, b);
+
+ // Arithmetic expressions
+ int result14 = __builtin_ct_select(x + y, a, b);
+ int result15 = __builtin_ct_select(x - y, a, b);
+ int result16 = __builtin_ct_select(x * y, a, b);
+ int result17 = __builtin_ct_select(x / y, a, b);
+ int result18 = __builtin_ct_select(x % y, a, b);
+
+ return result1 + result2 + result3 + result4 + result5 + result6 + result7 + result8 + result9 + result10 + result11 + result12 + result13 + result14 + result15 + result16 + result17 + result18;
+}
+
+// Test with extreme values
+int test_extreme_values(int cond) {
+ // Maximum and minimum values
+ int max_int = __builtin_ct_select(cond, __INT_MAX__, -__INT_MAX__ - 1);
+
+ // Very large numbers
+ long long max_ll = __builtin_ct_select(cond, __LONG_LONG_MAX__, -__LONG_LONG_MAX__ - 1);
+
+ // Floating point extremes
+ float max_float = __builtin_ct_select(cond, __FLT_MAX__, -__FLT_MAX__);
+ double max_double = __builtin_ct_select(cond, __DBL_MAX__, -__DBL_MAX__);
+
+ return max_int;
+}
+
+// Test with zero and negative zero
+int test_zero_values(int cond) {
+ // Integer zeros
+ int zero_int = __builtin_ct_select(cond, 0, -0);
+
+ // Floating point zeros
+ float zero_float = __builtin_ct_select(cond, 0.0f, -0.0f);
+ double zero_double = __builtin_ct_select(cond, 0.0, -0.0);
+
+ return zero_int;
+}
+
+// Test with infinity and NaN
+int test_special_float_values(int cond) {
+ // Infinity
+ float inf_float = __builtin_ct_select(cond, __builtin_inff(), -__builtin_inff());
+ double inf_double = __builtin_ct_select(cond, __builtin_inf(), -__builtin_inf());
+
+ // NaN
+ float nan_float = __builtin_ct_select(cond, __builtin_nanf(""), __builtin_nanf(""));
+ double nan_double = __builtin_ct_select(cond, __builtin_nan(""), __builtin_nan(""));
+
+ return 0;
+}
+
+// Test with complex pointer scenarios
+int test_pointer_edge_cases(int cond) {
+ int arr[10];
+ int *ptr1 = arr;
+ int *ptr2 = arr + 5;
+
+ // Array pointers
+ int *result1 = __builtin_ct_select(cond, ptr1, ptr2);
+
+ // Pointer arithmetic
+ int *result2 = __builtin_ct_select(cond, arr + 1, arr + 2);
+
+ // NULL vs non-NULL
+ int *result3 = __builtin_ct_select(cond, ptr1, (int*)0);
+
+ // Different pointer types (should fail)
+ float *fptr = (float*)0;
+ int *result4 = __builtin_ct_select(cond, ptr1, fptr); // expected-error {{incompatible operand types ('int *' and 'float *')}}
+
+ return *result1;
+}
+
+// Test with function pointers
+int func1(int x) { return x; }
+int func2(int x) { return x * 2; }
+float func3(float x) { return x; }
+
+int test_function_pointers(int cond, int x) {
+ // Same signature function pointer
+ int (*fptr)(int) = __builtin_ct_select(cond, &func1, &func2);
+
+ // Different signature function pointers (should fail)
+ int (*bad_fptr)(int) = __builtin_ct_select(cond, &func1, &func3); // expected-error {{incompatible operand types ('int (*)(int)' and 'float (*)(float)')}}
+
+ return fptr(x);
+}
+
+// Test with void pointers
+void *test_void_pointers(int cond, void *a, void *b) {
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test with const/volatile qualifiers
+int test_qualifiers(int cond) {
+ const int ca = 10;
+ const int cb = 20;
+ volatile int va = 30;
+ volatile int vb = 40;
+ const volatile int cva = 50;
+ const volatile int cvb = 60;
+
+ // const to const
+ const int result1 = __builtin_ct_select(cond, ca, cb);
+
+ // volatile to volatile
+ volatile int result2 = __builtin_ct_select(cond, va, vb);
+
+ // const volatile to const volatile
+ const volatile int result3 = __builtin_ct_select(cond, cva, cvb);
+
+ return result1 + result2 + result3;
+}
+
+// Test with arrays (they decay to pointers under lvalue conversion, so this is accepted)
+int test_arrays(int cond) {
+ int arr1[5] = {1, 2, 3, 4, 5};
+ int arr2[5] = {6, 7, 8, 9, 10};
+
+ // Accepted: both arrays decay to 'int *', so the operands have the same type
+ int *result = __builtin_ct_select(cond, arr1, arr2);
+
+ return result[0];
+}
+
+// Test with structures (should fail)
+struct Point {
+ int x, y;
+};
+
+struct Point test_structs(int cond) {
+ struct Point p1 = {1, 2};
+ struct Point p2 = {3, 4};
+
+ return __builtin_ct_select(cond, p1, p2); // expected-error {{incompatible operand types ('struct Point' and 'struct Point')}}
+}
+
+// Test with unions (should fail)
+union Data {
+ int i;
+ float f;
+};
+
+union Data test_unions(int cond) {
+ union Data d1 = {.i = 10};
+ union Data d2 = {.i = 20};
+
+ return __builtin_ct_select(cond, d1, d2); // expected-error {{incompatible operand types ('union Data' and 'union Data')}}
+}
+
+// Test with bit fields (should work as they're integers)
+struct BitField {
+ int a : 4;
+ int b : 4;
+};
+
+int test_bit_fields(int cond) {
+ struct BitField bf1 = {1, 2};
+ struct BitField bf2 = {3, 4};
+
+ // Individual bit fields should work
+ int result1 = __builtin_ct_select(cond, bf1.a, bf2.a);
+ int result2 = __builtin_ct_select(cond, bf1.b, bf2.b);
+
+ return result1 + result2;
+}
+
+// Test with designated initializers
+int test_designated_init(int cond) {
+ int arr1[3] = {[0] = 1, [1] = 2, [2] = 3};
+ int arr2[3] = {[0] = 4, [1] = 5, [2] = 6};
+
+ // Access specific elements
+ int result1 = __builtin_ct_select(cond, arr1[0], arr2[0]);
+ int result2 = __builtin_ct_select(cond, arr1[1], arr2[1]);
+
+ return result1 + result2;
+}
+
+// Test with complex expressions in arguments
+int complex_expr(int x) { return x * x; }
+
+int test_complex_arguments(int cond, int x, int y) {
+ // Function calls as arguments
+ int result1 = __builtin_ct_select(cond, complex_expr(x), complex_expr(y));
+
+ // Ternary operator as arguments
+ int result2 = __builtin_ct_select(cond, x > 0 ? x : -x, y > 0 ? y : -y);
+
+ // Compound literals
+ int result3 = __builtin_ct_select(cond, (int){x}, (int){y});
+
+ return result1 + result2 + result3;
+}
+
+// Test with preprocessor macros
+#define MACRO_A 42
+#define MACRO_B 24
+#define MACRO_COND(x) (x > 0)
+
+int test_macros(int x) {
+ int result1 = __builtin_ct_select(MACRO_COND(x), MACRO_A, MACRO_B);
+
+ // Nested macros
+ #define NESTED_SELECT(c, a, b) __builtin_ct_select(c, a, b)
+ int result2 = NESTED_SELECT(x, 10, 20);
+
+ return result1 + result2;
+}
+
+// Test with string literals (both decay to pointers of the same type, so this is accepted)
+const char *test_strings(int cond) {
+ return __builtin_ct_select(cond, "hello", "world");
+}
+
+// Test with variable length arrays (VLA)
+int test_vla(int cond, int n) {
+ int vla1[n];
+ int vla2[n];
+
+ // Individual elements should work
+ vla1[0] = 1;
+ vla2[0] = 2;
+ int result = __builtin_ct_select(cond, vla1[0], vla2[0]);
+
+ return result;
+}
+
+// Test with typedef
+typedef int MyInt;
+typedef float MyFloat;
+
+MyInt test_typedef(int cond, MyInt a, MyInt b) {
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test with different typedef types (should fail)
+MyInt test_different_typedef(int cond, MyInt a, MyFloat b) {
+ return __builtin_ct_select(cond, a, b); // expected-error {{incompatible operand types ('MyInt' (aka 'int') and 'MyFloat' (aka 'float'))}}
+}
+
+// Test with side effects (should be evaluated)
+int side_effect_counter = 0;
+int side_effect_func(int x) {
+ side_effect_counter++;
+ return x;
+}
+
+int test_side_effects(int cond) {
+ // Both arguments should be evaluated
+ int result = __builtin_ct_select(cond, side_effect_func(10), side_effect_func(20));
+ return result;
+}
+
+// Test with goto labels (context where expressions are used)
+int test_goto_context(int cond, int a, int b) {
+ int result = __builtin_ct_select(cond, a, b);
+
+ if (result > 0) {
+ goto positive;
+ } else {
+ goto negative;
+ }
+
+positive:
+ return result;
+
+negative:
+ return -result;
+}
+
+// Test with switch statements
+int test_switch_context(int cond, int a, int b) {
+ int result = __builtin_ct_select(cond, a, b);
+
+ switch (result) {
+ case 0:
+ return 0;
+ case 1:
+ return 1;
+ default:
+ return -1;
+ }
+}
+
+// Test with loops
+int test_loop_context(int cond, int a, int b) {
+ int result = __builtin_ct_select(cond, a, b);
+ int sum = 0;
+
+ for (int i = 0; i < result; i++) {
+ sum += i;
+ }
+
+ return sum;
+}
+
+// Test with recursive functions
+int factorial(int n) {
+ if (n <= 1) return 1;
+ return n * factorial(n - 1);
+}
+
+int test_recursive(int cond, int n) {
+ int result = __builtin_ct_select(cond, n, n + 1);
+ return factorial(result);
+}
+
+// Test with inline functions
+static inline int inline_func(int x) {
+ return x * 2;
+}
+
+int test_inline(int cond, int a, int b) {
+ return __builtin_ct_select(cond, inline_func(a), inline_func(b));
+}
+
+// Test with static variables
+int test_static_vars(int cond) {
+ static int static_a = 10;
+ static int static_b = 20;
+
+ return __builtin_ct_select(cond, static_a, static_b);
+}
+
+// Test with extern variables
+extern int extern_a;
+extern int extern_b;
+
+int test_extern_vars(int cond) {
+ return __builtin_ct_select(cond, extern_a, extern_b);
+}
+
+// Test with register variables
+int test_register_vars(int cond) {
+ register int reg_a = 30;
+ register int reg_b = 40;
+
+ return __builtin_ct_select(cond, reg_a, reg_b);
+}
+
+// Test with thread-local variables (C11)
+#if __STDC_VERSION__ >= 201112L
+_Thread_local int tls_a = 50;
+_Thread_local int tls_b = 60;
+
+int test_tls_vars(int cond) {
+ return __builtin_ct_select(cond, tls_a, tls_b);
+}
+#endif
diff --git a/clang/test/Sema/builtin-ct-select.c b/clang/test/Sema/builtin-ct-select.c
new file mode 100644
index 0000000000000..36ef95cc473a5
--- /dev/null
+++ b/clang/test/Sema/builtin-ct-select.c
@@ -0,0 +1,683 @@
+// RUN: %clang_cc1 -emit-llvm -o - %s | FileCheck %s
+
+// Test integer types
+int test_int(int cond, int a, int b) {
+ // CHECK-LABEL: define {{.*}} @test_int
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+long long test_long(int cond, long long a, long long b) {
+ // CHECK-LABEL: define {{.*}} @test_long
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+ // CHECK: ret i64 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+short test_short(int cond, short a, short b) {
+ // CHECK-LABEL: define {{.*}} @test_short
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i16 @llvm.ct.select.i16(i1 [[COND]], i16 %{{.*}}, i16 %{{.*}})
+ // CHECK: ret i16 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+unsigned char test_uchar(int cond, unsigned char a, unsigned char b) {
+ // CHECK-LABEL: define {{.*}} @test_uchar
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i8 @llvm.ct.select.i8(i1 [[COND]], i8 %{{.*}}, i8 %{{.*}})
+ // CHECK: ret i8 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+long long test_longlong(int cond, long long a, long long b) {
+ // CHECK-LABEL: define {{.*}} @test_longlong
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+ // CHECK: ret i64 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test floating point types
+float test_float(int cond, float a, float b) {
+ // CHECK-LABEL: define {{.*}} @test_float
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+ // CHECK: ret float [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+double test_double(int cond, double a, double b) {
+ // CHECK-LABEL: define {{.*}} @test_double
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+ // CHECK: ret double [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test pointer types
+int *test_pointer(int cond, int *a, int *b) {
+ // CHECK-LABEL: define {{.*}} @test_pointer
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr %{{.*}}, ptr %{{.*}})
+ // CHECK: ret ptr [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test with different condition types
+int test_char_cond(char cond, int a, int b) {
+ // CHECK-LABEL: define {{.*}} @test_char_cond
+ // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+int test_long_cond(long long cond, int a, int b) {
+ // CHECK-LABEL: define {{.*}} @test_long_cond
+ // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test with boolean condition
+int test_bool_cond(_Bool cond, int a, int b) {
+ // CHECK-LABEL: define {{.*}} @test_bool_cond
+ // CHECK: [[COND:%.*]] = trunc i8 %{{.*}} to i1
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test with constants
+int test_constant_cond(void) {
+ // CHECK-LABEL: define {{.*}} @test_constant_cond
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 true, i32 42, i32 24)
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(1, 42, 24);
+}
+
+int test_zero_cond(void) {
+ // CHECK-LABEL: define {{.*}} @test_zero_cond
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 false, i32 42, i32 24)
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(0, 42, 24);
+}
+
+// Test type promotion
+int test_promotion(int cond, short a, short b) {
+ // CHECK-LABEL: define {{.*}} @test_promotion
+ // CHECK-DAG: [[A_EXT:%.*]] = sext i16 %{{.*}} to i32
+ // CHECK-DAG: [[B_EXT:%.*]] = sext i16 %{{.*}} to i32
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[A_EXT]], i32 [[B_EXT]])
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(cond, (int)a, (int)b);
+}
+
+// Test mixed signedness
+unsigned int test_mixed_signedness(int cond, int a, unsigned int b) {
+ // CHECK-LABEL: define {{.*}} @test_mixed_signedness
+ // CHECK-DAG: [[A_EXT:%.*]] = sext i32 %{{.*}} to i64
+ // CHECK-DAG: [[B_EXT:%.*]] = zext i32 %{{.*}} to i64
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 [[A_EXT]], i64 [[B_EXT]])
+ // CHECK: [[RESULT_TRUNC:%.*]] = trunc i64 [[RESULT]] to i32
+ // CHECK: ret i32 [[RESULT_TRUNC]]
+ return __builtin_ct_select(cond, (long long)a, (long long)b);
+}
+
+// Test complex expression
+int test_complex_expr_alt(int x, int y) {
+ // CHECK-LABEL: define {{.*}} @test_complex_expr_alt
+ // CHECK-DAG: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, 0
+ // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
+ // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
+ // Separate the final sequence to ensure proper ordering
+ // CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 [[ADD]], i32 [[SUB]])
+ // CHECK-NEXT: ret i32 [[RESULT]]
+ return __builtin_ct_select(x > 0, x + y, x - y);
+}
+
+// Test nested calls
+int test_nested_structured(int cond1, int cond2, int a, int b, int c) {
+ // CHECK-LABEL: define {{.*}} @test_nested_structured
+ // Phase 1: Conditions (order doesn't matter)
+ // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+
+ // Phase 2: Inner select (must happen before outer)
+ // CHECK: [[INNER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+
+ // Phase 3: Outer select (must use inner result)
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER]], i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
+}
+
+// Test with function calls
+int helper(int x) { return x * 2; }
+int test_function_calls(int cond, int x, int y) {
+ // CHECK-LABEL: define {{.*}} @test_function_calls
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[CALL1:%.*]] = call i32 @helper(i32 noundef %{{.*}})
+ // CHECK-DAG: [[CALL2:%.*]] = call i32 @helper(i32 noundef %{{.*}})
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 [[CALL1]], i32 [[CALL2]])
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(cond, helper(x), helper(y));
+}
+
+// Test using ct_select as condition for another ct_select
+int test_intrinsic_condition(int cond1, int cond2, int a, int b, int c, int d) {
+ // CHECK-LABEL: define {{.*}} @test_intrinsic_condition
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[INNER_COND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[INNER_COND]], 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(__builtin_ct_select(cond1, cond2, a), b, c);
+}
+
+// Test using comparison result of ct_select as condition
+int test_comparison_condition(int cond, int a, int b, int c, int d) {
+ // CHECK-LABEL: define {{.*}} @test_comparison_condition
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: [[CMP:%.*]] = icmp sgt i32 [[FIRST_SELECT]], %{{.*}}
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(__builtin_ct_select(cond, a, b) > c, d, a);
+}
+
+// Test using ct_select result in arithmetic as condition
+int test_arithmetic_condition(int cond, int a, int b, int c, int d) {
+ // CHECK-LABEL: define {{.*}} @test_arithmetic_condition
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: [[ADD:%.*]] = add nsw i32 [[FIRST_SELECT]], %{{.*}}
+ // CHECK: [[FINAL_COND:%.*]] = icmp ne i32 [[ADD]], 0
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(__builtin_ct_select(cond, a, b) + c, d, a);
+}
+
+// Test chained ct_select as conditions
+int test_chained_conditions(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e) {
+ // CHECK-LABEL: define {{.*}} @test_chained_conditions
+ // CHECK: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[FIRST:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[SECOND:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ int first_select = __builtin_ct_select(cond1, a, b);
+ int second_select = __builtin_ct_select(cond2, first_select, c);
+ return __builtin_ct_select(second_select, d, e);
+}
+
+// Test using ct_select with pointer condition
+//int test_pointer_condition(int *ptr1, int *ptr2, int a, int b, int c) {
+ // NO-CHECK-LABEL: define {{.*}} @test_pointer_condition
+ // NO-CHECK: [[PTR_COND:%.*]] = icmp ne ptr %{{.*}}, null
+ // NO-CHECK: [[PTR_SELECT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[PTR_COND]], ptr %{{.*}}, ptr %{{.*}})
+ // NO-CHECK: [[FINAL_COND:%.*]] = icmp ne ptr [[PTR_SELECT]], null
+ // NO-CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+ // NO-CHECK: ret i32 [[RESULT]]
+// return __builtin_ct_select(__builtin_ct_select(ptr1, ptr1, ptr2), a, b);
+//}
+
+
+// Test using ct_select result in logical operations as condition
+int test_logical_condition(int cond1, int cond2, int a, int b, int c, int d) {
+ // CHECK-LABEL: define {{.*}} @test_logical_condition
+ // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[FIRST_SELECT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK-DAG: [[SELECT_BOOL:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 %{{.*}}, i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(__builtin_ct_select(cond1, a, b) && cond2, c, d);
+}
+
+// Test multiple levels of ct_select as conditions
+int test_deep_condition_nesting(int cond1, int cond2, int cond3, int a, int b, int c, int d, int e, int f) {
+ // CHECK-LABEL: define {{.*}} @test_deep_condition_nesting
+ // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[INNER1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND2]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK-DAG: [[INNER1_COND:%.*]] = icmp ne i32 [[INNER1]], 0
+ // CHECK-DAG: [[INNER2:%.*]] = call i32 @llvm.ct.select.i32(i1 [[INNER1_COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK-DAG: [[OUTER:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND1]], i32 [[INNER2]], i32 %{{.*}})
+ // CHECK-DAG: [[FINAL_COND:%.*]] = icmp ne i32 [[OUTER]], 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[FINAL_COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(__builtin_ct_select(cond1, __builtin_ct_select(__builtin_ct_select(cond2, a, b), c, d), e), f, a);
+}
+
+// Test ct_select with complex condition expressions
+int test_complex_condition_expr(int x, int y, int z, int a, int b) {
+ // CHECK-LABEL: define {{.*}} @test_complex_condition_expr
+ // CHECK: [[CMP1:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
+ // CHECK: [[SELECT1:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP1]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: [[CMP2:%.*]] = icmp slt i32 [[SELECT1]], %{{.*}}
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[CMP2]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ return __builtin_ct_select(__builtin_ct_select(x > y, x, y) < z, a, b);
+}
+
+// Test vector types - 128-bit vectors
+typedef int __attribute__((vector_size(16))) int4;
+typedef float __attribute__((vector_size(16))) float4;
+typedef short __attribute__((vector_size(16))) short8;
+typedef char __attribute__((vector_size(16))) char16;
+
+int4 test_vector_int4(int cond, int4 a, int4 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_int4
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: ret <4 x i32> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+float4 test_vector_float4(int cond, float4 a, float4 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_float4
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK: ret <4 x float> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+short8 test_vector_short8(int cond, short8 a, short8 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_short8
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <8 x i16> @llvm.ct.select.v8i16(i1 [[COND]], <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+ // CHECK: ret <8 x i16> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+char16 test_vector_char16(int cond, char16 a, char16 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_char16
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <16 x i8> @llvm.ct.select.v16i8(i1 [[COND]], <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+ // CHECK: ret <16 x i8> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test 256-bit vectors
+typedef int __attribute__((vector_size(32))) int8;
+typedef float __attribute__((vector_size(32))) float8;
+typedef double __attribute__((vector_size(32))) double4;
+
+int8 test_vector_int8(int cond, int8 a, int8 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_int8
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call <8 x i32> @llvm.ct.select.v8i32(i1 [[COND]], <8 x i32> %{{.*}}, <8 x i32> %{{.*}})
+ return __builtin_ct_select(cond, a, b);
+}
+
+float8 test_vector_float8(int cond, float8 a, float8 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_float8
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call <8 x float> @llvm.ct.select.v8f32(i1 [[COND]], <8 x float> %{{.*}}, <8 x float> %{{.*}})
+ return __builtin_ct_select(cond, a, b);
+}
+
+double4 test_vector_double4(int cond, double4 a, double4 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_double4
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call <4 x double> @llvm.ct.select.v4f64(i1 [[COND]], <4 x double> %{{.*}}, <4 x double> %{{.*}})
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test 512-bit vectors
+typedef int __attribute__((vector_size(64))) int16;
+typedef float __attribute__((vector_size(64))) float16;
+
+int16 test_vector_int16(int cond, int16 a, int16 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_int16
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <16 x i32> @llvm.ct.select.v16i32(i1 [[COND]], <16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ return __builtin_ct_select(cond, a, b);
+}
+
+float16 test_vector_float16(int cond, float16 a, float16 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_float16
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <16 x float> @llvm.ct.select.v16f32(i1 [[COND]], <16 x float> %{{.*}}, <16 x float> %{{.*}})
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test vector operations with different condition types
+int4 test_vector_char_cond(char cond, int4 a, int4 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_char_cond
+ // CHECK: [[COND:%.*]] = icmp ne i8 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: ret <4 x i32> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+float4 test_vector_long_cond(long long cond, float4 a, float4 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_long_cond
+ // CHECK: [[COND:%.*]] = icmp ne i64 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK: ret <4 x float> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test vector constants
+int4 test_vector_constant_cond(void) {
+ // CHECK-LABEL: define {{.*}} @test_vector_constant_cond
+ // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 true, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: ret <4 x i32> [[RESULT]]
+ int4 a = {1, 2, 3, 4};
+ int4 b = {5, 6, 7, 8};
+ return __builtin_ct_select(1, a, b);
+}
+
+float4 test_vector_zero_cond(void) {
+ // CHECK-LABEL: define {{.*}} @test_vector_zero_cond
+ // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 false, <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK: ret <4 x float> [[RESULT]]
+ float4 a = {1.0f, 2.0f, 3.0f, 4.0f};
+ float4 b = {5.0f, 6.0f, 7.0f, 8.0f};
+ return __builtin_ct_select(0, a, b);
+}
+
+// Test nested vector selections
+int4 test_vector_nested(int cond1, int cond2, int4 a, int4 b, int4 c) {
+ // CHECK-LABEL: define {{.*}} @test_vector_nested
+ // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[INNER:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND2]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND1]], <4 x i32> [[INNER]], <4 x i32> %{{.*}})
+ // CHECK: ret <4 x i32> [[RESULT]]
+ return __builtin_ct_select(cond1, __builtin_ct_select(cond2, a, b), c);
+}
+
+// Test vector selection with complex expressions
+float4 test_vector_complex_expr(int x, int y, float4 a, float4 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_complex_expr
+ // CHECK: [[CMP:%.*]] = icmp sgt i32 %{{.*}}, %{{.*}}
+ // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[CMP]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK: ret <4 x float> [[RESULT]]
+ return __builtin_ct_select(x > y, a, b);
+}
+
+// Test vector with different element sizes
+typedef long long __attribute__((vector_size(16))) long2;
+typedef double __attribute__((vector_size(16))) double2;
+
+long2 test_vector_long2(int cond, long2 a, long2 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_long2
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <2 x i64> @llvm.ct.select.v2i64(i1 [[COND]], <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ // CHECK: ret <2 x i64> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+double2 test_vector_double2(int cond, double2 a, double2 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_double2
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <2 x double> @llvm.ct.select.v2f64(i1 [[COND]], <2 x double> %{{.*}}, <2 x double> %{{.*}})
+ // CHECK: ret <2 x double> [[RESULT]]
+ return __builtin_ct_select(cond, a, b);
+}
+
+// Test mixed vector operations
+int4 test_vector_from_scalar_condition(int4 vec_cond, int4 a, int4 b) {
+ // CHECK-LABEL: define {{.*}} @test_vector_from_scalar_condition
+ // Extract first element and use as condition
+ int scalar_cond = vec_cond[0];
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <4 x i32> @llvm.ct.select.v4i32(i1 [[COND]], <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+ // CHECK: ret <4 x i32> [[RESULT]]
+ return __builtin_ct_select(scalar_cond, a, b);
+}
+
+// Test vector chaining
+float4 test_vector_chaining(int cond1, int cond2, int cond3, float4 a, float4 b, float4 c, float4 d) {
+ // CHECK-LABEL: define {{.*}} @test_vector_chaining
+ // CHECK-DAG: [[COND1:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[COND2:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[COND3:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[FIRST:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND1]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK-DAG: [[SECOND:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND2]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK-DAG: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND3]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK: ret <4 x float> [[RESULT]]
+ float4 first = __builtin_ct_select(cond1, a, b);
+ float4 second = __builtin_ct_select(cond2, first, c);
+ return __builtin_ct_select(cond3, second, d);
+}
+
+// Test special floating point values - NaN
+float test_nan_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_nan_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float 1.000000e+00)
+ // CHECK: ret float [[RESULT]]
+ float nan_val = __builtin_nanf("");
+ return __builtin_ct_select(cond, nan_val, 1.0f);
+}
+
+double test_nan_double_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_nan_double_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double 2.000000e+00)
+ // CHECK: ret double [[RESULT]]
+ double nan_val = __builtin_nan("");
+ return __builtin_ct_select(cond, nan_val, 2.0);
+}
+
+// Test infinity values
+float test_infinity_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_infinity_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+ // CHECK: ret float [[RESULT]]
+ float pos_inf = __builtin_inff();
+ float neg_inf = -__builtin_inff();
+ return __builtin_ct_select(cond, pos_inf, neg_inf);
+}
+
+double test_infinity_double_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_infinity_double_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+ // CHECK: ret double [[RESULT]]
+ double pos_inf = __builtin_inf();
+ double neg_inf = -__builtin_inf();
+ return __builtin_ct_select(cond, pos_inf, neg_inf);
+}
+
+// Test subnormal/denormal values
+float test_subnormal_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_subnormal_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call float @llvm.ct.select.f32(i1 [[COND]], float %{{.*}}, float %{{.*}})
+ // CHECK: ret float [[RESULT]]
+ // Very small subnormal values
+ float subnormal1 = 1e-40f;
+ float subnormal2 = 1e-45f;
+ return __builtin_ct_select(cond, subnormal1, subnormal2);
+}
+
+// Test integer overflow boundaries
+int test_integer_overflow_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_integer_overflow_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ int max_int = __INT_MAX__;
+ int min_int = (-__INT_MAX__ - 1);
+ return __builtin_ct_select(cond, max_int, min_int);
+}
+
+long long test_longlong_overflow_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_longlong_overflow_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i64 @llvm.ct.select.i64(i1 [[COND]], i64 %{{.*}}, i64 %{{.*}})
+ // CHECK: ret i64 [[RESULT]]
+ long long max_ll = __LONG_LONG_MAX__;
+ long long min_ll = (-__LONG_LONG_MAX__ - 1);
+ return __builtin_ct_select(cond, max_ll, min_ll);
+}
+
+// Test unsigned overflow boundaries
+unsigned int test_unsigned_overflow_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_unsigned_overflow_operands
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ unsigned int max_uint = 4294967295;
+ unsigned int min_uint = 0;
+ return __builtin_ct_select(cond, max_uint, min_uint);
+}
+
+// Test constant-time selection between a null and a valid pointer (the null is selected, never dereferenced)
+int* test_null_pointer_operands(int cond, int* valid_ptr) {
+ // CHECK-LABEL: define {{.*}} @test_null_pointer_operands
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call ptr @llvm.ct.select.p0(i1 [[COND]], ptr %{{.*}}, ptr %{{.*}})
+ // CHECK: ret ptr [[RESULT]]
+ int* null_ptr = (int*)0;
+ return __builtin_ct_select(cond, null_ptr, valid_ptr);
+}
+
+// Test volatile operations
+volatile int global_volatile = 42;
+int test_volatile_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_volatile_operands
+ // CHECK-DAG: [[VOLATILE_LOAD:%.*]] = load volatile i32, ptr {{.*}}
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 100)
+ // CHECK: ret i32 [[RESULT]]
+ volatile int vol_val = global_volatile;
+ return __builtin_ct_select(cond, vol_val, 100);
+}
+
+// Test uninitialized variable behavior (should still work with ct_select)
+int test_uninitialized_operands(int cond, int initialized) {
+ // CHECK-LABEL: define {{.*}} @test_uninitialized_operands
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ int uninitialized; // Intentionally uninitialized
+ return __builtin_ct_select(cond, uninitialized, initialized);
+}
+
+// Test zero division avoidance patterns
+int test_division_by_zero_avoidance(int cond, int dividend, int divisor) {
+ // CHECK-LABEL: define {{.*}} @test_division_by_zero_avoidance
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[DIV_RESULT:%.*]] = sdiv i32 %{{.*}}, %{{.*}}
+ // CHECK-DAG: [[SAFE_DIVISOR:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 1)
+ // First get a safe divisor (never zero)
+ int safe_divisor = __builtin_ct_select(divisor != 0, divisor, 1);
+ // Then perform division with guaranteed non-zero divisor
+ return dividend / safe_divisor;
+}
+
+// Test array bounds checking patterns
+int test_array_bounds_protection(int cond, int index, int* array) {
+ // CHECK-LABEL: define {{.*}} @test_array_bounds_protection
+ // CHECK-DAG: [[SAFE_INDEX:%.*]] = call i32 @llvm.ct.select.i32(i1 {{.*}}, i32 %{{.*}}, i32 0)
+ // Use ct_select to ensure safe array indexing
+ int safe_index = __builtin_ct_select(index >= 0 && index < 10, index, 0);
+ return array[safe_index];
+}
+
+// Test bit manipulation edge cases
+unsigned int test_bit_manipulation_edge_cases(int cond, unsigned int value) {
+ // CHECK-LABEL: define {{.*}} @test_bit_manipulation_edge_cases
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[SHIFT_LEFT:%.*]] = shl i32 %{{.*}}, 31
+ // CHECK-DAG: [[SHIFT_RIGHT:%.*]] = lshr i32 %{{.*}}, 31
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ // Test extreme bit shifts; both are well-defined for unsigned operands
+ unsigned int left_shift = value << 31; // Wraps modulo 2^32 — no UB for unsigned types
+ unsigned int right_shift = value >> 31; // Extract the top (most significant) bit
+ return __builtin_ct_select(cond, left_shift, right_shift);
+}
+
+// Test signed integer wraparound
+int test_signed_wraparound(int cond, int a, int b) {
+ // CHECK-LABEL: define {{.*}} @test_signed_wraparound
+ // CHECK-DAG: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK-DAG: [[ADD:%.*]] = add nsw i32 %{{.*}}, %{{.*}}
+ // CHECK-DAG: [[SUB:%.*]] = sub nsw i32 %{{.*}}, %{{.*}}
+ // CHECK-DAG: [[RESULT:%.*]] = call i32 @llvm.ct.select.i32(i1 [[COND]], i32 %{{.*}}, i32 %{{.*}})
+ // CHECK: ret i32 [[RESULT]]
+ int sum = a + b; // Could overflow
+ int diff = a - b; // Could underflow
+ return __builtin_ct_select(cond, sum, diff);
+}
+
+// Test vector NaN handling
+float4 test_vector_nan_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_vector_nan_operands
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK: ret <4 x float> [[RESULT]]
+ float nan_val = __builtin_nanf("");
+ float4 nan_vec = {nan_val, nan_val, nan_val, nan_val};
+ float4 normal_vec = {1.0f, 2.0f, 3.0f, 4.0f};
+ return __builtin_ct_select(cond, nan_vec, normal_vec);
+}
+
+// Test vector infinity handling
+float4 test_vector_infinity_operands(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_vector_infinity_operands
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call <4 x float> @llvm.ct.select.v4f32(i1 [[COND]], <4 x float> %{{.*}}, <4 x float> %{{.*}})
+ // CHECK: ret <4 x float> [[RESULT]]
+ float pos_inf = __builtin_inff();
+ float neg_inf = -__builtin_inff();
+ float4 inf_vec = {pos_inf, neg_inf, pos_inf, neg_inf};
+ float4 zero_vec = {0.0f, 0.0f, 0.0f, 0.0f};
+ return __builtin_ct_select(cond, inf_vec, zero_vec);
+}
+
+// Test mixed special values
+double test_mixed_special_values(int cond) {
+ // CHECK-LABEL: define {{.*}} @test_mixed_special_values
+ // CHECK: [[COND:%.*]] = icmp ne i32 %{{.*}}, 0
+ // CHECK: [[RESULT:%.*]] = call double @llvm.ct.select.f64(i1 [[COND]], double %{{.*}}, double %{{.*}})
+ // CHECK: ret double [[RESULT]]
+ double nan_val = __builtin_nan("");
+ double inf_val = __builtin_inf();
+ return __builtin_ct_select(cond, nan_val, inf_val);
+}
+
+// Test constant-time memory access pattern
+int test_constant_time_memory_access(int secret_index, int* data_array) {
+ // CHECK-LABEL: define {{.*}} @test_constant_time_memory_access
+ // This pattern ensures constant-time memory access regardless of secret_index value
+ int result = 0;
+ // Use ct_select to accumulate values without revealing the secret index
+ for (int i = 0; i < 8; i++) {
+ int is_target = (i == secret_index);
+ int current_value = data_array[i];
+ int selected_value = __builtin_ct_select(is_target, current_value, 0);
+ result += selected_value;
+ }
+ return result;
+}
+
+// Test timing-attack resistant comparison
+int test_timing_resistant_comparison(const char* secret, const char* guess) {
+ // CHECK-LABEL: define {{.*}} @test_timing_resistant_comparison
+ // Constant-time string comparison using ct_select
+ int match = 1;
+ for (int i = 0; i < 32; i++) {
+ int chars_equal = (secret[i] == guess[i]);
+ int both_null = (secret[i] == 0) && (guess[i] == 0);
+ int still_matching = __builtin_ct_select(chars_equal || both_null, match, 0);
+ match = __builtin_ct_select(both_null, match, still_matching);
+ }
+ return match;
+}
>From a0631669a0d597ced647f26756a84b9e75f71091 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 11:01:00 -0500
Subject: [PATCH 3/4] [ConstantTime][RISCV] Add comprehensive tests for
ct.select
Add comprehensive test suite for RISC-V fallback implementation:
- Edge cases (zero conditions, large integers, sign extension)
- Pattern matching (nested selects, chains)
- Vector support with RVV extensions
- Side effects and memory operations
The basic fallback test is in the core infrastructure PR.
---
.../RISCV/ctselect-fallback-edge-cases.ll | 214 +++++
.../RISCV/ctselect-fallback-patterns.ll | 383 +++++++++
.../RISCV/ctselect-fallback-vector-rvv.ll | 804 ++++++++++++++++++
.../CodeGen/RISCV/ctselect-side-effects.ll | 176 ++++
4 files changed, 1577 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
create mode 100644 llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
create mode 100644 llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
create mode 100644 llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
new file mode 100644
index 0000000000000..af1be0c8f3ddc
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
@@ -0,0 +1,214 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -O3 | FileCheck %s --check-prefix=RV64
+; RUN: llc < %s -mtriple=riscv32 -O3 | FileCheck %s --check-prefix=RV32
+
+; Test with small integer types
+define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
+; RV64-LABEL: test_ctselect_i1:
+; RV64: # %bb.0:
+; RV64-NEXT: and a1, a0, a1
+; RV64-NEXT: xori a0, a0, 1
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_i1:
+; RV32: # %bb.0:
+; RV32-NEXT: and a1, a0, a1
+; RV32-NEXT: xori a0, a0, 1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
+ ret i1 %result
+}
+
+; Test with extremal values
+define i32 @test_ctselect_extremal_values(i1 %cond) {
+; RV64-LABEL: test_ctselect_extremal_values:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: lui a1, 524288
+; RV64-NEXT: subw a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_extremal_values:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: lui a1, 524288
+; RV32-NEXT: addi a2, a0, -1
+; RV32-NEXT: neg a0, a0
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
+ ret i32 %result
+}
+
+; Test with null pointers
+define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
+; RV64-LABEL: test_ctselect_null_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_null_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: ret
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
+ ret ptr %result
+}
+
+; Test with function pointers
+define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
+; RV64-LABEL: test_ctselect_function_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: neg a3, a0
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a1, a3, a1
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_function_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a3, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a3, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
+ ret ptr %result
+}
+
+; Test with condition from icmp on pointers
+define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
+; RV64-LABEL: test_ctselect_ptr_cmp:
+; RV64: # %bb.0:
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a2, a0, a2
+; RV64-NEXT: not a0, a0
+; RV64-NEXT: and a0, a0, a3
+; RV64-NEXT: or a0, a2, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_ptr_cmp:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a2, a0, a2
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: ret
+ %cmp = icmp eq ptr %p1, %p2
+ %result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
+ ret ptr %result
+}
+
+; Test with struct pointer types
+%struct.pair = type { i32, i32 }
+
+define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
+; RV64-LABEL: test_ctselect_struct_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: neg a3, a0
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a1, a3, a1
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_struct_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a3, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a3, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+ ret ptr %result
+}
+
+; Test with deeply nested conditions
+define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
+; RV64-LABEL: test_ctselect_deeply_nested:
+; RV64: # %bb.0:
+; RV64-NEXT: lw t0, 0(sp)
+; RV64-NEXT: xor a4, a4, a5
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: xor a5, a5, a6
+; RV64-NEXT: slli a1, a1, 63
+; RV64-NEXT: xor a6, a6, a7
+; RV64-NEXT: slli a2, a2, 63
+; RV64-NEXT: slli a3, a3, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: srai a1, a1, 63
+; RV64-NEXT: srai a2, a2, 63
+; RV64-NEXT: and a0, a4, a0
+; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: xor a1, a7, t0
+; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: srai a3, a3, 63
+; RV64-NEXT: and a0, a0, a3
+; RV64-NEXT: xor a0, a0, t0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_deeply_nested:
+; RV32: # %bb.0:
+; RV32-NEXT: lw t0, 0(sp)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: andi a1, a1, 1
+; RV32-NEXT: andi a2, a2, 1
+; RV32-NEXT: andi a3, a3, 1
+; RV32-NEXT: neg t1, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a4, t1, a4
+; RV32-NEXT: neg t1, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a0, a5
+; RV32-NEXT: neg a5, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a1, a6
+; RV32-NEXT: neg a6, a3
+; RV32-NEXT: addi a3, a3, -1
+; RV32-NEXT: and a2, a2, a7
+; RV32-NEXT: or a0, a4, a0
+; RV32-NEXT: and a0, t1, a0
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: and a0, a5, a0
+; RV32-NEXT: or a0, a0, a2
+; RV32-NEXT: and a0, a6, a0
+; RV32-NEXT: and a1, a3, t0
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+ %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+ %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+ %sel4 = call i32 @llvm.ct.select.i32(i1 %c4, i32 %sel3, i32 %e)
+ ret i32 %sel4
+}
+
+; Declare the intrinsics
+declare i1 @llvm.ct.select.i1(i1, i1, i1)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
new file mode 100644
index 0000000000000..1149971fd090e
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
@@ -0,0 +1,383 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -O3 | FileCheck %s --check-prefix=RV64
+; RUN: llc < %s -mtriple=riscv32 -O3 | FileCheck %s --check-prefix=RV32
+
+; Test smin(x, 0) pattern
+define i32 @test_ctselect_smin_zero(i32 %x) {
+; RV64-LABEL: test_ctselect_smin_zero:
+; RV64: # %bb.0:
+; RV64-NEXT: sraiw a1, a0, 31
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_smin_zero:
+; RV32: # %bb.0:
+; RV32-NEXT: srai a1, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: ret
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+ ret i32 %result
+}
+
+; Test smax(x, 0) pattern
+define i32 @test_ctselect_smax_zero(i32 %x) {
+; RV64-LABEL: test_ctselect_smax_zero:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a1, a0
+; RV64-NEXT: sgtz a1, a1
+; RV64-NEXT: neg a1, a1
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_smax_zero:
+; RV32: # %bb.0:
+; RV32-NEXT: sgtz a1, a0
+; RV32-NEXT: neg a1, a1
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: ret
+ %cmp = icmp sgt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
+ ret i32 %result
+}
+
+; Test generic smin pattern
+define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
+; RV64-LABEL: test_ctselect_smin_generic:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a2, a1
+; RV64-NEXT: sext.w a3, a0
+; RV64-NEXT: slt a2, a3, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: neg a2, a2
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_smin_generic:
+; RV32: # %bb.0:
+; RV32-NEXT: slt a2, a0, a1
+; RV32-NEXT: neg a3, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %cmp = icmp slt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test generic smax pattern
+define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
+; RV64-LABEL: test_ctselect_smax_generic:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a2, a0
+; RV64-NEXT: sext.w a3, a1
+; RV64-NEXT: slt a2, a3, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: neg a2, a2
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_smax_generic:
+; RV32: # %bb.0:
+; RV32-NEXT: slt a2, a1, a0
+; RV32-NEXT: neg a3, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %cmp = icmp sgt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test umin pattern
+define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
+; RV64-LABEL: test_ctselect_umin_generic:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a2, a1
+; RV64-NEXT: sext.w a3, a0
+; RV64-NEXT: sltu a2, a3, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: neg a2, a2
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_umin_generic:
+; RV32: # %bb.0:
+; RV32-NEXT: sltu a2, a0, a1
+; RV32-NEXT: neg a3, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %cmp = icmp ult i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test umax pattern
+define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
+; RV64-LABEL: test_ctselect_umax_generic:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a2, a0
+; RV64-NEXT: sext.w a3, a1
+; RV64-NEXT: sltu a2, a3, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: neg a2, a2
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_umax_generic:
+; RV32: # %bb.0:
+; RV32-NEXT: sltu a2, a1, a0
+; RV32-NEXT: neg a3, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %cmp = icmp ugt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
+ ret i32 %result
+}
+
+; Test abs pattern
+define i32 @test_ctselect_abs(i32 %x) {
+; RV64-LABEL: test_ctselect_abs:
+; RV64: # %bb.0:
+; RV64-NEXT: negw a1, a0
+; RV64-NEXT: xor a1, a1, a0
+; RV64-NEXT: sraiw a2, a0, 31
+; RV64-NEXT: and a1, a1, a2
+; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_abs:
+; RV32: # %bb.0:
+; RV32-NEXT: neg a1, a0
+; RV32-NEXT: srai a2, a0, 31
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %neg = sub i32 0, %x
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %neg, i32 %x)
+ ret i32 %result
+}
+
+; Test nabs pattern (negative abs)
+define i32 @test_ctselect_nabs(i32 %x) {
+; RV64-LABEL: test_ctselect_nabs:
+; RV64: # %bb.0:
+; RV64-NEXT: negw a1, a0
+; RV64-NEXT: xor a2, a0, a1
+; RV64-NEXT: sraiw a0, a0, 31
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_nabs:
+; RV32: # %bb.0:
+; RV32-NEXT: neg a1, a0
+; RV32-NEXT: srai a2, a0, 31
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: not a2, a2
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %neg = sub i32 0, %x
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %neg)
+ ret i32 %result
+}
+
+; Test sign extension pattern
+define i32 @test_ctselect_sign_extend(i32 %x) {
+; RV64-LABEL: test_ctselect_sign_extend:
+; RV64: # %bb.0:
+; RV64-NEXT: sraiw a0, a0, 31
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_sign_extend:
+; RV32: # %bb.0:
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: ret
+ %cmp = icmp slt i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 -1, i32 0)
+ ret i32 %result
+}
+
+; Test zero extension pattern
+define i32 @test_ctselect_zero_extend(i32 %x) {
+; RV64-LABEL: test_ctselect_zero_extend:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_zero_extend:
+; RV32: # %bb.0:
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: ret
+ %cmp = icmp ne i32 %x, 0
+ %result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 1, i32 0)
+ ret i32 %result
+}
+
+; Test constant folding with known condition
+define i32 @test_ctselect_constant_folding_true(i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_constant_folding_true:
+; RV64: # %bb.0:
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_constant_folding_true:
+; RV32: # %bb.0:
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_constant_folding_false(i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_constant_folding_false:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_constant_folding_false:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test with identical operands
+define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
+; RV64-LABEL: test_ctselect_identical_operands:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_identical_operands:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a2, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a2, a2, a1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
+ ret i32 %result
+}
+
+; Test with inverted condition
+define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_inverted_condition:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_inverted_condition:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a2, a0, a2
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: ret
+ %cmp = icmp eq i32 %x, %y
+ %not_cmp = xor i1 %cmp, true
+ %result = call i32 @llvm.ct.select.i32(i1 %not_cmp, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test chain of ct.select operations
+define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c, i32 %d) {
+; RV64-LABEL: test_ctselect_chain:
+; RV64: # %bb.0:
+; RV64-NEXT: xor a3, a3, a4
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: xor a4, a4, a5
+; RV64-NEXT: slli a1, a1, 63
+; RV64-NEXT: xor a5, a5, a6
+; RV64-NEXT: slli a2, a2, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: srai a1, a1, 63
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: xor a0, a0, a4
+; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: srai a2, a2, 63
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_chain:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: andi a1, a1, 1
+; RV32-NEXT: andi a2, a2, 1
+; RV32-NEXT: neg a7, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a3, a7, a3
+; RV32-NEXT: neg a7, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: neg a4, a2
+; RV32-NEXT: addi a2, a2, -1
+; RV32-NEXT: and a1, a1, a5
+; RV32-NEXT: or a0, a3, a0
+; RV32-NEXT: and a0, a7, a0
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: and a0, a4, a0
+; RV32-NEXT: and a1, a2, a6
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: ret
+ %sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
+ %sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
+ %sel3 = call i32 @llvm.ct.select.i32(i1 %c3, i32 %sel2, i32 %d)
+ ret i32 %sel3
+}
+
+; Test for 64-bit operations (supported on all 64-bit architectures)
+define i64 @test_ctselect_i64_smin_zero(i64 %x) {
+; RV64-LABEL: test_ctselect_i64_smin_zero:
+; RV64: # %bb.0:
+; RV64-NEXT: srai a1, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_i64_smin_zero:
+; RV32: # %bb.0:
+; RV32-NEXT: srai a2, a1, 31
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: ret
+ %cmp = icmp slt i64 %x, 0
+ %result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
+ ret i64 %result
+}
+
+; Declare the intrinsics
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
new file mode 100644
index 0000000000000..a02e1e4749443
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
@@ -0,0 +1,804 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -O3 | FileCheck %s --check-prefix=RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v -O3 | FileCheck %s --check-prefix=RV32
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvl128b -O3 | FileCheck %s --check-prefix=RV32-V128
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvl256b -O3 | FileCheck %s --check-prefix=RV64-V256
+
+
+; Basic pass-through select on nxv4i32
+define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; RV64-LABEL: ctsel_nxv4i32_basic:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4i32_basic:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4i32_basic:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4i32_basic:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+; Select with loads (aligned)
+define <vscale x 4 x i32> @ctsel_nxv4i32_load(i1 %cond, ptr %p1, ptr %p2) {
+; RV64-LABEL: ctsel_nxv4i32_load:
+; RV64: # %bb.0:
+; RV64-NEXT: vl2re32.v v8, (a1)
+; RV64-NEXT: vl2re32.v v10, (a2)
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4i32_load:
+; RV32: # %bb.0:
+; RV32-NEXT: vl2re32.v v8, (a1)
+; RV32-NEXT: vl2re32.v v10, (a2)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4i32_load:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vl2re32.v v8, (a1)
+; RV32-V128-NEXT: vl2re32.v v10, (a2)
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4i32_load:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: vl2re32.v v8, (a1)
+; RV64-V256-NEXT: vl2re32.v v10, (a2)
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ %a = load <vscale x 4 x i32>, ptr %p1, align 16
+ %b = load <vscale x 4 x i32>, ptr %p2, align 16
+ %r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+; Mixed: do arithmetic first, then select, then store
+define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
+; RV64-LABEL: ctsel_nxv4i32_mixed:
+; RV64: # %bb.0:
+; RV64-NEXT: vl2re32.v v8, (a1)
+; RV64-NEXT: vl2re32.v v10, (a2)
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vadd.vv v8, v8, v8
+; RV64-NEXT: vadd.vv v10, v10, v10
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vs2r.v v8, (a3)
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4i32_mixed:
+; RV32: # %bb.0:
+; RV32-NEXT: vl2re32.v v8, (a1)
+; RV32-NEXT: vl2re32.v v10, (a2)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vadd.vv v8, v8, v8
+; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vs2r.v v8, (a3)
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4i32_mixed:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vl2re32.v v8, (a1)
+; RV32-V128-NEXT: vl2re32.v v10, (a2)
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vadd.vv v8, v8, v8
+; RV32-V128-NEXT: vadd.vv v10, v10, v10
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vs2r.v v8, (a3)
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4i32_mixed:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: vl2re32.v v8, (a1)
+; RV64-V256-NEXT: vl2re32.v v10, (a2)
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vadd.vv v8, v8, v8
+; RV64-V256-NEXT: vadd.vv v10, v10, v10
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vs2r.v v8, (a3)
+; RV64-V256-NEXT: ret
+ %a = load <vscale x 4 x i32>, ptr %p1, align 16
+ %b = load <vscale x 4 x i32>, ptr %p2, align 16
+ ; avoid scalable vector constants: use %a+%a and %b+%b
+ %a2 = add <vscale x 4 x i32> %a, %a
+ %b2 = add <vscale x 4 x i32> %b, %b
+ %r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %cond, <vscale x 4 x i32> %a2, <vscale x 4 x i32> %b2)
+ store <vscale x 4 x i32> %r, ptr %out, align 16
+ ret void
+}
+
+; Const-true/false fold smoke tests
+define <vscale x 4 x i32> @ctsel_nxv4i32_true(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; RV64-LABEL: ctsel_nxv4i32_true:
+; RV64: # %bb.0:
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4i32_true:
+; RV32: # %bb.0:
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4i32_true:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4i32_true:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 true, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+define <vscale x 4 x i32> @ctsel_nxv4i32_false(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
+; RV64-LABEL: ctsel_nxv4i32_false:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-NEXT: vmv2r.v v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4i32_false:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-NEXT: vmv2r.v v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4i32_false:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV32-V128-NEXT: vmv2r.v v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4i32_false:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: vsetivli zero, 1, e8, m1, ta, ma
+; RV64-V256-NEXT: vmv2r.v v8, v10
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 false, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ ret <vscale x 4 x i32> %r
+}
+
+; Chain two selects to ensure masks don't get merged away
+define <vscale x 4 x i32> @ctsel_nxv4i32_chain(i1 %c1, i1 %c2,
+; RV64-LABEL: ctsel_nxv4i32_chain:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv.v.i v14, 0
+; RV64-NEXT: andi a1, a1, 1
+; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v16, a0
+; RV64-NEXT: vmsne.vi v0, v16, 0
+; RV64-NEXT: vmv.v.x v18, a1
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmsne.vi v0, v18, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmerge.vim v14, v14, -1, v0
+; RV64-NEXT: vand.vv v8, v16, v8
+; RV64-NEXT: vnot.v v16, v16
+; RV64-NEXT: vand.vv v10, v16, v10
+; RV64-NEXT: vnot.v v16, v14
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vand.vv v8, v14, v8
+; RV64-NEXT: vand.vv v10, v16, v12
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4i32_chain:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.i v14, 0
+; RV32-NEXT: andi a1, a1, 1
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v16, a0
+; RV32-NEXT: vmsne.vi v0, v16, 0
+; RV32-NEXT: vmv.v.x v18, a1
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmsne.vi v0, v18, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmerge.vim v14, v14, -1, v0
+; RV32-NEXT: vand.vv v8, v16, v8
+; RV32-NEXT: vnot.v v16, v16
+; RV32-NEXT: vand.vv v10, v16, v10
+; RV32-NEXT: vnot.v v16, v14
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vand.vv v8, v14, v8
+; RV32-NEXT: vand.vv v10, v16, v12
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4i32_chain:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v14, 0
+; RV32-V128-NEXT: andi a1, a1, 1
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v16, a0
+; RV32-V128-NEXT: vmsne.vi v0, v16, 0
+; RV32-V128-NEXT: vmv.v.x v18, a1
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmsne.vi v0, v18, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmerge.vim v14, v14, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v16, v8
+; RV32-V128-NEXT: vnot.v v16, v16
+; RV32-V128-NEXT: vand.vv v10, v16, v10
+; RV32-V128-NEXT: vnot.v v16, v14
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vand.vv v8, v14, v8
+; RV32-V128-NEXT: vand.vv v10, v16, v12
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4i32_chain:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v14, 0
+; RV64-V256-NEXT: andi a1, a1, 1
+; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v16, a0
+; RV64-V256-NEXT: vmsne.vi v0, v16, 0
+; RV64-V256-NEXT: vmv.v.x v18, a1
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmsne.vi v0, v18, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmerge.vim v14, v14, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v16, v8
+; RV64-V256-NEXT: vnot.v v16, v16
+; RV64-V256-NEXT: vand.vv v10, v16, v10
+; RV64-V256-NEXT: vnot.v v16, v14
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vand.vv v8, v14, v8
+; RV64-V256-NEXT: vand.vv v10, v16, v12
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ <vscale x 4 x i32> %a,
+ <vscale x 4 x i32> %b,
+ <vscale x 4 x i32> %c) {
+ %t = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %c1, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
+ %r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %c2, <vscale x 4 x i32> %t, <vscale x 4 x i32> %c)
+ ret <vscale x 4 x i32> %r
+}
+
+; A different element width
+define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
+; RV64-LABEL: ctsel_nxv8i16_basic:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv8i16_basic:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv8i16_basic:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv8i16_basic:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e16, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 8 x i16> @llvm.ct.select.nxv8i16(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
+ ret <vscale x 8 x i16> %r
+}
+
+define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
+; RV64-LABEL: ctsel_nxv16i8_basic:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv16i8_basic:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv16i8_basic:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv16i8_basic:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 16 x i8> @llvm.ct.select.nxv16i8(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
+ ret <vscale x 16 x i8> %r
+}
+
+; 64-bit elements (useful on RV64)
+define <vscale x 2 x i64> @ctsel_nxv2i64_basic(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
+; RV64-LABEL: ctsel_nxv2i64_basic:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv2i64_basic:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv2i64_basic:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv2i64_basic:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 2 x i64> @llvm.ct.select.nxv2i64(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
+ ret <vscale x 2 x i64> %r
+}
+
+; Floating-point scalable vectors (bitcast to integer by the fallback expansion)
+define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b) {
+; RV64-LABEL: ctsel_nxv4f32_basic:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4f32_basic:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4f32_basic:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4f32_basic:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 4 x float> @llvm.ct.select.nxv4f32(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
+ ret <vscale x 4 x float> %r
+}
+
+; FP arithmetic around select
+define <vscale x 4 x float> @ctsel_nxv4f32_arith(i1 %cond, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
+; RV64-LABEL: ctsel_nxv4f32_arith:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT: vfadd.vv v12, v8, v10
+; RV64-NEXT: vfsub.vv v8, v8, v10
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmv.v.i v10, 0
+; RV64-NEXT: vmerge.vim v10, v10, -1, v0
+; RV64-NEXT: vand.vv v12, v10, v12
+; RV64-NEXT: vnot.v v10, v10
+; RV64-NEXT: vand.vv v8, v10, v8
+; RV64-NEXT: vor.vv v8, v12, v8
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv4f32_arith:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT: vfadd.vv v12, v8, v10
+; RV32-NEXT: vfsub.vv v8, v8, v10
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmv.v.i v10, 0
+; RV32-NEXT: vmerge.vim v10, v10, -1, v0
+; RV32-NEXT: vand.vv v12, v10, v12
+; RV32-NEXT: vnot.v v10, v10
+; RV32-NEXT: vand.vv v8, v10, v8
+; RV32-NEXT: vor.vv v8, v12, v8
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv4f32_arith:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vfadd.vv v12, v8, v10
+; RV32-V128-NEXT: vfsub.vv v8, v8, v10
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v10, a0
+; RV32-V128-NEXT: vmsne.vi v0, v10, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v10, 0
+; RV32-V128-NEXT: vmerge.vim v10, v10, -1, v0
+; RV32-V128-NEXT: vand.vv v12, v10, v12
+; RV32-V128-NEXT: vnot.v v10, v10
+; RV32-V128-NEXT: vand.vv v8, v10, v8
+; RV32-V128-NEXT: vor.vv v8, v12, v8
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv4f32_arith:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vfadd.vv v12, v8, v10
+; RV64-V256-NEXT: vfsub.vv v8, v8, v10
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v10, a0
+; RV64-V256-NEXT: vmsne.vi v0, v10, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v10, 0
+; RV64-V256-NEXT: vmerge.vim v10, v10, -1, v0
+; RV64-V256-NEXT: vand.vv v12, v10, v12
+; RV64-V256-NEXT: vnot.v v10, v10
+; RV64-V256-NEXT: vand.vv v8, v10, v8
+; RV64-V256-NEXT: vor.vv v8, v12, v8
+; RV64-V256-NEXT: ret
+ %sum = fadd <vscale x 4 x float> %x, %y
+ %diff = fsub <vscale x 4 x float> %x, %y
+ %r = call <vscale x 4 x float> @llvm.ct.select.nxv4f32(i1 %cond, <vscale x 4 x float> %sum, <vscale x 4 x float> %diff)
+ ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x double> @ctsel_nxv2f64_basic(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b) {
+; RV64-LABEL: ctsel_nxv2f64_basic:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-NEXT: vmv.v.i v12, 0
+; RV64-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-NEXT: vand.vv v8, v12, v8
+; RV64-NEXT: vnot.v v12, v12
+; RV64-NEXT: vand.vv v10, v12, v10
+; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: ret
+;
+; RV32-LABEL: ctsel_nxv2f64_basic:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32-NEXT: vmv.v.i v12, 0
+; RV32-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-NEXT: vand.vv v8, v12, v8
+; RV32-NEXT: vnot.v v12, v12
+; RV32-NEXT: vand.vv v10, v12, v10
+; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: ret
+;
+; RV32-V128-LABEL: ctsel_nxv2f64_basic:
+; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: andi a0, a0, 1
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vmv.v.i v12, 0
+; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
+; RV32-V128-NEXT: vand.vv v8, v12, v8
+; RV32-V128-NEXT: vnot.v v12, v12
+; RV32-V128-NEXT: vand.vv v10, v12, v10
+; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: ret
+;
+; RV64-V256-LABEL: ctsel_nxv2f64_basic:
+; RV64-V256: # %bb.0:
+; RV64-V256-NEXT: andi a0, a0, 1
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
+; RV64-V256-NEXT: vmv.v.i v12, 0
+; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
+; RV64-V256-NEXT: vand.vv v8, v12, v8
+; RV64-V256-NEXT: vnot.v v12, v12
+; RV64-V256-NEXT: vand.vv v10, v12, v10
+; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: ret
+ %r = call <vscale x 2 x double> @llvm.ct.select.nxv2f64(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
+ ret <vscale x 2 x double> %r
+}
+
+declare <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.ct.select.nxv8i16(i1, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.ct.select.nxv16i8(i1, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.ct.select.nxv2i64(i1, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x float> @llvm.ct.select.nxv4f32(i1, <vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 2 x double>@llvm.ct.select.nxv2f64(i1, <vscale x 2 x double>,<vscale x 2 x double>)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
new file mode 100644
index 0000000000000..255d575ca8f9f
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
@@ -0,0 +1,176 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -O3 -filetype=asm | FileCheck %s --check-prefix=RV64
+; RUN: llc < %s -mtriple=riscv32 -O3 -filetype=asm | FileCheck %s --check-prefix=RV32
+
+; Test 1: Basic optimizations should still work
+define i32 @test_basic_opts(i32 %x) {
+; RV64-LABEL: test_basic_opts:
+; RV64: # %bb.0:
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_basic_opts:
+; RV32: # %bb.0:
+; RV32-NEXT: ret
+ %a = or i32 %x, 0 ; Should eliminate
+ %b = and i32 %a, -1 ; Should eliminate
+ %c = xor i32 %b, 0 ; Should eliminate
+ ret i32 %c
+}
+
+; Test 2: Constant folding should work
+define i32 @test_constant_fold() {
+; RV64-LABEL: test_constant_fold:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_constant_fold:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+ %a = xor i32 -1, -1 ; Should fold to 0
+ ret i32 %a
+}
+
+; Test 3: Protected pattern should NOT have branches
+define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
+; RV64-LABEL: test_protected_no_branch:
+; RV64: # %bb.0:
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_protected_no_branch:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a3, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a3, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test 4: Explicit branch should still generate branches
+define i32 @test_explicit_branch(i1 %cond, i32 %a, i32 %b) {
+; RV64-LABEL: test_explicit_branch:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: beqz a0, .LBB3_2
+; RV64-NEXT: # %bb.1: # %true
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB3_2: # %false
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_explicit_branch:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: beqz a0, .LBB3_2
+; RV32-NEXT: # %bb.1: # %true
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB3_2: # %false
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: ret
+ br i1 %cond, label %true, label %false
+true:
+ ret i32 %a
+false:
+ ret i32 %b
+}
+
+; Test 5: Regular select (not ct.select) - the target may lower it freely
+define i32 @test_regular_select(i1 %cond, i32 %a, i32 %b) {
+; RV64-LABEL: test_regular_select:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a3, a0, 1
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: bnez a3, .LBB4_2
+; RV64-NEXT: # %bb.1:
+; RV64-NEXT: mv a0, a2
+; RV64-NEXT: .LBB4_2:
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_regular_select:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a3, a0, 1
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: bnez a3, .LBB4_2
+; RV32-NEXT: # %bb.1:
+; RV32-NEXT: mv a0, a2
+; RV32-NEXT: .LBB4_2:
+; RV32-NEXT: ret
+ %result = select i1 %cond, i32 %a, i32 %b
+ ret i32 %result
+}
+
+; Test if XOR with all-ones still gets optimized
+define i32 @test_xor_all_ones() {
+; RV64-LABEL: test_xor_all_ones:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_xor_all_ones:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+ %xor1 = xor i32 -1, -1 ; Should optimize to 0
+ ret i32 %xor1
+}
+
+define i32 @test_xor_same_value(i32 %x) {
+; RV64-LABEL: test_xor_same_value:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_xor_same_value:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+ %xor2 = xor i32 %x, %x ; Should optimize to 0
+ ret i32 %xor2
+}
+
+define i32 @test_normal_ops(i32 %x) {
+; RV64-LABEL: test_normal_ops:
+; RV64: # %bb.0:
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_normal_ops:
+; RV32: # %bb.0:
+; RV32-NEXT: ret
+ %or1 = or i32 %x, 0 ; Should optimize to %x
+ %and1 = and i32 %or1, -1 ; Should optimize to %x
+ %xor1 = xor i32 %and1, 0 ; Should optimize to %x
+ ret i32 %xor1
+}
+
+; Verify constant folding of XOR/OR with constant operands still collapses to 0
+define i32 @test_xor_with_const_operands() {
+; RV64-LABEL: test_xor_with_const_operands:
+; RV64: # %bb.0:
+; RV64-NEXT: li a0, 0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_xor_with_const_operands:
+; RV32: # %bb.0:
+; RV32-NEXT: li a0, 0
+; RV32-NEXT: ret
+ %a = xor i32 -1, -1 ; -1 ^ -1 should become 0
+ %b = xor i32 0, 0 ; 0 ^ 0 should become 0
+ %c = xor i32 42, 42 ; 42 ^ 42 should become 0
+ %result = or i32 %a, %b
+ %final = or i32 %result, %c
+ ret i32 %final ; Should optimize to 0
+}
+
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
>From 453c34fff783034010ed55c8dbd347373ced5dc9 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Sat, 7 Mar 2026 15:38:43 -0500
Subject: [PATCH 4/4] [LLVM][RISCV] Regenerate ct.select test CHECK lines
Update CHECK lines to match the new constant-time AND/OR/XOR expansion
from the CT_SELECT legalization fix.
---
.../RISCV/ctselect-fallback-edge-cases.ll | 144 +++---
.../RISCV/ctselect-fallback-patterns.ll | 139 +++--
.../RISCV/ctselect-fallback-vector-rvv.ll | 476 +++++-------------
.../CodeGen/RISCV/ctselect-side-effects.ll | 13 +-
4 files changed, 266 insertions(+), 506 deletions(-)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
index af1be0c8f3ddc..06e1009485ac5 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-edge-cases.ll
@@ -6,18 +6,20 @@
define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
; RV64-LABEL: test_ctselect_i1:
; RV64: # %bb.0:
-; RV64-NEXT: and a1, a0, a1
-; RV64-NEXT: xori a0, a0, 1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_i1:
; RV32: # %bb.0:
-; RV32-NEXT: and a1, a0, a1
-; RV32-NEXT: xori a0, a0, 1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
ret i1 %result
@@ -27,21 +29,17 @@ define i1 @test_ctselect_i1(i1 %cond, i1 %a, i1 %b) {
define i32 @test_ctselect_extremal_values(i1 %cond) {
; RV64-LABEL: test_ctselect_extremal_values:
; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
; RV64-NEXT: lui a1, 524288
-; RV64-NEXT: subw a0, a1, a0
+; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_extremal_values:
; RV32: # %bb.0:
; RV32-NEXT: andi a0, a0, 1
; RV32-NEXT: lui a1, 524288
-; RV32-NEXT: addi a2, a0, -1
-; RV32-NEXT: neg a0, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: slli a0, a0, 1
-; RV32-NEXT: srli a0, a0, 1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: sub a0, a1, a0
; RV32-NEXT: ret
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 2147483647, i32 -2147483648)
ret i32 %result
@@ -53,14 +51,14 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
; RV64: # %bb.0:
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: srai a0, a0, 63
-; RV64-NEXT: and a0, a0, a1
+; RV64-NEXT: and a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_null_ptr:
; RV32: # %bb.0:
; RV32-NEXT: slli a0, a0, 31
; RV32-NEXT: srai a0, a0, 31
-; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: and a0, a1, a0
; RV32-NEXT: ret
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %ptr, ptr null)
ret ptr %result
@@ -70,22 +68,20 @@ define ptr @test_ctselect_null_ptr(i1 %cond, ptr %ptr) {
define ptr @test_ctselect_function_ptr(i1 %cond, ptr %func1, ptr %func2) {
; RV64-LABEL: test_ctselect_function_ptr:
; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: neg a3, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a1, a3, a1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_function_ptr:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %func1, ptr %func2)
ret ptr %result
@@ -97,22 +93,20 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
; RV64: # %bb.0:
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a2, a2, a3
; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a2, a0, a2
-; RV64-NEXT: not a0, a0
-; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: or a0, a2, a0
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a3, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_ptr_cmp:
; RV32: # %bb.0:
; RV32-NEXT: xor a0, a0, a1
; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xor a2, a2, a3
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a0, a2
-; RV32-NEXT: not a0, a0
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: and a0, a2, a0
+; RV32-NEXT: xor a0, a3, a0
; RV32-NEXT: ret
%cmp = icmp eq ptr %p1, %p2
%result = call ptr @llvm.ct.select.p0(i1 %cmp, ptr %a, ptr %b)
@@ -125,22 +119,20 @@ define ptr @test_ctselect_ptr_cmp(ptr %p1, ptr %p2, ptr %a, ptr %b) {
define ptr @test_ctselect_struct_ptr(i1 %cond, ptr %a, ptr %b) {
; RV64-LABEL: test_ctselect_struct_ptr:
; RV64: # %bb.0:
-; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: neg a3, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a1, a3, a1
-; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_struct_ptr:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
ret ptr %result
@@ -162,44 +154,40 @@ define i32 @test_ctselect_deeply_nested(i1 %c1, i1 %c2, i1 %c3, i1 %c4, i32 %a,
; RV64-NEXT: srai a1, a1, 63
; RV64-NEXT: srai a2, a2, 63
; RV64-NEXT: and a0, a4, a0
-; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: xor a0, a5, a0
; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: xor a1, a7, t0
-; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: xor a0, a6, a0
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: srai a3, a3, 63
; RV64-NEXT: and a0, a0, a3
-; RV64-NEXT: xor a0, a0, t0
+; RV64-NEXT: xor a0, t0, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_deeply_nested:
; RV32: # %bb.0:
; RV32-NEXT: lw t0, 0(sp)
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: andi a2, a2, 1
-; RV32-NEXT: andi a3, a3, 1
-; RV32-NEXT: neg t1, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a4, t1, a4
-; RV32-NEXT: neg t1, a1
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: and a0, a0, a5
-; RV32-NEXT: neg a5, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a1, a1, a6
-; RV32-NEXT: neg a6, a3
-; RV32-NEXT: addi a3, a3, -1
-; RV32-NEXT: and a2, a2, a7
-; RV32-NEXT: or a0, a4, a0
-; RV32-NEXT: and a0, t1, a0
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: and a0, a5, a0
-; RV32-NEXT: or a0, a0, a2
-; RV32-NEXT: and a0, a6, a0
-; RV32-NEXT: and a1, a3, t0
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a4, a4, a5
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: xor a5, a5, a6
+; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: xor a6, a6, a7
+; RV32-NEXT: slli a2, a2, 31
+; RV32-NEXT: slli a3, a3, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: srai a2, a2, 31
+; RV32-NEXT: and a0, a4, a0
+; RV32-NEXT: xor a0, a5, a0
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: xor a1, a7, t0
+; RV32-NEXT: xor a0, a6, a0
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
+; RV32-NEXT: srai a3, a3, 31
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: xor a0, t0, a0
; RV32-NEXT: ret
%sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
%sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
index 1149971fd090e..5fbc36d07db83 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-patterns.ll
@@ -7,13 +7,13 @@ define i32 @test_ctselect_smin_zero(i32 %x) {
; RV64-LABEL: test_ctselect_smin_zero:
; RV64: # %bb.0:
; RV64-NEXT: sraiw a1, a0, 31
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smin_zero:
; RV32: # %bb.0:
; RV32-NEXT: srai a1, a0, 31
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: ret
%cmp = icmp slt i32 %x, 0
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
@@ -27,14 +27,14 @@ define i32 @test_ctselect_smax_zero(i32 %x) {
; RV64-NEXT: sext.w a1, a0
; RV64-NEXT: sgtz a1, a1
; RV64-NEXT: neg a1, a1
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smax_zero:
; RV32: # %bb.0:
; RV32-NEXT: sgtz a1, a0
; RV32-NEXT: neg a1, a1
-; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: and a0, a0, a1
; RV32-NEXT: ret
%cmp = icmp sgt i32 %x, 0
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 0)
@@ -51,17 +51,16 @@ define i32 @test_ctselect_smin_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smin_generic:
; RV32: # %bb.0:
; RV32-NEXT: slt a2, a0, a1
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp slt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -78,17 +77,16 @@ define i32 @test_ctselect_smax_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_smax_generic:
; RV32: # %bb.0:
; RV32-NEXT: slt a2, a1, a0
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp sgt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -105,17 +103,16 @@ define i32 @test_ctselect_umin_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_umin_generic:
; RV32: # %bb.0:
; RV32-NEXT: sltu a2, a0, a1
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp ult i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -132,17 +129,16 @@ define i32 @test_ctselect_umax_generic(i32 %x, i32 %y) {
; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: neg a2, a2
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_umax_generic:
; RV32: # %bb.0:
; RV32-NEXT: sltu a2, a1, a0
-; RV32-NEXT: neg a3, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a0, a3, a0
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: neg a2, a2
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%cmp = icmp ugt i32 %x, %y
%result = call i32 @llvm.ct.select.i32(i1 %cmp, i32 %x, i32 %y)
@@ -157,17 +153,16 @@ define i32 @test_ctselect_abs(i32 %x) {
; RV64-NEXT: xor a1, a1, a0
; RV64-NEXT: sraiw a2, a0, 31
; RV64-NEXT: and a1, a1, a2
-; RV64-NEXT: xor a0, a1, a0
+; RV64-NEXT: xor a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_abs:
; RV32: # %bb.0:
; RV32-NEXT: neg a1, a0
+; RV32-NEXT: xor a1, a1, a0
; RV32-NEXT: srai a2, a0, 31
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: not a2, a2
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: xor a0, a0, a1
; RV32-NEXT: ret
%neg = sub i32 0, %x
%cmp = icmp slt i32 %x, 0
@@ -183,17 +178,16 @@ define i32 @test_ctselect_nabs(i32 %x) {
; RV64-NEXT: xor a2, a0, a1
; RV64-NEXT: sraiw a0, a0, 31
; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: xor a0, a1, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_nabs:
; RV32: # %bb.0:
; RV32-NEXT: neg a1, a0
-; RV32-NEXT: srai a2, a0, 31
+; RV32-NEXT: xor a2, a0, a1
+; RV32-NEXT: srai a0, a0, 31
; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: not a2, a2
-; RV32-NEXT: and a1, a2, a1
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a0, a1, a0
; RV32-NEXT: ret
%neg = sub i32 0, %x
%cmp = icmp slt i32 %x, 0
@@ -270,12 +264,7 @@ define i32 @test_ctselect_identical_operands(i1 %cond, i32 %x) {
;
; RV32-LABEL: test_ctselect_identical_operands:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a2, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a2, a1
-; RV32-NEXT: and a0, a0, a1
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: mv a0, a1
; RV32-NEXT: ret
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %x, i32 %x)
ret i32 %result
@@ -288,22 +277,21 @@ define i32 @test_ctselect_inverted_condition(i32 %x, i32 %y, i32 %a, i32 %b) {
; RV64-NEXT: sext.w a1, a1
; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: xor a0, a0, a1
-; RV64-NEXT: seqz a0, a0
-; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a3, a3, a2
; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: and a0, a2, a0
-; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: and a0, a3, a0
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_inverted_condition:
; RV32: # %bb.0:
; RV32-NEXT: xor a0, a0, a1
-; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: xor a3, a3, a2
; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a2, a0, a2
-; RV32-NEXT: not a0, a0
-; RV32-NEXT: and a0, a0, a3
-; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%cmp = icmp eq i32 %x, %y
%not_cmp = xor i1 %cmp, true
@@ -324,34 +312,31 @@ define i32 @test_ctselect_chain(i1 %c1, i1 %c2, i1 %c3, i32 %a, i32 %b, i32 %c,
; RV64-NEXT: srai a0, a0, 63
; RV64-NEXT: srai a1, a1, 63
; RV64-NEXT: and a0, a3, a0
-; RV64-NEXT: xor a0, a0, a4
+; RV64-NEXT: xor a0, a4, a0
; RV64-NEXT: and a0, a0, a1
-; RV64-NEXT: xor a0, a0, a5
+; RV64-NEXT: xor a0, a5, a0
; RV64-NEXT: srai a2, a2, 63
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: xor a0, a0, a6
+; RV64-NEXT: xor a0, a6, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_chain:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: andi a2, a2, 1
-; RV32-NEXT: neg a7, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a3, a7, a3
-; RV32-NEXT: neg a7, a1
-; RV32-NEXT: addi a1, a1, -1
-; RV32-NEXT: and a0, a0, a4
-; RV32-NEXT: neg a4, a2
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: and a1, a1, a5
-; RV32-NEXT: or a0, a3, a0
-; RV32-NEXT: and a0, a7, a0
-; RV32-NEXT: or a0, a0, a1
-; RV32-NEXT: and a0, a4, a0
-; RV32-NEXT: and a1, a2, a6
-; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: xor a3, a3, a4
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: xor a4, a4, a5
+; RV32-NEXT: slli a1, a1, 31
+; RV32-NEXT: xor a5, a5, a6
+; RV32-NEXT: slli a2, a2, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: srai a1, a1, 31
+; RV32-NEXT: and a0, a3, a0
+; RV32-NEXT: xor a0, a4, a0
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: xor a0, a5, a0
+; RV32-NEXT: srai a2, a2, 31
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: xor a0, a6, a0
; RV32-NEXT: ret
%sel1 = call i32 @llvm.ct.select.i32(i1 %c1, i32 %a, i32 %b)
%sel2 = call i32 @llvm.ct.select.i32(i1 %c2, i32 %sel1, i32 %c)
@@ -364,14 +349,14 @@ define i64 @test_ctselect_i64_smin_zero(i64 %x) {
; RV64-LABEL: test_ctselect_i64_smin_zero:
; RV64: # %bb.0:
; RV64-NEXT: srai a1, a0, 63
-; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: and a0, a0, a1
; RV64-NEXT: ret
;
; RV32-LABEL: test_ctselect_i64_smin_zero:
; RV32: # %bb.0:
; RV32-NEXT: srai a2, a1, 31
-; RV32-NEXT: and a0, a2, a0
-; RV32-NEXT: and a1, a2, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: and a1, a1, a2
; RV32-NEXT: ret
%cmp = icmp slt i64 %x, 0
%result = call i64 @llvm.ct.select.i64(i1 %cmp, i64 %x, i64 0)
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
index a02e1e4749443..6e5d3e72e14fd 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback-vector-rvv.ll
@@ -14,12 +14,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_basic:
@@ -29,12 +24,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_basic:
@@ -44,12 +34,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_basic:
@@ -59,12 +44,7 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 4 x i32> @llvm.ct.select.nxv4i32(i1 %cond, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)
ret <vscale x 4 x i32> %r
@@ -74,70 +54,46 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_basic(i1 %cond, <vscale x 4 x i32> %a,
define <vscale x 4 x i32> @ctsel_nxv4i32_load(i1 %cond, ptr %p1, ptr %p2) {
; RV64-LABEL: ctsel_nxv4i32_load:
; RV64: # %bb.0:
-; RV64-NEXT: vl2re32.v v8, (a1)
-; RV64-NEXT: vl2re32.v v10, (a2)
+; RV64-NEXT: vl2re32.v v8, (a2)
; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v12, a0
-; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v10, a0
+; RV64-NEXT: vmsne.vi v0, v10, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vle32.v v8, (a1), v0.t
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_load:
; RV32: # %bb.0:
-; RV32-NEXT: vl2re32.v v8, (a1)
-; RV32-NEXT: vl2re32.v v10, (a2)
+; RV32-NEXT: vl2re32.v v8, (a2)
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v12, a0
-; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a0
+; RV32-NEXT: vmsne.vi v0, v10, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vle32.v v8, (a1), v0.t
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_load:
; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: vl2re32.v v8, (a1)
-; RV32-V128-NEXT: vl2re32.v v10, (a2)
+; RV32-V128-NEXT: vl2re32.v v8, (a2)
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v12, a0
-; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v10, a0
+; RV32-V128-NEXT: vmsne.vi v0, v10, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-V128-NEXT: vle32.v v8, (a1), v0.t
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_load:
; RV64-V256: # %bb.0:
-; RV64-V256-NEXT: vl2re32.v v8, (a1)
-; RV64-V256-NEXT: vl2re32.v v10, (a2)
+; RV64-V256-NEXT: vl2re32.v v8, (a2)
; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v12, a0
-; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v10, a0
+; RV64-V256-NEXT: vmsne.vi v0, v10, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-V256-NEXT: vle32.v v8, (a1), v0.t
; RV64-V256-NEXT: ret
%a = load <vscale x 4 x i32>, ptr %p1, align 16
%b = load <vscale x 4 x i32>, ptr %p2, align 16
@@ -155,16 +111,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vadd.vv v8, v8, v8
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64-NEXT: vadd.vv v10, v10, v10
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vs2r.v v8, (a3)
+; RV64-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV64-NEXT: vs2r.v v10, (a3)
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_mixed:
@@ -175,16 +125,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vadd.vv v8, v8, v8
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV32-NEXT: vadd.vv v10, v10, v10
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vs2r.v v8, (a3)
+; RV32-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV32-NEXT: vs2r.v v10, (a3)
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_mixed:
@@ -195,16 +139,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vadd.vv v8, v8, v8
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV32-V128-NEXT: vadd.vv v10, v10, v10
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
-; RV32-V128-NEXT: vs2r.v v8, (a3)
+; RV32-V128-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV32-V128-NEXT: vs2r.v v10, (a3)
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_mixed:
@@ -215,16 +153,10 @@ define void @ctsel_nxv4i32_mixed(i1 %cond, ptr %p1, ptr %p2, ptr %out) {
; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vadd.vv v8, v8, v8
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; RV64-V256-NEXT: vadd.vv v10, v10, v10
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
-; RV64-V256-NEXT: vs2r.v v8, (a3)
+; RV64-V256-NEXT: vadd.vv v10, v8, v8, v0.t
+; RV64-V256-NEXT: vs2r.v v10, (a3)
; RV64-V256-NEXT: ret
%a = load <vscale x 4 x i32>, ptr %p1, align 16
%b = load <vscale x 4 x i32>, ptr %p2, align 16
@@ -290,105 +222,65 @@ define <vscale x 4 x i32> @ctsel_nxv4i32_chain(i1 %c1, i1 %c2,
; RV64-LABEL: ctsel_nxv4i32_chain:
; RV64: # %bb.0:
; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v14, 0
+; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v14, a0
+; RV64-NEXT: vmsne.vi v0, v14, 0
; RV64-NEXT: andi a1, a1, 1
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v16, a0
-; RV64-NEXT: vmsne.vi v0, v16, 0
-; RV64-NEXT: vmv.v.x v18, a1
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmsne.vi v0, v18, 0
+; RV64-NEXT: vmv.v.x v10, a1
+; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v14, v14, -1, v0
-; RV64-NEXT: vand.vv v8, v16, v8
-; RV64-NEXT: vnot.v v16, v16
-; RV64-NEXT: vand.vv v10, v16, v10
-; RV64-NEXT: vnot.v v16, v14
-; RV64-NEXT: vor.vv v8, v8, v10
-; RV64-NEXT: vand.vv v8, v14, v8
-; RV64-NEXT: vand.vv v10, v16, v12
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4i32_chain:
; RV32: # %bb.0:
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v14, 0
+; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v14, a0
+; RV32-NEXT: vmsne.vi v0, v14, 0
; RV32-NEXT: andi a1, a1, 1
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v16, a0
-; RV32-NEXT: vmsne.vi v0, v16, 0
-; RV32-NEXT: vmv.v.x v18, a1
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmsne.vi v0, v18, 0
+; RV32-NEXT: vmv.v.x v10, a1
+; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v14, v14, -1, v0
-; RV32-NEXT: vand.vv v8, v16, v8
-; RV32-NEXT: vnot.v v16, v16
-; RV32-NEXT: vand.vv v10, v16, v10
-; RV32-NEXT: vnot.v v16, v14
-; RV32-NEXT: vor.vv v8, v8, v10
-; RV32-NEXT: vand.vv v8, v14, v8
-; RV32-NEXT: vand.vv v10, v16, v12
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4i32_chain:
; RV32-V128: # %bb.0:
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v14, 0
+; RV32-V128-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v14, a0
+; RV32-V128-NEXT: vmsne.vi v0, v14, 0
; RV32-V128-NEXT: andi a1, a1, 1
-; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v16, a0
-; RV32-V128-NEXT: vmsne.vi v0, v16, 0
-; RV32-V128-NEXT: vmv.v.x v18, a1
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmerge.vim v16, v14, -1, v0
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmsne.vi v0, v18, 0
+; RV32-V128-NEXT: vmv.v.x v10, a1
+; RV32-V128-NEXT: vmsne.vi v0, v10, 0
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmerge.vim v14, v14, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v16, v8
-; RV32-V128-NEXT: vnot.v v16, v16
-; RV32-V128-NEXT: vand.vv v10, v16, v10
-; RV32-V128-NEXT: vnot.v v16, v14
-; RV32-V128-NEXT: vor.vv v8, v8, v10
-; RV32-V128-NEXT: vand.vv v8, v14, v8
-; RV32-V128-NEXT: vand.vv v10, v16, v12
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v12, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4i32_chain:
; RV64-V256: # %bb.0:
; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli a2, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v14, 0
+; RV64-V256-NEXT: vsetvli a2, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v14, a0
+; RV64-V256-NEXT: vmsne.vi v0, v14, 0
; RV64-V256-NEXT: andi a1, a1, 1
-; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v16, a0
-; RV64-V256-NEXT: vmsne.vi v0, v16, 0
-; RV64-V256-NEXT: vmv.v.x v18, a1
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmerge.vim v16, v14, -1, v0
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmsne.vi v0, v18, 0
+; RV64-V256-NEXT: vmv.v.x v10, a1
+; RV64-V256-NEXT: vmsne.vi v0, v10, 0
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmerge.vim v14, v14, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v16, v8
-; RV64-V256-NEXT: vnot.v v16, v16
-; RV64-V256-NEXT: vand.vv v10, v16, v10
-; RV64-V256-NEXT: vnot.v v16, v14
-; RV64-V256-NEXT: vor.vv v8, v8, v10
-; RV64-V256-NEXT: vand.vv v8, v14, v8
-; RV64-V256-NEXT: vand.vv v10, v16, v12
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v12, v8, v0
; RV64-V256-NEXT: ret
<vscale x 4 x i32> %a,
<vscale x 4 x i32> %b,
@@ -407,12 +299,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv8i16_basic:
@@ -422,12 +309,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv8i16_basic:
@@ -437,12 +319,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv8i16_basic:
@@ -452,12 +329,7 @@ define <vscale x 8 x i16> @ctsel_nxv8i16_basic(i1 %cond, <vscale x 8 x i16> %a,
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e16, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 8 x i16> @llvm.ct.select.nxv8i16(i1 %cond, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b)
ret <vscale x 8 x i16> %r
@@ -470,12 +342,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV64-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv16i8_basic:
@@ -484,12 +351,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV32-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv16i8_basic:
@@ -498,12 +360,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV32-V128-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv16i8_basic:
@@ -512,12 +369,7 @@ define <vscale x 16 x i8> @ctsel_nxv16i8_basic(i1 %cond, <vscale x 16 x i8> %a,
; RV64-V256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 16 x i8> @llvm.ct.select.nxv16i8(i1 %cond, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b)
ret <vscale x 16 x i8> %r
@@ -532,42 +384,37 @@ define <vscale x 2 x i64> @ctsel_nxv2i64_basic(i1 %cond, <vscale x 2 x i64> %a,
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv2i64_basic:
; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vxor.vv v8, v8, v10
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: vxor.vv v8, v10, v8
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv2i64_basic:
; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vxor.vv v8, v8, v10
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-V128-NEXT: vmv.v.i v12, 0
; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vand.vv v8, v8, v12
+; RV32-V128-NEXT: vxor.vv v8, v10, v8
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv2i64_basic:
@@ -577,12 +424,7 @@ define <vscale x 2 x i64> @ctsel_nxv2i64_basic(i1 %cond, <vscale x 2 x i64> %a,
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 2 x i64> @llvm.ct.select.nxv2i64(i1 %cond, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b)
ret <vscale x 2 x i64> %r
@@ -597,12 +439,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4f32_basic:
@@ -612,12 +449,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v12, 0
-; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4f32_basic:
@@ -627,12 +459,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v12, 0
-; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vmerge.vvm v8, v10, v8, v0
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4f32_basic:
@@ -642,12 +469,7 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 4 x float> @llvm.ct.select.nxv4f32(i1 %cond, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
ret <vscale x 4 x float> %r
@@ -657,74 +479,50 @@ define <vscale x 4 x float> @ctsel_nxv4f32_basic(i1 %cond, <vscale x 4 x float>
define <vscale x 4 x float> @ctsel_nxv4f32_arith(i1 %cond, <vscale x 4 x float> %x, <vscale x 4 x float> %y) {
; RV64-LABEL: ctsel_nxv4f32_arith:
; RV64: # %bb.0:
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vfadd.vv v12, v8, v10
-; RV64-NEXT: vfsub.vv v8, v8, v10
; RV64-NEXT: andi a0, a0, 1
-; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmv.v.i v10, 0
-; RV64-NEXT: vmerge.vim v10, v10, -1, v0
-; RV64-NEXT: vand.vv v12, v10, v12
-; RV64-NEXT: vnot.v v10, v10
-; RV64-NEXT: vand.vv v8, v10, v8
-; RV64-NEXT: vor.vv v8, v12, v8
+; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-NEXT: vmv.v.x v12, a0
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-NEXT: vfsub.vv v12, v8, v10
+; RV64-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV64-NEXT: vmv.v.v v8, v12
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv4f32_arith:
; RV32: # %bb.0:
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vfadd.vv v12, v8, v10
-; RV32-NEXT: vfsub.vv v8, v8, v10
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v10, a0
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmv.v.i v10, 0
-; RV32-NEXT: vmerge.vim v10, v10, -1, v0
-; RV32-NEXT: vand.vv v12, v10, v12
-; RV32-NEXT: vnot.v v10, v10
-; RV32-NEXT: vand.vv v8, v10, v8
-; RV32-NEXT: vor.vv v8, v12, v8
+; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v12, a0
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vfsub.vv v12, v8, v10
+; RV32-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV32-NEXT: vmv.v.v v8, v12
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv4f32_arith:
; RV32-V128: # %bb.0:
-; RV32-V128-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vfadd.vv v12, v8, v10
-; RV32-V128-NEXT: vfsub.vv v8, v8, v10
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-V128-NEXT: vmv.v.x v10, a0
-; RV32-V128-NEXT: vmsne.vi v0, v10, 0
-; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-V128-NEXT: vmv.v.i v10, 0
-; RV32-V128-NEXT: vmerge.vim v10, v10, -1, v0
-; RV32-V128-NEXT: vand.vv v12, v10, v12
-; RV32-V128-NEXT: vnot.v v10, v10
-; RV32-V128-NEXT: vand.vv v8, v10, v8
-; RV32-V128-NEXT: vor.vv v8, v12, v8
+; RV32-V128-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV32-V128-NEXT: vmv.v.x v12, a0
+; RV32-V128-NEXT: vmsne.vi v0, v12, 0
+; RV32-V128-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-V128-NEXT: vfsub.vv v12, v8, v10
+; RV32-V128-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV32-V128-NEXT: vmv.v.v v8, v12
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv4f32_arith:
; RV64-V256: # %bb.0:
-; RV64-V256-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vfadd.vv v12, v8, v10
-; RV64-V256-NEXT: vfsub.vv v8, v8, v10
; RV64-V256-NEXT: andi a0, a0, 1
-; RV64-V256-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV64-V256-NEXT: vmv.v.x v10, a0
-; RV64-V256-NEXT: vmsne.vi v0, v10, 0
-; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v10, 0
-; RV64-V256-NEXT: vmerge.vim v10, v10, -1, v0
-; RV64-V256-NEXT: vand.vv v12, v10, v12
-; RV64-V256-NEXT: vnot.v v10, v10
-; RV64-V256-NEXT: vand.vv v8, v10, v8
-; RV64-V256-NEXT: vor.vv v8, v12, v8
+; RV64-V256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
+; RV64-V256-NEXT: vmv.v.x v12, a0
+; RV64-V256-NEXT: vmsne.vi v0, v12, 0
+; RV64-V256-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV64-V256-NEXT: vfsub.vv v12, v8, v10
+; RV64-V256-NEXT: vfadd.vv v12, v8, v10, v0.t
+; RV64-V256-NEXT: vmv.v.v v8, v12
; RV64-V256-NEXT: ret
%sum = fadd <vscale x 4 x float> %x, %y
%diff = fsub <vscale x 4 x float> %x, %y
@@ -740,42 +538,37 @@ define <vscale x 2 x double> @ctsel_nxv2f64_basic(i1 %cond, <vscale x 2 x double
; RV64-NEXT: vmv.v.x v12, a0
; RV64-NEXT: vmsne.vi v0, v12, 0
; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.i v12, 0
-; RV64-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-NEXT: vand.vv v8, v12, v8
-; RV64-NEXT: vnot.v v12, v12
-; RV64-NEXT: vand.vv v10, v12, v10
-; RV64-NEXT: vor.vv v8, v8, v10
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-NEXT: ret
;
; RV32-LABEL: ctsel_nxv2f64_basic:
; RV32: # %bb.0:
+; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-NEXT: vxor.vv v8, v8, v10
; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-NEXT: vmv.v.x v12, a0
; RV32-NEXT: vmsne.vi v0, v12, 0
; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-NEXT: vmv.v.i v12, 0
; RV32-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-NEXT: vand.vv v8, v12, v8
-; RV32-NEXT: vnot.v v12, v12
-; RV32-NEXT: vand.vv v10, v12, v10
-; RV32-NEXT: vor.vv v8, v8, v10
+; RV32-NEXT: vand.vv v8, v8, v12
+; RV32-NEXT: vxor.vv v8, v10, v8
; RV32-NEXT: ret
;
; RV32-V128-LABEL: ctsel_nxv2f64_basic:
; RV32-V128: # %bb.0:
+; RV32-V128-NEXT: vsetvli a1, zero, e64, m2, ta, ma
+; RV32-V128-NEXT: vxor.vv v8, v8, v10
; RV32-V128-NEXT: andi a0, a0, 1
-; RV32-V128-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
+; RV32-V128-NEXT: vsetvli zero, zero, e8, mf4, ta, ma
; RV32-V128-NEXT: vmv.v.x v12, a0
; RV32-V128-NEXT: vmsne.vi v0, v12, 0
; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma
; RV32-V128-NEXT: vmv.v.i v12, 0
; RV32-V128-NEXT: vmerge.vim v12, v12, -1, v0
-; RV32-V128-NEXT: vand.vv v8, v12, v8
-; RV32-V128-NEXT: vnot.v v12, v12
-; RV32-V128-NEXT: vand.vv v10, v12, v10
-; RV32-V128-NEXT: vor.vv v8, v8, v10
+; RV32-V128-NEXT: vand.vv v8, v8, v12
+; RV32-V128-NEXT: vxor.vv v8, v10, v8
; RV32-V128-NEXT: ret
;
; RV64-V256-LABEL: ctsel_nxv2f64_basic:
@@ -785,12 +578,7 @@ define <vscale x 2 x double> @ctsel_nxv2f64_basic(i1 %cond, <vscale x 2 x double
; RV64-V256-NEXT: vmv.v.x v12, a0
; RV64-V256-NEXT: vmsne.vi v0, v12, 0
; RV64-V256-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV64-V256-NEXT: vmv.v.i v12, 0
-; RV64-V256-NEXT: vmerge.vim v12, v12, -1, v0
-; RV64-V256-NEXT: vand.vv v8, v12, v8
-; RV64-V256-NEXT: vnot.v v12, v12
-; RV64-V256-NEXT: vand.vv v10, v12, v10
-; RV64-V256-NEXT: vor.vv v8, v8, v10
+; RV64-V256-NEXT: vmerge.vvm v8, v10, v8, v0
; RV64-V256-NEXT: ret
%r = call <vscale x 2 x double> @llvm.ct.select.nxv2f64(i1 %cond, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
ret <vscale x 2 x double> %r
diff --git a/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
index 255d575ca8f9f..b0b289666b1db 100644
--- a/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
+++ b/llvm/test/CodeGen/RISCV/ctselect-side-effects.ll
@@ -40,17 +40,16 @@ define i32 @test_protected_no_branch(i1 %cond, i32 %a, i32 %b) {
; RV64-NEXT: slli a0, a0, 63
; RV64-NEXT: srai a0, a0, 63
; RV64-NEXT: and a0, a1, a0
-; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: xor a0, a2, a0
; RV64-NEXT: ret
;
; RV32-LABEL: test_protected_no_branch:
; RV32: # %bb.0:
-; RV32-NEXT: andi a0, a0, 1
-; RV32-NEXT: neg a3, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: and a1, a3, a1
-; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a2, a0
; RV32-NEXT: ret
%result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
ret i32 %result
More information about the llvm-branch-commits
mailing list