[llvm] [ConstantTime][LLVM] Add llvm.ct.select intrinsic with generic SelectionDAG lowering (PR #166702)
Julius Alexandre via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 5 21:46:21 PST 2025
https://github.com/wizardengineer created https://github.com/llvm/llvm-project/pull/166702
None
>From 0664c25ef5603c3b2d623fc8ee8fedad26224fe0 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 10:51:08 -0500
Subject: [PATCH] [ConstantTime][LLVM] Add llvm.ct.select intrinsic with
generic SelectionDAG lowering
---
llvm/include/llvm/CodeGen/ISDOpcodes.h | 4 +
llvm/include/llvm/CodeGen/SelectionDAG.h | 7 +
llvm/include/llvm/CodeGen/SelectionDAGNodes.h | 4 +-
llvm/include/llvm/CodeGen/TargetLowering.h | 18 +-
llvm/include/llvm/IR/Intrinsics.td | 7 +
.../include/llvm/Target/TargetSelectionDAG.td | 5 +
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 112 ++-
llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 40 +-
.../SelectionDAG/LegalizeFloatTypes.cpp | 17 +-
.../SelectionDAG/LegalizeIntegerTypes.cpp | 20 +
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 6 +-
.../SelectionDAG/LegalizeTypesGeneric.cpp | 14 +
.../SelectionDAG/LegalizeVectorTypes.cpp | 13 +
.../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1 +
.../SelectionDAG/SelectionDAGBuilder.cpp | 131 +++
.../SelectionDAG/SelectionDAGBuilder.h | 3 +
.../SelectionDAG/SelectionDAGDumper.cpp | 1 +
llvm/test/CodeGen/RISCV/ctselect-fallback.ll | 330 ++++++++
llvm/test/CodeGen/X86/ctselect.ll | 779 ++++++++++++++++++
19 files changed, 1498 insertions(+), 14 deletions(-)
create mode 100644 llvm/test/CodeGen/RISCV/ctselect-fallback.ll
create mode 100644 llvm/test/CodeGen/X86/ctselect.ll
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index ff3dd0d4c3c51..656f6e718f029 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -783,6 +783,10 @@ enum NodeType {
/// i1 then the high bits must conform to getBooleanContents.
SELECT,
+ /// Constant-time select. Takes the same operands as SELECT, but is meant to
+ /// be lowered without a data-dependent branch (e.g. with CMOV on x86).
+ CTSELECT,
+
/// Select with a vector condition (op #0) and two vector operands (ops #1
/// and #2), returning a vector result. All vectors have the same length.
/// Much like the scalar select and setcc, each bit in the condition selects
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 1a5ffb38f2568..b5debd490d9cb 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1352,6 +1352,13 @@ class SelectionDAG {
return getNode(Opcode, DL, VT, Cond, LHS, RHS, Flags);
}
+  /// Helper function to build an ISD::CTSELECT (constant-time select) node.
+  /// \p LHS is the value for a true \p Cond and \p RHS the value for a false
+  /// one; both must already have type \p VT. \p Flags are attached to the
+  /// created node.
+  SDValue getCTSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS,
+                      SDValue RHS, SDNodeFlags Flags = SDNodeFlags()) {
+    assert(LHS.getValueType() == VT && RHS.getValueType() == VT &&
+           "Cannot use ctselect on differing types");
+    return getNode(ISD::CTSELECT, DL, VT, Cond, LHS, RHS, Flags);
+  }
+
/// Helper function to make it easier to build SelectCC's if you just have an
/// ISD::CondCode instead of an SDValue.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True,
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 1759463ea7965..8e18eb2f7db0e 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -435,6 +435,9 @@ struct SDNodeFlags {
NonNeg | NoNaNs | NoInfs | SameSign | InBounds,
FastMathFlags = NoNaNs | NoInfs | NoSignedZeros | AllowReciprocal |
AllowContract | ApproximateFuncs | AllowReassociation,
+
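+ // Flag for disabling optimization of the node.
+ // NOTE(review): confirm that bit 15 is not already taken by another flag in
+ // this enum and that it fits the storage type of Flags.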
+ NoMerge = 1 << 15,
};
/// Default constructor turns off all optimization flags.
@@ -486,7 +489,6 @@ struct SDNodeFlags {
bool hasNoFPExcept() const { return Flags & NoFPExcept; }
bool hasUnpredictable() const { return Flags & Unpredictable; }
bool hasInBounds() const { return Flags & InBounds; }
-
bool operator==(const SDNodeFlags &Other) const {
return Flags == Other.Flags;
}
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 78f63b4406eb0..8198485803d8b 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -242,11 +242,15 @@ class LLVM_ABI TargetLoweringBase {
/// Enum that describes what type of support for selects the target has.
enum SelectSupportKind {
- ScalarValSelect, // The target supports scalar selects (ex: cmov).
- ScalarCondVectorVal, // The target supports selects with a scalar condition
- // and vector values (ex: cmov).
- VectorMaskSelect // The target supports vector selects with a vector
- // mask (ex: x86 blends).
+ ScalarValSelect, // The target supports scalar selects (ex: cmov).
+ ScalarCondVectorVal, // The target supports selects with a scalar condition
+ // and vector values (ex: cmov).
+ VectorMaskSelect, // The target supports vector selects with a vector
+ // mask (ex: x86 blends).
+ CtSelect, // The target implements a custom constant-time select.
+ ScalarCondVectorValCtSelect, // The target supports selects with a scalar
+ // condition and vector values.
+ VectorMaskValCtSelect, // The target supports constant-time vector selects
+ // with a vector mask.
};
/// Enum that specifies what an atomic load/AtomicRMWInst is expanded
@@ -476,8 +480,8 @@ class LLVM_ABI TargetLoweringBase {
MachineMemOperand::Flags
getVPIntrinsicMemOperandFlags(const VPIntrinsic &VPIntrin) const;
- virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
- return true;
+ virtual bool isSelectSupported(SelectSupportKind kind) const {
+ return kind != CtSelect;
}
/// Return true if the @llvm.get.active.lane.mask intrinsic should be expanded
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 6a079f62dd9cf..508d02f2eae24 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1825,6 +1825,13 @@ def int_coro_subfn_addr : DefaultAttrsIntrinsic<
[IntrReadMem, IntrArgMemOnly, ReadOnly<ArgIndex<0>>,
NoCapture<ArgIndex<0>>]>;
+///===-------------------------- Constant Time Intrinsics --------------------------===//
+//
+// Intrinsic to support constant time select
+def int_ct_select : DefaultAttrsIntrinsic<[llvm_any_ty],
+ [llvm_i1_ty, LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrWriteMem, IntrWillReturn, NoUndef<RetIndex>]>;
+
///===-------------------------- Other Intrinsics --------------------------===//
//
// TODO: We should introduce a new memory kind fo traps (and other side effects
diff --git a/llvm/include/llvm/Target/TargetSelectionDAG.td b/llvm/include/llvm/Target/TargetSelectionDAG.td
index 07a858fd682fc..c783a2aa9258f 100644
--- a/llvm/include/llvm/Target/TargetSelectionDAG.td
+++ b/llvm/include/llvm/Target/TargetSelectionDAG.td
@@ -214,6 +214,10 @@ def SDTSelect : SDTypeProfile<1, 3, [ // select
SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>
]>;
+def SDTCtSelect : SDTypeProfile<1, 3, [ // ctselect
+ SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>
+]>;
+
def SDTVSelect : SDTypeProfile<1, 3, [ // vselect
SDTCisVec<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<2, 3>, SDTCisSameNumEltsAs<0, 1>
]>;
@@ -717,6 +721,7 @@ def reset_fpmode : SDNode<"ISD::RESET_FPMODE", SDTNone, [SDNPHasChain]>;
def setcc : SDNode<"ISD::SETCC" , SDTSetCC>;
def select : SDNode<"ISD::SELECT" , SDTSelect>;
+def ctselect : SDNode<"ISD::CTSELECT" , SDTCtSelect>;
def vselect : SDNode<"ISD::VSELECT" , SDTVSelect>;
def selectcc : SDNode<"ISD::SELECT_CC" , SDTSelectCC>;
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 46c4bb85a7420..28fcebbb4a92a 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -484,6 +484,7 @@ namespace {
SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
SDValue visitCTPOP(SDNode *N);
SDValue visitSELECT(SDNode *N);
+ SDValue visitCTSELECT(SDNode *N);
SDValue visitVSELECT(SDNode *N);
SDValue visitVP_SELECT(SDNode *N);
SDValue visitSELECT_CC(SDNode *N);
@@ -1898,6 +1899,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
}
SDValue DAGCombiner::visit(SDNode *N) {
+
// clang-format off
switch (N->getOpcode()) {
default: break;
@@ -1968,6 +1970,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N);
case ISD::CTPOP: return visitCTPOP(N);
case ISD::SELECT: return visitSELECT(N);
+ case ISD::CTSELECT: return visitCTSELECT(N);
case ISD::VSELECT: return visitVSELECT(N);
case ISD::SELECT_CC: return visitSELECT_CC(N);
case ISD::SETCC: return visitSETCC(N);
@@ -6032,6 +6035,7 @@ static SDValue isSaturatingMinMax(SDValue N0, SDValue N1, SDValue N2,
N0CC = cast<CondCodeSDNode>(N0.getOperand(4))->get();
break;
case ISD::SELECT:
+ case ISD::CTSELECT:
case ISD::VSELECT:
if (N0.getOperand(0).getOpcode() != ISD::SETCC)
return SDValue();
@@ -12184,8 +12188,9 @@ template <class MatchContextClass>
static SDValue foldBoolSelectToLogic(SDNode *N, const SDLoc &DL,
SelectionDAG &DAG) {
assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT ||
- N->getOpcode() == ISD::VP_SELECT) &&
- "Expected a (v)(vp.)select");
+ N->getOpcode() == ISD::VP_SELECT ||
+ N->getOpcode() == ISD::CTSELECT) &&
+ "Expected a (v)(vp.)(ct) select");
SDValue Cond = N->getOperand(0);
SDValue T = N->getOperand(1), F = N->getOperand(2);
EVT VT = N->getValueType(0);
@@ -12547,6 +12552,109 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
+/// Combine an ISD::CTSELECT node (operands: condition, true value, false
+/// value). Tries, in order: folding a boolean select into logic ops, removing
+/// a boolean flip on the condition by swapping the two value operands, and,
+/// for i1 conditions, converting between a ctselect on an AND/OR condition and
+/// a sequence of nested ctselects. Returns an empty SDValue when no combine
+/// applies.
+SDValue DAGCombiner::visitCTSELECT(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
+ EVT VT0 = N0.getValueType();
+ SDLoc DL(N);
+ // The original node's flags are carried over to every replacement ctselect.
+ SDNodeFlags Flags = N->getFlags();
+
+ if (SDValue V = foldBoolSelectToLogic<EmptyMatchContext>(N, DL, DAG))
+ return V;
+
+ // ctselect (not Cond), N1, N2 -> ctselect Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getNode(ISD::CTSELECT, DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
+ }
+
+ if (VT0 == MVT::i1) {
+ // The code in this block deals with the following 2 equivalences:
+ // ctselect(C0|C1, x, y) <=> ctselect(C0, x, ctselect(C1, x, y))
+ // ctselect(C0&C1, x, y) <=> ctselect(C0, ctselect(C1, x, y), y)
+ // The target can specify its preferred form with the
+ // shouldNormalizeToSelectSequence() callback. However, we always transform
+ // to the right-hand form if the inner ctselect already exists in the DAG,
+ // and we always transform to the left-hand form if we know that we can
+ // further optimize the combination of the conditions.
+ bool normalizeToSequence =
+ TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
+ // ctselect (and Cond0, Cond1), X, Y
+ // -> ctselect Cond0, (ctselect Cond1, X, Y), Y
+ if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(),
+ Cond1, N1, N2, Flags);
+ // Commit to the sequence form when the target prefers it or when the inner
+ // ctselect already has users.
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Cond0,
+ InnerSelect, N2, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
+ }
+ // ctselect (or Cond0, Cond1), X, Y -> ctselect Cond0, X, (ctselect Cond1,
+ // X, Y)
+ if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
+ SDValue Cond0 = N0->getOperand(0);
+ SDValue Cond1 = N0->getOperand(1);
+ SDValue InnerSelect = DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(),
+ Cond1, N1, N2, Flags);
+ // Same decision as in the AND case above.
+ if (normalizeToSequence || !InnerSelect.use_empty())
+ return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Cond0, N1,
+ InnerSelect, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
+ }
+
+ // ctselect Cond0, (ctselect Cond1, X, Y), Y -> ctselect (and Cond0, Cond1),
+ // X, Y
+ if (N1->getOpcode() == ISD::CTSELECT && N1->hasOneUse()) {
+ SDValue N1_0 = N1->getOperand(0);
+ SDValue N1_1 = N1->getOperand(1);
+ SDValue N1_2 = N1->getOperand(2);
+ // Only applies when both ctselects share the false value and their
+ // conditions have the same type.
+ if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
+ // Create the actual and node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
+ return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), And, N1_1,
+ N2, Flags);
+ }
+ // Otherwise see if we can optimize the "and" to a better pattern.
+ if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
+ return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Combined,
+ N1_1, N2, Flags);
+ }
+ }
+ }
+ // ctselect Cond0, X, (ctselect Cond1, X, Y) -> ctselect (or Cond0, Cond1),
+ // X, Y
+ if (N2->getOpcode() == ISD::CTSELECT && N2->hasOneUse()) {
+ SDValue N2_0 = N2->getOperand(0);
+ SDValue N2_1 = N2->getOperand(1);
+ SDValue N2_2 = N2->getOperand(2);
+ // Only applies when both ctselects share the true value and their
+ // conditions have the same type.
+ if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
+ // Create the actual or node if we can generate good code for it.
+ if (!normalizeToSequence) {
+ SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
+ return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Or, N1, N2_2,
+ Flags);
+ }
+ // Otherwise see if we can optimize to a better pattern.
+ if (SDValue Combined = visitORLike(N0, N2_0, DL))
+ return DAG.getNode(ISD::CTSELECT, DL, N1.getValueType(), Combined, N1,
+ N2_2, Flags);
+ }
+ }
+ }
+
+ // No combine applied.
+ return SDValue();
+}
+
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 431a81002074f..6b89b62b62462 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -4136,6 +4136,40 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
Results.push_back(Tmp1);
break;
+  case ISD::CTSELECT: {
+    // Expand a CTSELECT into CTSELECT nodes on other types. The flags of the
+    // original node are forwarded to every replacement CTSELECT.
+    Tmp1 = Node->getOperand(0); // Condition.
+    Tmp2 = Node->getOperand(1); // Value selected when the condition is true.
+    Tmp3 = Node->getOperand(2); // Value selected when the condition is false.
+    EVT VT = Tmp2.getValueType();
+    if (VT.isVector()) {
+      // Vector values: select element by element with the same condition
+      // operand, then rebuild the vector.
+      unsigned NumElements = VT.getVectorNumElements();
+      EVT ScalarVT = VT.getScalarType();
+      SmallVector<SDValue> Elements;
+      Elements.reserve(NumElements);
+      for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
+        // Use the target's vector index type rather than a hard-coded i64,
+        // which need not be a legal type at this point (e.g. 32-bit targets).
+        SDValue IdxVal = DAG.getVectorIdxConstant(Idx, dl);
+        SDValue TVal =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Tmp2, IdxVal);
+        SDValue FVal =
+            DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ScalarVT, Tmp3, IdxVal);
+        Elements.push_back(DAG.getCTSelect(dl, ScalarVT, Tmp1, TVal, FVal,
+                                           Node->getFlags()));
+      }
+      Tmp1 = DAG.getBuildVector(VT, dl, Elements);
+    } else if (VT.isFloatingPoint()) {
+      // Scalar floating point: select on the same-sized integer bit pattern
+      // and bitcast the result back.
+      EVT IntegerVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+      Tmp2 = DAG.getBitcast(IntegerVT, Tmp2);
+      Tmp3 = DAG.getBitcast(IntegerVT, Tmp3);
+      Tmp1 = DAG.getBitcast(VT, DAG.getCTSelect(dl, IntegerVT, Tmp1, Tmp2, Tmp3,
+                                                Node->getFlags()));
+    } else {
+      // Scalar integer: select the low and high halves separately and glue
+      // them back together with BUILD_PAIR.
+      assert(VT.isInteger());
+      EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext());
+      auto [Tmp2Lo, Tmp2Hi] = DAG.SplitScalar(Tmp2, dl, HalfVT, HalfVT);
+      auto [Tmp3Lo, Tmp3Hi] = DAG.SplitScalar(Tmp3, dl, HalfVT, HalfVT);
+      SDValue ResLo =
+          DAG.getCTSelect(dl, HalfVT, Tmp1, Tmp2Lo, Tmp3Lo, Node->getFlags());
+      SDValue ResHi =
+          DAG.getCTSelect(dl, HalfVT, Tmp1, Tmp2Hi, Tmp3Hi, Node->getFlags());
+      Tmp1 = DAG.getNode(ISD::BUILD_PAIR, dl, VT, ResLo, ResHi);
+      Tmp1->setFlags(Node->getFlags());
+    }
+    Results.push_back(Tmp1);
+    break;
+  }
case ISD::BR_JT: {
SDValue Chain = Node->getOperand(0);
SDValue Table = Node->getOperand(1);
@@ -5474,7 +5508,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, OVT, Tmp2));
break;
}
- case ISD::SELECT: {
+ case ISD::SELECT:
+ case ISD::CTSELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector() ||
Node->getValueType(0).getSizeInBits() == NVT.getSizeInBits()) {
@@ -5492,7 +5527,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
// Perform the larger operation, then round down.
- Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+ Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1, Tmp2, Tmp3);
+ Tmp1->setFlags(Node->getFlags());
if (TruncOp != ISD::FP_ROUND)
Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
else
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 58983cb57d7f6..855a15a744cfe 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -159,6 +159,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_LOAD: R = SoftenFloatRes_ATOMIC_LOAD(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::CTSELECT: R = SoftenFloatRes_CTSELECT(N); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
case ISD::FREEZE: R = SoftenFloatRes_FREEZE(N); break;
case ISD::STRICT_SINT_TO_FP:
@@ -1041,6 +1042,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
+/// Soften the floating-point result of a CTSELECT: rebuild the node on the
+/// softened forms of its two value operands, keeping the condition unchanged.
+SDValue DAGTypeLegalizer::SoftenFloatRes_CTSELECT(SDNode *N) {
+  SDValue LHS = GetSoftenedFloat(N->getOperand(1));
+  SDValue RHS = GetSoftenedFloat(N->getOperand(2));
+  // Forward the node flags, matching how the CTSELECT expansion/promotion in
+  // LegalizeDAG carries them over to the replacement nodes.
+  return DAG.getCTSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS,
+                         RHS, N->getFlags());
+}
+
SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(2));
SDValue RHS = GetSoftenedFloat(N->getOperand(3));
@@ -1561,6 +1569,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::POISON:
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
+ case ISD::CTSELECT: SplitRes_Select(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::MERGE_VALUES: ExpandRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
@@ -2917,6 +2926,9 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
R = PromoteFloatRes_ATOMIC_LOAD(N);
break;
case ISD::SELECT: R = PromoteFloatRes_SELECT(N); break;
+ case ISD::CTSELECT:
+ R = PromoteFloatRes_SELECT(N);
+ break;
case ISD::SELECT_CC: R = PromoteFloatRes_SELECT_CC(N); break;
case ISD::SINT_TO_FP:
@@ -3219,7 +3231,7 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_SELECT(SDNode *N) {
SDValue TrueVal = GetPromotedFloat(N->getOperand(1));
SDValue FalseVal = GetPromotedFloat(N->getOperand(2));
- return DAG.getNode(ISD::SELECT, SDLoc(N), TrueVal->getValueType(0),
+ return DAG.getNode(N->getOpcode(), SDLoc(N), TrueVal->getValueType(0),
N->getOperand(0), TrueVal, FalseVal);
}
@@ -3403,6 +3415,9 @@ void DAGTypeLegalizer::SoftPromoteHalfResult(SDNode *N, unsigned ResNo) {
R = SoftPromoteHalfRes_ATOMIC_LOAD(N);
break;
case ISD::SELECT: R = SoftPromoteHalfRes_SELECT(N); break;
+ case ISD::CTSELECT:
+ R = SoftPromoteHalfRes_SELECT(N);
+ break;
case ISD::SELECT_CC: R = SoftPromoteHalfRes_SELECT_CC(N); break;
case ISD::STRICT_SINT_TO_FP:
case ISD::STRICT_UINT_TO_FP:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 44e5a187c4281..0135b3195438b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -95,6 +95,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_VECTOR_COMPRESS(N);
break;
case ISD::SELECT:
+ case ISD::CTSELECT:
case ISD::VSELECT:
case ISD::VP_SELECT:
case ISD::VP_MERGE:
@@ -2013,6 +2014,9 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
+ case ISD::CTSELECT:
+ Res = PromoteIntOp_CTSELECT(N, OpNo);
+ break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
case ISD::VP_SETCC:
case ISD::SETCC: Res = PromoteIntOp_SETCC(N, OpNo); break;
@@ -2390,6 +2394,19 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
N->getOperand(2)), 0);
}
+/// Promote the condition (operand 0) of a CTSELECT to the target's boolean
+/// type and update the node in place. Only the condition operand is handled.
+SDValue DAGTypeLegalizer::PromoteIntOp_CTSELECT(SDNode *N, unsigned OpNo) {
+  assert(N->getOpcode() == ISD::CTSELECT && "Expected a CTSELECT node!");
+  assert(OpNo == 0 && "Only know how to promote the condition!");
+  SDValue Cond = N->getOperand(0);
+
+  // Promote all the way up to the canonical SetCC type. The boolean type is
+  // derived from the scalar type of the selected values, as the original
+  // opcode check always did for CTSELECT.
+  EVT OpVT = N->getOperand(1).getValueType().getScalarType();
+  Cond = PromoteTargetBoolean(Cond, OpVT);
+
+  return SDValue(
+      DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), N->getOperand(2)), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Don't know how to promote this operand!");
@@ -2987,6 +3004,9 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ARITH_FENCE: SplitRes_ARITH_FENCE(N, Lo, Hi); break;
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_Select(N, Lo, Hi); break;
+ case ISD::CTSELECT:
+ SplitRes_Select(N, Lo, Hi);
+ break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::POISON:
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index ede522eff6df3..62069b4fb03a3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -401,6 +401,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_ScalarOp(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
+ SDValue PromoteIntOp_CTSELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_Shift(SDNode *N);
@@ -633,6 +634,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue SoftenFloatRes_LOAD(SDNode *N);
SDValue SoftenFloatRes_ATOMIC_LOAD(SDNode *N);
SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_CTSELECT(SDNode *N);
SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
@@ -896,6 +898,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_VSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
+ SDValue ScalarizeVecRes_CTSELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
@@ -1224,7 +1227,8 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue &Lo, SDValue &Hi);
void SplitVecRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_ARITH_FENCE (SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitRes_Select (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitRes_CTSELECT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_FREEZE (SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 88c1af20a321e..098368ef2f6b3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -570,6 +570,20 @@ void DAGTypeLegalizer::SplitRes_Select(SDNode *N, SDValue &Lo, SDValue &Hi) {
Hi = DAG.getNode(Opcode, dl, LH.getValueType(), CH, LH, RH, EVLHi);
}
+/// Split the result of a CTSELECT into two halves. Both value operands are
+/// split with GetSplitOp and each half is selected with the same (scalar)
+/// condition. \p Lo and \p Hi receive the low and high result halves.
+void DAGTypeLegalizer::SplitRes_CTSELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
+  // Check the precondition before any splitting work is done.
+  SDValue Cond = N->getOperand(0);
+  assert(!Cond.getValueType().isVector() && "Unsupported vector type");
+
+  SDValue LL, LH, RL, RH;
+  GetSplitOp(N->getOperand(1), LL, LH);
+  GetSplitOp(N->getOperand(2), RL, RH);
+
+  // Forward the node flags so both halves keep whatever was set on the
+  // original CTSELECT.
+  SDLoc dl(N);
+  const SDNodeFlags Flags = N->getFlags();
+  Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), Cond, LL, RL, Flags);
+  Hi = DAG.getNode(N->getOpcode(), dl, LH.getValueType(), Cond, LH, RH, Flags);
+}
+
void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue LL, LH, RL, RH;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index bb4a8d9967f94..ed1a199cf7576 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -77,6 +77,9 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::VSELECT: R = ScalarizeVecRes_VSELECT(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
+ case ISD::CTSELECT:
+ R = ScalarizeVecRes_CTSELECT(N);
+ break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
case ISD::POISON:
@@ -670,6 +673,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
GetScalarizedVector(N->getOperand(2)));
}
+/// Scalarize the result of a CTSELECT: rebuild the node on the scalarized
+/// forms of its two value operands, keeping the condition unchanged.
+SDValue DAGTypeLegalizer::ScalarizeVecRes_CTSELECT(SDNode *N) {
+  SDValue LHS = GetScalarizedVector(N->getOperand(1));
+  SDValue RHS = GetScalarizedVector(N->getOperand(2));
+  // Forward the node flags, matching how the CTSELECT expansion/promotion in
+  // LegalizeDAG carries them over to the replacement nodes.
+  return DAG.getCTSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS,
+                         RHS, N->getFlags());
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(2));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(),
@@ -1204,6 +1213,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT:
case ISD::VP_MERGE:
case ISD::VP_SELECT: SplitRes_Select(N, Lo, Hi); break;
+ case ISD::CTSELECT:
+ SplitRes_CTSELECT(N, Lo, Hi);
+ break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::POISON:
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
@@ -4869,6 +4881,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::VSELECT:
case ISD::SELECT:
+ case ISD::CTSELECT:
case ISD::VP_SELECT:
case ISD::VP_MERGE:
Res = WidenVecRes_Select(N);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 379242ec5a157..f960e9961c851 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -8249,6 +8249,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
break;
}
+
case ISD::SELECT:
case ISD::VSELECT:
if (SDValue V = simplifySelect(N1, N2, N3))
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index fa0c899dfcc27..1f7c0fe0d6059 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6496,6 +6496,105 @@ void SelectionDAGBuilder::visitVectorExtractLastActive(const CallInst &I,
setValue(&I, Result);
}
+/// Fallback implementation for constant-time select using DAG chaining.
+///
+/// Builds the expression (T & Mask) | (F & ~Mask), where Mask is the
+/// condition sign-extended to the working type. Floating-point operands are
+/// bitcast to same-sized integers first and the result is bitcast back. When
+/// the working type is a legal, non-scalable type, the (T & Mask) term is
+/// additionally routed through a virtual register (CopyToReg/CopyFromReg);
+/// this data dependency is meant to prevent optimizations from breaking the
+/// constant-time property.
+///
+/// \param DAG  DAG in which the nodes are created.
+/// \param DL   Debug location used for the new nodes.
+/// \param Cond Condition; a scalar condition is splatted when \p VT is a
+///             vector type.
+/// \param T    Value selected when the condition is true.
+/// \param F    Value selected when the condition is false.
+/// \param VT   Type of \p T, \p F and of the returned value.
+/// \returns the selected value, of type \p VT.
+SDValue SelectionDAGBuilder::createProtectedCtSelectFallback(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Cond, SDValue T, SDValue F,
+    EVT VT) {
+
+  SDValue WorkingT = T;
+  SDValue WorkingF = F;
+  EVT WorkingVT = VT;
+
+  // Handle vector values with a scalar condition: splat the condition so the
+  // mask below has one lane per element.
+  if (VT.isVector() && !Cond.getValueType().isVector()) {
+    ElementCount NumElems = VT.getVectorElementCount();
+    EVT CondVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElems);
+
+    if (VT.isScalableVector())
+      Cond = DAG.getSplatVector(CondVT, DL, Cond);
+    else
+      Cond = DAG.getSplatBuildVector(CondVT, DL, Cond);
+  }
+
+  // Handle floating-point types: bitcast to integer for bitwise operations.
+  if (VT.isFloatingPoint()) {
+    if (VT.isVector()) {
+      // float vector -> int vector with the same element width and count.
+      EVT ElemVT = VT.getVectorElementType();
+      unsigned ElemBitWidth = ElemVT.getScalarSizeInBits();
+      EVT IntElemVT = EVT::getIntegerVT(*DAG.getContext(), ElemBitWidth);
+
+      WorkingVT = EVT::getVectorVT(*DAG.getContext(), IntElemVT,
+                                   VT.getVectorElementCount());
+    } else {
+      WorkingVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+    }
+
+    WorkingT = DAG.getBitcast(WorkingVT, T);
+    WorkingF = DAG.getBitcast(WorkingVT, F);
+  }
+
+  // Create mask: sign-extend condition to all bits.
+  SDValue Mask = DAG.getSExtOrTrunc(Cond, DL, WorkingVT);
+
+  // Create all-ones constant for inversion.
+  SDValue AllOnes;
+  if (WorkingVT.isScalableVector()) {
+    unsigned BitWidth = WorkingVT.getScalarSizeInBits();
+    APInt AllOnesVal = APInt::getAllOnes(BitWidth);
+    SDValue ScalarAllOnes =
+        DAG.getConstant(AllOnesVal, DL, WorkingVT.getScalarType());
+    AllOnes = DAG.getSplatVector(WorkingVT, DL, ScalarAllOnes);
+  } else {
+    AllOnes = DAG.getAllOnesConstant(DL, WorkingVT);
+  }
+
+  // Invert mask for false value.
+  SDValue Invert = DAG.getNode(ISD::XOR, DL, WorkingVT, Mask, AllOnes);
+
+  // Compute: (T & Mask) | (F & ~Mask)
+  // This is constant-time because both branches are always computed.
+  SDValue TM = DAG.getNode(ISD::AND, DL, WorkingVT, Mask, WorkingT);
+
+  // DAG chaining: create data dependency through virtual register.
+  // This prevents optimizations from reordering or eliminating operations.
+  //
+  // Chaining is only used for legal, non-scalable types (scalable vectors are
+  // excluded as a conservative choice). Query legality on the EVT itself:
+  // this code runs before type legalization, so WorkingVT may be an extended
+  // type (e.g. i24), for which getSimpleVT() is not valid.
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  const bool CanUseChaining =
+      !WorkingVT.isScalableVector() && TLI.isTypeLegal(WorkingVT);
+
+  if (CanUseChaining) {
+    // A legal type is always a simple type, so getSimpleVT() is safe here.
+    MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
+    const TargetRegisterClass *RC = TLI.getRegClassFor(WorkingVT.getSimpleVT());
+    Register TMReg = MRI.createVirtualRegister(RC);
+    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), DL, TMReg, TM);
+    TM = DAG.getCopyFromReg(Chain, DL, TMReg, WorkingVT);
+  }
+
+  SDValue FM = DAG.getNode(ISD::AND, DL, WorkingVT, Invert, WorkingF);
+  SDValue Result = DAG.getNode(ISD::OR, DL, WorkingVT, TM, FM);
+
+  // Convert back to original type if needed.
+  if (WorkingVT != VT)
+    Result = DAG.getBitcast(VT, Result);
+
+  return Result;
+}
+
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
@@ -6674,6 +6773,38 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
updateDAGForMaybeTailCall(MC);
return;
}
+  case Intrinsic::ct_select: {
+    // Record on the function, via an attribute, that it uses ct.select.
+    Function &Fn = DAG.getMachineFunction().getFunction();
+    Fn.addFnAttr("ct-select");
+
+    SDLoc DL = getCurSDLoc();
+
+    SDValue CondV = getValue(I.getArgOperand(0));  // i1 condition
+    SDValue TrueV = getValue(I.getArgOperand(1));  // value if true
+    SDValue FalseV = getValue(I.getArgOperand(2)); // value if false
+
+    assert((TrueV.getValueType() == FalseV.getValueType()) &&
+           "Operands are of different types");
+
+    EVT VT = TrueV.getValueType();
+    const bool CondIsVector = CondV.getValueType().isVector();
+
+    // Vector conditions are not supported; with assertions disabled they
+    // fall through to the generic fallback below.
+    assert(!CondIsVector && "Vector type cond not supported yet");
+
+    // Emit a CTSELECT node only when the target reports constant-time select
+    // support; otherwise build the generic mask-based fallback sequence.
+    const bool EmitCtSelectNode =
+        TLI.isSelectSupported(
+            TargetLoweringBase::SelectSupportKind::CtSelect) &&
+        !CondIsVector;
+
+    SDValue Result =
+        EmitCtSelectNode
+            ? DAG.getNode(ISD::CTSELECT, DL, VT, CondV, TrueV, FalseV)
+            : createProtectedCtSelectFallback(DAG, DL, CondV, TrueV, FalseV,
+                                              VT);
+    setValue(&I, Result);
+    return;
+  }
case Intrinsic::call_preallocated_setup: {
const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 47e19f77a15e7..5c02bd6b8a4ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -219,6 +219,9 @@ class SelectionDAGBuilder {
peelDominantCaseCluster(const SwitchInst &SI,
SwitchCG::CaseClusterVector &Clusters,
BranchProbability &PeeledCaseProb);
+ SDValue createProtectedCtSelectFallback(SelectionDAG &DAG, const SDLoc &DL,
+ SDValue Cond, SDValue T, SDValue F,
+ EVT VT);
private:
const TargetMachine &TM;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 77377d348b836..aafbc5e4401c8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -332,6 +332,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FPOWI: return "fpowi";
case ISD::STRICT_FPOWI: return "strict_fpowi";
case ISD::SETCC: return "setcc";
+ case ISD::CTSELECT: return "ctselect";
case ISD::SETCCCARRY: return "setcccarry";
case ISD::STRICT_FSETCC: return "strict_fsetcc";
case ISD::STRICT_FSETCCS: return "strict_fsetccs";
diff --git a/llvm/test/CodeGen/RISCV/ctselect-fallback.ll b/llvm/test/CodeGen/RISCV/ctselect-fallback.ll
new file mode 100644
index 0000000000000..f46bde0a05b8b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/ctselect-fallback.ll
@@ -0,0 +1,330 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -O3 | FileCheck %s --check-prefix=RV64
+; RUN: llc < %s -mtriple=riscv32 -O3 | FileCheck %s --check-prefix=RV32
+
+; Test basic ct.select functionality for scalar types
+define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
+; RV64-LABEL: test_ctselect_i8:
+; RV64: # %bb.0:
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_i8:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: ret
+ %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
+ ret i8 %result
+}
+
+define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
+; RV64-LABEL: test_ctselect_i16:
+; RV64: # %bb.0:
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_i16:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a1, a1, a2
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a0, a1, a0
+; RV32-NEXT: xor a0, a0, a2
+; RV32-NEXT: ret
+ %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
+ ret i16 %result
+}
+
+define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_i32:
+; RV64: # %bb.0:
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_i32:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a3, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a3, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
+; RV64-LABEL: test_ctselect_i64:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: neg a3, a0
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a1, a3, a1
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_i64:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a1, a1, a3
+; RV32-NEXT: slli a0, a0, 31
+; RV32-NEXT: xor a2, a2, a4
+; RV32-NEXT: srai a0, a0, 31
+; RV32-NEXT: and a1, a1, a0
+; RV32-NEXT: and a2, a2, a0
+; RV32-NEXT: xor a0, a1, a3
+; RV32-NEXT: xor a1, a2, a4
+; RV32-NEXT: ret
+ %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
+ ret i64 %result
+}
+
+define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
+; RV64-LABEL: test_ctselect_ptr:
+; RV64: # %bb.0:
+; RV64-NEXT: andi a0, a0, 1
+; RV64-NEXT: neg a3, a0
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a1, a3, a1
+; RV64-NEXT: and a0, a0, a2
+; RV64-NEXT: or a0, a1, a0
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_ptr:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a3, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a3, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+ ret ptr %result
+}
+
+; Test with constant conditions
+define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_const_true:
+; RV64: # %bb.0:
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_const_true:
+; RV32: # %bb.0:
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_const_false:
+; RV64: # %bb.0:
+; RV64-NEXT: mv a0, a1
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_const_false:
+; RV32: # %bb.0:
+; RV32-NEXT: mv a0, a1
+; RV32-NEXT: ret
+ %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test with comparison conditions
+define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_icmp_eq:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_icmp_eq:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a2, a0, a2
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: ret
+ %cond = icmp eq i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_icmp_ne:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: xor a0, a0, a1
+; RV64-NEXT: seqz a0, a0
+; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: addi a0, a0, -1
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_icmp_ne:
+; RV32: # %bb.0:
+; RV32-NEXT: xor a0, a0, a1
+; RV32-NEXT: seqz a0, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a2, a0, a2
+; RV32-NEXT: not a0, a0
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: or a0, a2, a0
+; RV32-NEXT: ret
+ %cond = icmp ne i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_icmp_slt:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: slt a0, a0, a1
+; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: neg a0, a0
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_icmp_slt:
+; RV32: # %bb.0:
+; RV32-NEXT: slt a0, a0, a1
+; RV32-NEXT: neg a1, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %cond = icmp slt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
+; RV64-LABEL: test_ctselect_icmp_ult:
+; RV64: # %bb.0:
+; RV64-NEXT: sext.w a1, a1
+; RV64-NEXT: sext.w a0, a0
+; RV64-NEXT: sltu a0, a0, a1
+; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: neg a0, a0
+; RV64-NEXT: and a0, a2, a0
+; RV64-NEXT: xor a0, a0, a3
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_icmp_ult:
+; RV32: # %bb.0:
+; RV32-NEXT: sltu a0, a0, a1
+; RV32-NEXT: neg a1, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a1, a2
+; RV32-NEXT: and a0, a0, a3
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %cond = icmp ult i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test with memory operands
+define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
+; RV64-LABEL: test_ctselect_load:
+; RV64: # %bb.0:
+; RV64-NEXT: lw a1, 0(a1)
+; RV64-NEXT: lw a2, 0(a2)
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: xor a1, a1, a2
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a0, a2
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_load:
+; RV32: # %bb.0:
+; RV32-NEXT: lw a1, 0(a1)
+; RV32-NEXT: lw a2, 0(a2)
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a3, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a3, a1
+; RV32-NEXT: and a0, a0, a2
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %a = load i32, ptr %p1
+ %b = load i32, ptr %p2
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test nested ctselect calls
+define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
+; RV64-LABEL: test_ctselect_nested:
+; RV64: # %bb.0:
+; RV64-NEXT: xor a2, a2, a3
+; RV64-NEXT: slli a1, a1, 63
+; RV64-NEXT: xor a3, a3, a4
+; RV64-NEXT: slli a0, a0, 63
+; RV64-NEXT: srai a1, a1, 63
+; RV64-NEXT: and a1, a2, a1
+; RV64-NEXT: xor a1, a1, a3
+; RV64-NEXT: srai a0, a0, 63
+; RV64-NEXT: and a0, a1, a0
+; RV64-NEXT: xor a0, a0, a4
+; RV64-NEXT: ret
+;
+; RV32-LABEL: test_ctselect_nested:
+; RV32: # %bb.0:
+; RV32-NEXT: andi a1, a1, 1
+; RV32-NEXT: andi a0, a0, 1
+; RV32-NEXT: neg a5, a1
+; RV32-NEXT: addi a1, a1, -1
+; RV32-NEXT: and a2, a5, a2
+; RV32-NEXT: neg a5, a0
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: and a1, a1, a3
+; RV32-NEXT: or a1, a2, a1
+; RV32-NEXT: and a1, a5, a1
+; RV32-NEXT: and a0, a0, a4
+; RV32-NEXT: or a0, a1, a0
+; RV32-NEXT: ret
+ %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
+ %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
+ ret i32 %result
+}
+
+; Declare the intrinsics
+declare i8 @llvm.ct.select.i8(i1, i8, i8)
+declare i16 @llvm.ct.select.i16(i1, i16, i16)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
diff --git a/llvm/test/CodeGen/X86/ctselect.ll b/llvm/test/CodeGen/X86/ctselect.ll
new file mode 100644
index 0000000000000..095787a5e2a4b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ctselect.ll
@@ -0,0 +1,779 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+cmov | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=-cmov | FileCheck %s --check-prefix=X32-NOCMOV
+
+; Test basic ct.select functionality for scalar types
+
+define i8 @test_ctselect_i8(i1 %cond, i8 %a, i8 %b) {
+; X64-LABEL: test_ctselect_i8:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andb $1, %dil
+; X64-NEXT: leal -1(%rdi), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negb %cl
+; X64-NEXT: andb %sil, %cl
+; X64-NEXT: andb %dl, %al
+; X64-NEXT: orb %cl, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_i8:
+; X32: # %bb.0:
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andb $1, %al
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negb %cl
+; X32-NEXT: andb {{[0-9]+}}(%esp), %cl
+; X32-NEXT: decb %al
+; X32-NEXT: andb {{[0-9]+}}(%esp), %al
+; X32-NEXT: orb %cl, %al
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_i8:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andb $1, %al
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negb %cl
+; X32-NOCMOV-NEXT: andb {{[0-9]+}}(%esp), %cl
+; X32-NOCMOV-NEXT: decb %al
+; X32-NOCMOV-NEXT: andb {{[0-9]+}}(%esp), %al
+; X32-NOCMOV-NEXT: orb %cl, %al
+; X32-NOCMOV-NEXT: retl
+ %result = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
+ ret i8 %result
+}
+
+define i16 @test_ctselect_i16(i1 %cond, i16 %a, i16 %b) {
+; X64-LABEL: test_ctselect_i16:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leal -1(%rdi), %ecx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: andl %esi, %eax
+; X64-NEXT: andl %edx, %ecx
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_i16:
+; X32: # %bb.0:
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: leal -1(%eax), %ecx
+; X32-NEXT: andw {{[0-9]+}}(%esp), %cx
+; X32-NEXT: negl %eax
+; X32-NEXT: andw {{[0-9]+}}(%esp), %ax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: # kill: def $ax killed $ax killed $eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_i16:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: leal -1(%eax), %ecx
+; X32-NOCMOV-NEXT: andw {{[0-9]+}}(%esp), %cx
+; X32-NOCMOV-NEXT: negl %eax
+; X32-NOCMOV-NEXT: andw {{[0-9]+}}(%esp), %ax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: # kill: def $ax killed $ax killed $eax
+; X32-NOCMOV-NEXT: retl
+ %result = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
+ ret i16 %result
+}
+
+define i32 @test_ctselect_i32(i1 %cond, i32 %a, i32 %b) {
+; X64-LABEL: test_ctselect_i32:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leal -1(%rdi), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: andl %esi, %ecx
+; X64-NEXT: andl %edx, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_i32:
+; X32: # %bb.0:
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_i32:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: retl
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i64 @test_ctselect_i64(i1 %cond, i64 %a, i64 %b) {
+; X64-LABEL: test_ctselect_i64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leaq -1(%rdi), %rax
+; X64-NEXT: negq %rdi
+; X64-NEXT: andq %rsi, %rdi
+; X64-NEXT: andq %rdx, %rax
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_i64:
+; X32: # %bb.0:
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: xorl %edx, %eax
+; X32-NEXT: andl $1, %esi
+; X32-NEXT: negl %esi
+; X32-NEXT: andl %esi, %eax
+; X32-NEXT: xorl %edx, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: xorl %ecx, %edx
+; X32-NEXT: andl %esi, %edx
+; X32-NEXT: xorl %ecx, %edx
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_i64:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: pushl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: .cfi_offset %esi, -8
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %esi
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: xorl %edx, %eax
+; X32-NOCMOV-NEXT: andl $1, %esi
+; X32-NOCMOV-NEXT: negl %esi
+; X32-NOCMOV-NEXT: andl %esi, %eax
+; X32-NOCMOV-NEXT: xorl %edx, %eax
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: xorl %ecx, %edx
+; X32-NOCMOV-NEXT: andl %esi, %edx
+; X32-NOCMOV-NEXT: xorl %ecx, %edx
+; X32-NOCMOV-NEXT: popl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
+; X32-NOCMOV-NEXT: retl
+ %result = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
+ ret i64 %result
+}
+
+define float @test_ctselect_f32(i1 %cond, float %a, float %b) {
+; X64-LABEL: test_ctselect_f32:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm1, %eax
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: movl %edi, %edx
+; X64-NEXT: negl %edx
+; X64-NEXT: andl %ecx, %edx
+; X64-NEXT: decl %edi
+; X64-NEXT: andl %eax, %edi
+; X64-NEXT: orl %edx, %edi
+; X64-NEXT: movd %edi, %xmm0
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_f32:
+; X32: # %bb.0:
+; X32-NEXT: pushl %eax
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: flds (%esp)
+; X32-NEXT: popl %eax
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_f32:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: pushl %eax
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: movl %eax, (%esp)
+; X32-NOCMOV-NEXT: flds (%esp)
+; X32-NOCMOV-NEXT: popl %eax
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
+; X32-NOCMOV-NEXT: retl
+ %result = call float @llvm.ct.select.f32(i1 %cond, float %a, float %b)
+ ret float %result
+}
+
+define double @test_ctselect_f64(i1 %cond, double %a, double %b) {
+; X64-LABEL: test_ctselect_f64:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: movq %xmm1, %rax
+; X64-NEXT: movq %xmm0, %rcx
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: movq %rdi, %rdx
+; X64-NEXT: negq %rdx
+; X64-NEXT: andq %rcx, %rdx
+; X64-NEXT: decq %rdi
+; X64-NEXT: andq %rax, %rdi
+; X64-NEXT: orq %rdx, %rdi
+; X64-NEXT: movq %rdi, %xmm0
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_f64:
+; X32: # %bb.0:
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 16
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: xorl %edx, %esi
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl %ecx, %esi
+; X32-NEXT: xorl %edx, %esi
+; X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: xorl %eax, %edx
+; X32-NEXT: andl %ecx, %edx
+; X32-NEXT: xorl %eax, %edx
+; X32-NEXT: movl %edx, (%esp)
+; X32-NEXT: fldl (%esp)
+; X32-NEXT: addl $8, %esp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_f64:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: pushl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: subl $8, %esp
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 16
+; X32-NOCMOV-NEXT: .cfi_offset %esi, -8
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NOCMOV-NEXT: xorl %edx, %esi
+; X32-NOCMOV-NEXT: andl $1, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl %ecx, %esi
+; X32-NOCMOV-NEXT: xorl %edx, %esi
+; X32-NOCMOV-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: xorl %eax, %edx
+; X32-NOCMOV-NEXT: andl %ecx, %edx
+; X32-NOCMOV-NEXT: xorl %eax, %edx
+; X32-NOCMOV-NEXT: movl %edx, (%esp)
+; X32-NOCMOV-NEXT: fldl (%esp)
+; X32-NOCMOV-NEXT: addl $8, %esp
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: popl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
+; X32-NOCMOV-NEXT: retl
+ %result = call double @llvm.ct.select.f64(i1 %cond, double %a, double %b)
+ ret double %result
+}
+
+define ptr @test_ctselect_ptr(i1 %cond, ptr %a, ptr %b) {
+; X64-LABEL: test_ctselect_ptr:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leaq -1(%rdi), %rax
+; X64-NEXT: negq %rdi
+; X64-NEXT: andq %rsi, %rdi
+; X64-NEXT: andq %rdx, %rax
+; X64-NEXT: orq %rdi, %rax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_ptr:
+; X32: # %bb.0:
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_ptr:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: retl
+ %result = call ptr @llvm.ct.select.p0(i1 %cond, ptr %a, ptr %b)
+ ret ptr %result
+}
+
+; Test with constant conditions
+define i32 @test_ctselect_const_true(i32 %a, i32 %b) {
+; X64-LABEL: test_ctselect_const_true:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_const_true:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_const_true:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: retl
+ %result = call i32 @llvm.ct.select.i32(i1 true, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_const_false(i32 %a, i32 %b) {
+; X64-LABEL: test_ctselect_const_false:
+; X64: # %bb.0:
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_const_false:
+; X32: # %bb.0:
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_const_false:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: xorl %eax, %eax
+; X32-NOCMOV-NEXT: orl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: retl
+ %result = call i32 @llvm.ct.select.i32(i1 false, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test with comparison conditions
+define i32 @test_ctselect_icmp_eq(i32 %x, i32 %y, i32 %a, i32 %b) {
+; X64-LABEL: test_ctselect_icmp_eq:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: movl %eax, %esi
+; X64-NEXT: negl %esi
+; X64-NEXT: andl %edx, %esi
+; X64-NEXT: decl %eax
+; X64-NEXT: andl %ecx, %eax
+; X64-NEXT: orl %esi, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_icmp_eq:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: sete %al
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_icmp_eq:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: xorl %eax, %eax
+; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: sete %al
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: retl
+ %cond = icmp eq i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ne(i32 %x, i32 %y, i32 %a, i32 %b) {
+; X64-LABEL: test_ctselect_icmp_ne:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setne %al
+; X64-NEXT: movl %eax, %esi
+; X64-NEXT: negl %esi
+; X64-NEXT: andl %edx, %esi
+; X64-NEXT: decl %eax
+; X64-NEXT: andl %ecx, %eax
+; X64-NEXT: orl %esi, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_icmp_ne:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: setne %al
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_icmp_ne:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: xorl %eax, %eax
+; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: setne %al
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: retl
+ %cond = icmp ne i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_slt(i32 %x, i32 %y, i32 %a, i32 %b) {
+; X64-LABEL: test_ctselect_icmp_slt:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: setl %al
+; X64-NEXT: movl %eax, %esi
+; X64-NEXT: negl %esi
+; X64-NEXT: andl %edx, %esi
+; X64-NEXT: decl %eax
+; X64-NEXT: andl %ecx, %eax
+; X64-NEXT: orl %esi, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_icmp_slt:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: setl %al
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_icmp_slt:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: xorl %eax, %eax
+; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: setl %al
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: retl
+ %cond = icmp slt i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define i32 @test_ctselect_icmp_ult(i32 %x, i32 %y, i32 %a, i32 %b) {
+; X64-LABEL: test_ctselect_icmp_ult:
+; X64: # %bb.0:
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl %esi, %edi
+; X64-NEXT: sbbl %eax, %eax
+; X64-NEXT: andl %eax, %edx
+; X64-NEXT: notl %eax
+; X64-NEXT: andl %ecx, %eax
+; X64-NEXT: orl %edx, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_icmp_ult:
+; X32: # %bb.0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: sbbl %eax, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl %eax, %ecx
+; X32-NEXT: notl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_icmp_ult:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: xorl %eax, %eax
+; X32-NOCMOV-NEXT: cmpl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: sbbl %eax, %eax
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: andl %eax, %ecx
+; X32-NOCMOV-NEXT: notl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: retl
+ %cond = icmp ult i32 %x, %y
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+define float @test_ctselect_fcmp_oeq(float %x, float %y, float %a, float %b) {
+; X64-LABEL: test_ctselect_fcmp_oeq:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm3, %eax
+; X64-NEXT: cmpeqss %xmm1, %xmm0
+; X64-NEXT: movd %xmm0, %ecx
+; X64-NEXT: pand %xmm2, %xmm0
+; X64-NEXT: movd %xmm0, %edx
+; X64-NEXT: notl %ecx
+; X64-NEXT: andl %eax, %ecx
+; X64-NEXT: orl %edx, %ecx
+; X64-NEXT: movd %ecx, %xmm0
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_fcmp_oeq:
+; X32: # %bb.0:
+; X32-NEXT: pushl %eax
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NEXT: fucompi %st(1), %st
+; X32-NEXT: fstp %st(0)
+; X32-NEXT: setnp %al
+; X32-NEXT: sete %cl
+; X32-NEXT: andb %al, %cl
+; X32-NEXT: movzbl %cl, %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: negl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %ecx, %eax
+; X32-NEXT: movl %eax, (%esp)
+; X32-NEXT: flds (%esp)
+; X32-NEXT: popl %eax
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_fcmp_oeq:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: pushl %eax
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: flds {{[0-9]+}}(%esp)
+; X32-NOCMOV-NEXT: fucompp
+; X32-NOCMOV-NEXT: fnstsw %ax
+; X32-NOCMOV-NEXT: # kill: def $ah killed $ah killed $ax
+; X32-NOCMOV-NEXT: sahf
+; X32-NOCMOV-NEXT: setnp %al
+; X32-NOCMOV-NEXT: sete %cl
+; X32-NOCMOV-NEXT: andb %al, %cl
+; X32-NOCMOV-NEXT: movzbl %cl, %eax
+; X32-NOCMOV-NEXT: movl %eax, %ecx
+; X32-NOCMOV-NEXT: negl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %ecx, %eax
+; X32-NOCMOV-NEXT: movl %eax, (%esp)
+; X32-NOCMOV-NEXT: flds (%esp)
+; X32-NOCMOV-NEXT: popl %eax
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
+; X32-NOCMOV-NEXT: retl
+ %cond = fcmp oeq float %x, %y
+ %result = call float @llvm.ct.select.f32(i1 %cond, float %a, float %b)
+ ret float %result
+}
+
+; Test ctselect on memory operands, where both values are loaded from %p1 and %p2 before the select and the expected code folds each load into an andl with the condition mask
+define i32 @test_ctselect_load(i1 %cond, ptr %p1, ptr %p2) {
+; X64-LABEL: test_ctselect_load:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leal -1(%rdi), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: andl (%rsi), %ecx
+; X64-NEXT: andl (%rdx), %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_load:
+; X32: # %bb.0:
+; X32-NEXT: pushl %esi
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %esi, -8
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: movl %eax, %esi
+; X32-NEXT: negl %esi
+; X32-NEXT: andl (%edx), %esi
+; X32-NEXT: decl %eax
+; X32-NEXT: andl (%ecx), %eax
+; X32-NEXT: orl %esi, %eax
+; X32-NEXT: popl %esi
+; X32-NEXT: .cfi_def_cfa_offset 4
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_load:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: pushl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 8
+; X32-NOCMOV-NEXT: .cfi_offset %esi, -8
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movl %eax, %esi
+; X32-NOCMOV-NEXT: negl %esi
+; X32-NOCMOV-NEXT: andl (%edx), %esi
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl (%ecx), %eax
+; X32-NOCMOV-NEXT: orl %esi, %eax
+; X32-NOCMOV-NEXT: popl %esi
+; X32-NOCMOV-NEXT: .cfi_def_cfa_offset 4
+; X32-NOCMOV-NEXT: retl
+ %a = load i32, ptr %p1
+ %b = load i32, ptr %p2
+ %result = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %result
+}
+
+; Test nested ctselect calls, where the inner select on %cond2 (between %a and %b) supplies the first value operand of the outer select on %cond1 (against %c)
+define i32 @test_ctselect_nested(i1 %cond1, i1 %cond2, i32 %a, i32 %b, i32 %c) {
+; X64-LABEL: test_ctselect_nested:
+; X64: # %bb.0:
+; X64-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: andl $1, %esi
+; X64-NEXT: leal -1(%rsi), %r9d
+; X64-NEXT: movl %esi, %eax
+; X64-NEXT: negl %eax
+; X64-NEXT: andl %edx, %eax
+; X64-NEXT: andl %ecx, %r9d
+; X64-NEXT: orl %eax, %r9d
+; X64-NEXT: andl $1, %edi
+; X64-NEXT: leal -1(%rdi), %eax
+; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: negl %ecx
+; X64-NEXT: andl %r9d, %ecx
+; X64-NEXT: andl %r8d, %eax
+; X64-NEXT: orl %ecx, %eax
+; X64-NEXT: retq
+;
+; X32-LABEL: test_ctselect_nested:
+; X32: # %bb.0:
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: negl %edx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: decl %ecx
+; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: orl %edx, %ecx
+; X32-NEXT: andl $1, %eax
+; X32-NEXT: movl %eax, %edx
+; X32-NEXT: negl %edx
+; X32-NEXT: andl %ecx, %edx
+; X32-NEXT: decl %eax
+; X32-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: orl %edx, %eax
+; X32-NEXT: retl
+;
+; X32-NOCMOV-LABEL: test_ctselect_nested:
+; X32-NOCMOV: # %bb.0:
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: andl $1, %ecx
+; X32-NOCMOV-NEXT: movl %ecx, %edx
+; X32-NOCMOV-NEXT: negl %edx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %edx
+; X32-NOCMOV-NEXT: decl %ecx
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %ecx
+; X32-NOCMOV-NEXT: orl %edx, %ecx
+; X32-NOCMOV-NEXT: andl $1, %eax
+; X32-NOCMOV-NEXT: movl %eax, %edx
+; X32-NOCMOV-NEXT: negl %edx
+; X32-NOCMOV-NEXT: andl %ecx, %edx
+; X32-NOCMOV-NEXT: decl %eax
+; X32-NOCMOV-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X32-NOCMOV-NEXT: orl %edx, %eax
+; X32-NOCMOV-NEXT: retl
+ %inner = call i32 @llvm.ct.select.i32(i1 %cond2, i32 %a, i32 %b)
+ %result = call i32 @llvm.ct.select.i32(i1 %cond1, i32 %inner, i32 %c)
+ ret i32 %result
+}
+
+; Declare the llvm.ct.select intrinsic overloads (i8, i16, i32, i64, float, double, ptr), each taking an i1 condition and two values of the result type
+declare i8 @llvm.ct.select.i8(i1, i8, i8)
+declare i16 @llvm.ct.select.i16(i1, i16, i16)
+declare i32 @llvm.ct.select.i32(i1, i32, i32)
+declare i64 @llvm.ct.select.i64(i1, i64, i64)
+declare float @llvm.ct.select.f32(i1, float, float)
+declare double @llvm.ct.select.f64(i1, double, double)
+declare ptr @llvm.ct.select.p0(i1, ptr, ptr)
More information about the llvm-commits
mailing list