[llvm-branch-commits] [llvm] [ConstantTime] Native ct.select support for ARM32 and Thumb (PR #166707)
Julius Alexandre via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Thu Nov 6 09:28:57 PST 2025
https://github.com/wizardengineer updated https://github.com/llvm/llvm-project/pull/166707
>From 8d58556a5ae181cd09088848696e8566e99cab34 Mon Sep 17 00:00:00 2001
From: wizardengineer <juliuswoosebert at gmail.com>
Date: Wed, 5 Nov 2025 17:10:05 -0500
Subject: [PATCH] [LLVM][ARM] Add native ct.select support for ARM32 and Thumb
This patch implements architecture-specific lowering for ct.select on ARM
(both ARM32 and Thumb modes) using conditional move instructions and
bitwise operations for constant-time selection.
Implementation details:
- Uses pseudo-instructions that are expanded Post-RA to bitwise operations
- Post-RA expansion in ARMBaseInstrInfo turns each CTSELECT pseudo-instruction into a BUNDLE of real instructions
- Handles scalar integer types, floating-point, and half-precision types
- Handles vector types with NEON when available
- Support for both ARM and Thumb instruction sets (Thumb1 and Thumb2)
- Special handling for Thumb1 which lacks conditional execution
- Comprehensive test coverage including half-precision and vectors
The implementation includes:
- ISelLowering: Custom lowering to CTSELECT pseudo-instructions
- ISelDAGToDAG: Selection of appropriate pseudo-instructions
- BaseInstrInfo: Post-RA expansion of CTSELECT pseudos into bundled bitwise instruction sequences
- InstrInfo.td: Pseudo-instruction definitions for different types
- TargetMachine: Registration of Post-RA expansion pass
- Proper handling of condition codes and register allocation constraints
---
llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 337 +++-
llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 6 +
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 86 +
llvm/lib/Target/ARM/ARMISelLowering.cpp | 184 +-
llvm/lib/Target/ARM/ARMISelLowering.h | 13 +-
llvm/lib/Target/ARM/ARMInstrInfo.td | 185 ++
llvm/lib/Target/ARM/ARMTargetMachine.cpp | 8 +-
llvm/test/CodeGen/ARM/ctselect-half.ll | 975 ++++++++++
llvm/test/CodeGen/ARM/ctselect-vector.ll | 2179 ++++++++++++++++++++++
llvm/test/CodeGen/ARM/ctselect.ll | 555 ++++++
10 files changed, 4499 insertions(+), 29 deletions(-)
create mode 100644 llvm/test/CodeGen/ARM/ctselect-half.ll
create mode 100644 llvm/test/CodeGen/ARM/ctselect-vector.ll
create mode 100644 llvm/test/CodeGen/ARM/ctselect.ll
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 22769dbf38719..6d8a3b72244fe 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1526,18 +1526,351 @@ void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
BB->erase(MI);
}
+// Expands a vector ctselect pseudo post-RA into a bundled bitwise select; erases MI and returns true.
+bool ARMBaseInstrInfo::expandCtSelectVector(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register DestReg = MI.getOperand(0).getReg(); // $dst
+ Register MaskReg = MI.getOperand(1).getReg(); // $tmp_mask (receives the RSB result)
+
+ // Start with the D-register opcodes; the Q-register forms are chosen below.
+ unsigned AndOp = ARM::VANDd;
+ unsigned BicOp = ARM::VBICd;
+ unsigned OrrOp = ARM::VORRd;
+ unsigned BroadcastOp = ARM::VDUP32d;
+
+ const TargetRegisterInfo *TRI = &getRegisterInfo();
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(DestReg);
+
+ if (ARM::QPRRegClass.hasSubClassEq(RC)) { // $dst is in QPR or one of its subclasses
+ AndOp = ARM::VANDq;
+ BicOp = ARM::VBICq;
+ OrrOp = ARM::VORRq;
+ BroadcastOp = ARM::VDUP32q;
+ }
+
+ unsigned RsbOp = Subtarget.isThumb2() ? ARM::t2RSBri : ARM::RSBri;
+
+ // Any vector pseudo has: ((outs $dst, $tmp_mask, $bcast_mask), (ins $src1,
+ // $src2, $cond))
+ Register VectorMaskReg = MI.getOperand(2).getReg(); // $bcast_mask
+ Register Src1Reg = MI.getOperand(3).getReg();
+ Register Src2Reg = MI.getOperand(4).getReg();
+ Register CondReg = MI.getOperand(5).getReg();
+
+ // The following sequence of steps yields: (src1 & mask) | (src2 & ~mask)
+
+ // 1. mask = 0 - cond (scalar, written to $tmp_mask)
+ // When cond = 0: mask = 0x00000000.
+ // When cond = 1: mask = 0xFFFFFFFF.
+
+ MachineInstr *FirstNewMI = BuildMI(*MBB, MI, DL, get(RsbOp), MaskReg)
+ .addReg(CondReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 2. A = src1 & mask
+ // For vectors, broadcast the scalar mask so it matches operand size.
+ BuildMI(*MBB, MI, DL, get(BroadcastOp), VectorMaskReg)
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ BuildMI(*MBB, MI, DL, get(AndOp), DestReg)
+ .addReg(Src1Reg)
+ .addReg(VectorMaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 3. B = src2 & ~mask (overwrites the broadcast mask register with B)
+ BuildMI(*MBB, MI, DL, get(BicOp), VectorMaskReg)
+ .addReg(Src2Reg)
+ .addReg(VectorMaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 4. result = A | B
+ auto LastNewMI = BuildMI(*MBB, MI, DL, get(OrrOp), DestReg)
+ .addReg(DestReg)
+ .addReg(VectorMaskReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ auto BundleStart = FirstNewMI->getIterator();
+ auto BundleEnd = LastNewMI->getIterator();
+
+ // Bundle everything from the RSB through the final ORR.
+ finalizeBundle(*MBB, BundleStart, std::next(BundleEnd));
+
+ MI.eraseFromParent();
+ return true;
+}
+
+// Expands a ctselect pseudo for Thumb1 post-RA as dst = ((src1 ^ src2) & mask) ^ src2; erases MI and returns true.
+bool ARMBaseInstrInfo::expandCtSelectThumb(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ // Pseudos in thumb1 mode have: (outs $dst, $tmp_mask), (ins $src1, $src2,
+ // $cond). The register class here is always tGPR.
+ Register DestReg = MI.getOperand(0).getReg();
+ Register MaskReg = MI.getOperand(1).getReg();
+ Register Src1Reg = MI.getOperand(2).getReg();
+ Register Src2Reg = MI.getOperand(3).getReg();
+ Register CondReg = MI.getOperand(4).getReg();
+
+ // Access register info
+ MachineFunction *MF = MBB->getParent();
+ const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+
+ unsigned RegSize = TRI->getRegSizeInBits(MaskReg, MRI);
+ unsigned ShiftAmount = RegSize - 1; // moves bit 0 to the top bit and back
+
+ // 1. mask = cond; the shift pair below then replicates bit 0 across the register.
+ MachineInstr *FirstNewMI = BuildMI(*MBB, MI, DL, get(ARM::tMOVr), MaskReg)
+ .addReg(CondReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // Instead of RSB, LSL then ASR by (RegSize - 1) smears bit 0 of cond across the mask.
+ // NOTE(review): both shifts carry a dead CPSR def, so flags are still written — confirm intent.
+ // tLSLri: (outs tGPR:$Rd, s_cc_out:$s), (ins tGPR:$Rm, imm0_31:$imm5, pred:$p)
+ BuildMI(*MBB, MI, DL, get(ARM::tLSLri), MaskReg)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(MaskReg) // $Rm
+ .addImm(ShiftAmount) // imm0_31:$imm5
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // tASRri: (outs tGPR:$Rd, s_cc_out:$s), (ins tGPR:$Rm, imm_sr:$imm5, pred:$p)
+ BuildMI(*MBB, MI, DL, get(ARM::tASRri), MaskReg)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(MaskReg) // $Rm
+ .addImm(ShiftAmount) // imm_sr:$imm5
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 2. xor_diff = src1 ^ src2 (dst starts as a copy of src1)
+ BuildMI(*MBB, MI, DL, get(ARM::tMOVr), DestReg)
+ .addReg(Src1Reg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn,
+ // pred:$p) with constraint "$Rn = $Rdn"
+ BuildMI(*MBB, MI, DL, get(ARM::tEOR), DestReg)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(DestReg) // tied input $Rn
+ .addReg(Src2Reg) // $Rm
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 3. masked_xor = xor_diff & mask
+ // tAND has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn,
+ // pred:$p) with constraint "$Rn = $Rdn"
+ BuildMI(*MBB, MI, DL, get(ARM::tAND), DestReg)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(DestReg) // tied input $Rn
+ .addReg(MaskReg, RegState::Kill) // $Rm (last use of the mask)
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 4. result = src2 ^ masked_xor (src1 when the mask is all ones, src2 when it is zero)
+ // tEOR has tied operands: (outs tGPR:$Rdn, s_cc_out:$s), (ins tGPR:$Rn,
+ // pred:$p) with constraint "$Rn = $Rdn"
+ auto LastMI =
+ BuildMI(*MBB, MI, DL, get(ARM::tEOR), DestReg)
+ .addReg(ARM::CPSR, RegState::Define | RegState::Dead) // s_cc_out:$s
+ .addReg(DestReg) // tied input $Rn
+ .addReg(Src2Reg) // $Rm
+ .add(predOps(ARMCC::AL)) // pred:$p
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // Bundle everything from the first tMOVr through the final tEOR.
+ auto BundleStart = FirstNewMI->getIterator();
+ finalizeBundle(*MBB, BundleStart, std::next(LastMI->getIterator()));
+
+ MI.eraseFromParent();
+ return true;
+}
+
+// Expands a scalar (integer or float) ctselect pseudo post-RA into a bundled bitwise select; erases MI and returns true.
+bool ARMBaseInstrInfo::expandCtSelect(MachineInstr &MI) const {
+ MachineBasicBlock *MBB = MI.getParent();
+ DebugLoc DL = MI.getDebugLoc();
+
+ Register DestReg = MI.getOperand(0).getReg();
+ Register MaskReg = MI.getOperand(1).getReg();
+ Register DestRegSavedRef = DestReg; // the real $dst; DestReg is redirected to a GPR scratch for floats
+ Register Src1Reg, Src2Reg, CondReg;
+
+ // ARM-mode opcodes by default; replaced by the Thumb2 forms below.
+ unsigned RsbOp = ARM::RSBri;
+ unsigned AndOp = ARM::ANDrr;
+ unsigned BicOp = ARM::BICrr;
+ unsigned OrrOp = ARM::ORRrr;
+
+ if (Subtarget.isThumb2()) {
+ RsbOp = ARM::t2RSBri;
+ AndOp = ARM::t2ANDrr;
+ BicOp = ARM::t2BICrr;
+ OrrOp = ARM::t2ORRrr;
+ }
+
+ unsigned Opcode = MI.getOpcode();
+ bool IsFloat = Opcode == ARM::CTSELECTf32 || Opcode == ARM::CTSELECTf16 ||
+ Opcode == ARM::CTSELECTbf16;
+ MachineInstr *FirstNewMI = nullptr; // first instruction of the bundle; set below
+ if (IsFloat) {
+ // Each float pseudo has: (outs $dst, $tmp_mask, $scratch1, $scratch2), (ins
+ // $src1, $src2, $cond). We use two scratch registers in tablegen for
+ // bitwise ops on float types.
+ Register GPRScratch1 = MI.getOperand(2).getReg();
+ Register GPRScratch2 = MI.getOperand(3).getReg();
+
+ // choice a from __builtin_ct_select(cond, a, b)
+ Src1Reg = MI.getOperand(4).getReg();
+ // choice b from __builtin_ct_select(cond, a, b)
+ Src2Reg = MI.getOperand(5).getReg();
+ // cond from __builtin_ct_select(cond, a, b)
+ CondReg = MI.getOperand(6).getReg();
+
+ // Move fp src1 to GPR scratch1 so we can do our bitwise ops
+ FirstNewMI = BuildMI(*MBB, MI, DL, get(ARM::VMOVRS), GPRScratch1)
+ .addReg(Src1Reg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // Move src2 to scratch2
+ BuildMI(*MBB, MI, DL, get(ARM::VMOVRS), GPRScratch2)
+ .addReg(Src2Reg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ Src1Reg = GPRScratch1;
+ Src2Reg = GPRScratch2;
+ // Reuse GPRScratch1 for dest after we are done working with src1.
+ DestReg = GPRScratch1;
+ } else {
+ // Any non-float, non-vector pseudo has: (outs $dst, $tmp_mask), (ins $src1,
+ // $src2, $cond)
+ Src1Reg = MI.getOperand(2).getReg();
+ Src2Reg = MI.getOperand(3).getReg();
+ CondReg = MI.getOperand(4).getReg();
+ }
+
+ // The following sequence of steps yields: (src1 & mask) | (src2 & ~mask)
+
+ // 1. mask = 0 - cond
+ // When cond = 0: mask = 0x00000000.
+ // When cond = 1: mask = 0xFFFFFFFF.
+ auto TmpNewMI = BuildMI(*MBB, MI, DL, get(RsbOp), MaskReg)
+ .addReg(CondReg)
+ .addImm(0)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // In the non-float case no VMOVRS was emitted, so the RSB starts the bundle.
+ if (!FirstNewMI)
+ FirstNewMI = TmpNewMI;
+
+ // 2. A = src1 & mask
+ BuildMI(*MBB, MI, DL, get(AndOp), DestReg)
+ .addReg(Src1Reg)
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 3. B = src2 & ~mask (overwrites the mask register with B)
+ BuildMI(*MBB, MI, DL, get(BicOp), MaskReg)
+ .addReg(Src2Reg)
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ // 4. result = A | B
+ auto LastNewMI = BuildMI(*MBB, MI, DL, get(OrrOp), DestReg)
+ .addReg(DestReg)
+ .addReg(MaskReg)
+ .add(predOps(ARMCC::AL))
+ .add(condCodeOp())
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+
+ if (IsFloat) {
+ // Return our result from GPR to the correct register type.
+ LastNewMI = BuildMI(*MBB, MI, DL, get(ARM::VMOVSR), DestRegSavedRef)
+ .addReg(DestReg)
+ .add(predOps(ARMCC::AL))
+ .setMIFlag(MachineInstr::MIFlag::NoMerge);
+ }
+
+ auto BundleStart = FirstNewMI->getIterator();
+ auto BundleEnd = LastNewMI->getIterator();
+
+ // Bundle everything from FirstNewMI through LastNewMI.
+ finalizeBundle(*MBB, BundleStart, std::next(BundleEnd));
+
+ MI.eraseFromParent();
+ return true;
+}
+
bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
- if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
+ auto opcode = MI.getOpcode();
+
+ if (opcode == TargetOpcode::LOAD_STACK_GUARD) {
expandLoadStackGuard(MI);
MI.getParent()->erase(MI);
return true;
}
- if (MI.getOpcode() == ARM::MEMCPY) {
+ if (opcode == ARM::MEMCPY) {
expandMEMCPY(MI);
return true;
}
+ if (opcode == ARM::CTSELECTf64) {
+ if (Subtarget.isThumb1Only()) {
+ LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode
+ << "replaced by: " << MI);
+ return expandCtSelectThumb(MI);
+ } else {
+ LLVM_DEBUG(dbgs() << "Opcode (vector) " << opcode
+ << "replaced by: " << MI);
+ return expandCtSelectVector(MI);
+ }
+ }
+
+ if (opcode == ARM::CTSELECTv8i8 || opcode == ARM::CTSELECTv4i16 ||
+ opcode == ARM::CTSELECTv2i32 || opcode == ARM::CTSELECTv1i64 ||
+ opcode == ARM::CTSELECTv2f32 || opcode == ARM::CTSELECTv4f16 ||
+ opcode == ARM::CTSELECTv4bf16 || opcode == ARM::CTSELECTv16i8 ||
+ opcode == ARM::CTSELECTv8i16 || opcode == ARM::CTSELECTv4i32 ||
+ opcode == ARM::CTSELECTv2i64 || opcode == ARM::CTSELECTv4f32 ||
+ opcode == ARM::CTSELECTv2f64 || opcode == ARM::CTSELECTv8f16 ||
+ opcode == ARM::CTSELECTv8bf16) {
+ LLVM_DEBUG(dbgs() << "Opcode (vector) " << opcode << "replaced by: " << MI);
+ return expandCtSelectVector(MI);
+ }
+
+ if (opcode == ARM::CTSELECTint || opcode == ARM::CTSELECTf16 ||
+ opcode == ARM::CTSELECTbf16 || opcode == ARM::CTSELECTf32) {
+ if (Subtarget.isThumb1Only()) {
+ LLVM_DEBUG(dbgs() << "Opcode (thumb1 subtarget) " << opcode
+ << "replaced by: " << MI);
+ return expandCtSelectThumb(MI);
+ } else {
+ LLVM_DEBUG(dbgs() << "Opcode " << opcode << "replaced by: " << MI);
+ return expandCtSelect(MI);
+ }
+ }
+
// This hook gets to expand COPY instructions before they become
// copyPhysReg() calls. Look for VMOVS instructions that can legally be
// widened to VMOVD. We prefer the VMOVD when possible because it may be
diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
index 2869e7f708046..f0e090f09f5dc 100644
--- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -221,6 +221,12 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo {
const TargetRegisterInfo *TRI, Register VReg,
MachineInstr::MIFlag Flags = MachineInstr::NoFlags) const override;
+ bool expandCtSelectVector(MachineInstr &MI) const;
+
+ bool expandCtSelectThumb(MachineInstr &MI) const;
+
+ bool expandCtSelect(MachineInstr &MI) const;
+
bool expandPostRAPseudo(MachineInstr &MI) const override;
bool shouldSink(const MachineInstr &MI) const override;
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 847b7af5a9b11..3fdc5734baaa5 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -4200,6 +4200,92 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
// Other cases are autogenerated.
break;
}
+ case ARMISD::CTSELECT: {
+ EVT VT = N->getValueType(0);
+ unsigned PseudoOpcode; // assigned on every path of the chain below
+ bool IsFloat = false; // pseudo has two extra i32 scratch results
+ bool IsVector = false; // pseudo has one extra $bcast_mask result of type VT
+
+ if (VT == MVT::f16) {
+ PseudoOpcode = ARM::CTSELECTf16;
+ IsFloat = true;
+ } else if (VT == MVT::bf16) {
+ PseudoOpcode = ARM::CTSELECTbf16;
+ IsFloat = true;
+ } else if (VT == MVT::f32) {
+ PseudoOpcode = ARM::CTSELECTf32;
+ IsFloat = true;
+ } else if (VT == MVT::f64) {
+ PseudoOpcode = ARM::CTSELECTf64;
+ IsVector = true; // f64 takes the vector-style result list
+ } else if (VT == MVT::v8i8) {
+ PseudoOpcode = ARM::CTSELECTv8i8;
+ IsVector = true;
+ } else if (VT == MVT::v4i16) {
+ PseudoOpcode = ARM::CTSELECTv4i16;
+ IsVector = true;
+ } else if (VT == MVT::v2i32) {
+ PseudoOpcode = ARM::CTSELECTv2i32;
+ IsVector = true;
+ } else if (VT == MVT::v1i64) {
+ PseudoOpcode = ARM::CTSELECTv1i64;
+ IsVector = true;
+ } else if (VT == MVT::v2f32) {
+ PseudoOpcode = ARM::CTSELECTv2f32;
+ IsVector = true;
+ } else if (VT == MVT::v4f16) {
+ PseudoOpcode = ARM::CTSELECTv4f16;
+ IsVector = true;
+ } else if (VT == MVT::v4bf16) {
+ PseudoOpcode = ARM::CTSELECTv4bf16;
+ IsVector = true;
+ } else if (VT == MVT::v16i8) {
+ PseudoOpcode = ARM::CTSELECTv16i8;
+ IsVector = true;
+ } else if (VT == MVT::v8i16) {
+ PseudoOpcode = ARM::CTSELECTv8i16;
+ IsVector = true;
+ } else if (VT == MVT::v4i32) {
+ PseudoOpcode = ARM::CTSELECTv4i32;
+ IsVector = true;
+ } else if (VT == MVT::v2i64) {
+ PseudoOpcode = ARM::CTSELECTv2i64;
+ IsVector = true;
+ } else if (VT == MVT::v4f32) {
+ PseudoOpcode = ARM::CTSELECTv4f32;
+ IsVector = true;
+ } else if (VT == MVT::v2f64) {
+ PseudoOpcode = ARM::CTSELECTv2f64;
+ IsVector = true;
+ } else if (VT == MVT::v8f16) {
+ PseudoOpcode = ARM::CTSELECTv8f16;
+ IsVector = true;
+ } else if (VT == MVT::v8bf16) {
+ PseudoOpcode = ARM::CTSELECTv8bf16;
+ IsVector = true;
+ } else {
+ // Anything else (i1, i8, i16, i32, i64) uses the integer pseudo.
+ PseudoOpcode = ARM::CTSELECTint;
+ }
+
+ SmallVector<EVT, 4> VTs;
+ VTs.push_back(VT); // $dst
+ VTs.push_back(MVT::i32); // $tmp_mask (always GPR)
+
+ if (IsVector) {
+ VTs.push_back(VT); // $bcast_mask (same type as dst for vectors)
+ } else if (IsFloat) {
+ VTs.push_back(MVT::i32); // $scratch1 (GPR)
+ VTs.push_back(MVT::i32); // $scratch2 (GPR)
+ }
+
+ // Pseudo inputs, in the node's own operand order: $src1, $src2, $cond.
+ SDValue Ops[] = {N->getOperand(0), N->getOperand(1), N->getOperand(2)};
+
+ SDNode *ResNode = CurDAG->getMachineNode(PseudoOpcode, SDLoc(N), VTs, Ops);
+ ReplaceNode(N, ResNode);
+ return;
+ }
case ARMISD::VZIP: {
EVT VT = N->getValueType(0);
// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index 6b0653457cbaf..63005f1c9f989 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -203,6 +203,7 @@ void ARMTargetLowering::addTypeForNEON(MVT VT, MVT PromotedLdStVT) {
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
setOperationAction(ISD::VSELECT, VT, Expand);
+ setOperationAction(ISD::CTSELECT, VT, Custom);
setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Expand);
if (VT.isInteger()) {
setOperationAction(ISD::SHL, VT, Custom);
@@ -304,6 +305,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::CTPOP, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::CTSELECT, VT, Custom);
// Vector reductions
setOperationAction(ISD::VECREDUCE_ADD, VT, Legal);
@@ -355,6 +357,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::MSTORE, VT, Legal);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::CTSELECT, VT, Custom);
// Pre and Post inc are supported on loads and stores
for (unsigned im = (unsigned)ISD::PRE_INC;
@@ -408,6 +411,28 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::VECREDUCE_FMIN, MVT::v2f16, Custom);
setOperationAction(ISD::VECREDUCE_FMAX, MVT::v2f16, Custom);
+ if (Subtarget->hasFullFP16()) {
+ setOperationAction(ISD::CTSELECT, MVT::v4f16, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::v8f16, Custom);
+ }
+
+ if (Subtarget->hasBF16()) {
+ setOperationAction(ISD::CTSELECT, MVT::v4bf16, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::v8bf16, Custom);
+ }
+
+ // small exotic vectors get scalarised for ctselect
+ setOperationAction(ISD::CTSELECT, MVT::v1i8, Expand);
+ setOperationAction(ISD::CTSELECT, MVT::v1i16, Expand);
+ setOperationAction(ISD::CTSELECT, MVT::v1i32, Expand);
+ setOperationAction(ISD::CTSELECT, MVT::v1f32, Expand);
+ setOperationAction(ISD::CTSELECT, MVT::v2i8, Expand);
+
+ setOperationAction(ISD::CTSELECT, MVT::v2i16, Promote);
+ setOperationPromotedToType(ISD::CTSELECT, MVT::v2i16, MVT::v4i16);
+ setOperationAction(ISD::CTSELECT, MVT::v4i8, Promote);
+ setOperationPromotedToType(ISD::CTSELECT, MVT::v4i8, MVT::v8i8);
+
// We 'support' these types up to bitcast/load/store level, regardless of
// MVE integer-only / float support. Only doing FP data processing on the FP
// vector types is inhibited at integer-only level.
@@ -419,6 +444,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::VSELECT, VT, Legal);
+ setOperationAction(ISD::CTSELECT, VT, Custom);
setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
}
setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
@@ -474,6 +500,7 @@ void ARMTargetLowering::addMVEVectorTypes(bool HasMVEFP) {
setOperationAction(ISD::VSELECT, VT, Expand);
setOperationAction(ISD::SELECT, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
+ setOperationAction(ISD::CTSELECT, VT, Custom);
if (!HasMVEFP) {
setOperationAction(ISD::SINT_TO_FP, VT, Expand);
@@ -1247,10 +1274,27 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM_,
setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::i8, Promote);
+ setOperationAction(ISD::CTSELECT, MVT::i16, Promote);
+ setOperationPromotedToType(ISD::CTSELECT, MVT::i16, MVT::i32);
+
+ setOperationAction(ISD::CTSELECT, MVT::i32, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::i64, Expand);
+ setOperationAction(ISD::CTSELECT, MVT::f32, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::f64, Custom);
+
+ // Handle f16 and bf16 without falling back to select from ctselect.
+ setTargetDAGCombine({ISD::CTSELECT});
+
if (Subtarget->hasFullFP16()) {
setOperationAction(ISD::SETCC, MVT::f16, Expand);
setOperationAction(ISD::SELECT, MVT::f16, Custom);
setOperationAction(ISD::SELECT_CC, MVT::f16, Custom);
+ setOperationAction(ISD::CTSELECT, MVT::f16, Custom);
+ }
+
+ if (Subtarget->hasBF16()) {
+ setOperationAction(ISD::CTSELECT, MVT::bf16, Custom);
}
setOperationAction(ISD::SETCCCARRY, MVT::i32, Custom);
@@ -1589,6 +1633,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(ARMISD::BCC_i64)
MAKE_CASE(ARMISD::FMSTAT)
MAKE_CASE(ARMISD::CMOV)
+ MAKE_CASE(ARMISD::CTSELECT)
MAKE_CASE(ARMISD::SSAT)
MAKE_CASE(ARMISD::USAT)
MAKE_CASE(ARMISD::ASRL)
@@ -5129,6 +5174,20 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
SelectTrue, SelectFalse, ISD::SETNE);
}
+// Custom lowering of ISD::CTSELECT (cond, true, false) to ARMISD::CTSELECT.
+SDValue ARMTargetLowering::LowerCTSELECT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc dl(Op);
+ EVT ResultVT = Op.getValueType();
+
+ // Keep only bit 0 of the incoming condition so the target node sees 0 or 1.
+ SDValue Bit0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op.getOperand(0),
+ DAG.getConstant(1, dl, MVT::i32));
+
+ // The target node takes its operands as (true value, false value, cond).
+ return DAG.getNode(ARMISD::CTSELECT, dl, ResultVT, Op.getOperand(1),
+ Op.getOperand(2), Bit0);
+}
+
static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode,
bool &swpCmpOps, bool &swpVselOps) {
// Start by selecting the GE condition code for opcodes that return true for
@@ -10628,6 +10687,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::SELECT: return LowerSELECT(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::CTSELECT:
+ return LowerCTSELECT(Op, DAG);
case ISD::BRCOND: return LowerBRCOND(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
@@ -10857,6 +10918,36 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
case ISD::FP_TO_UINT_SAT:
Res = LowerFP_TO_INT_SAT(SDValue(N, 0), DAG, Subtarget);
break;
+ case ISD::CTSELECT: {
+ EVT VT = N->getValueType(0);
+
+ // f16/bf16: select on the raw bits as an i32 ctselect, then convert the
+ // result back so the replacement value has N's own result type.
+ if (VT == MVT::f16 || VT == MVT::bf16) {
+ SDLoc DL(N);
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueVal = N->getOperand(1);
+ SDValue FalseVal = N->getOperand(2);
+
+ // Bitcast to i16, then promote to i32
+ SDValue TrueInt = DAG.getBitcast(MVT::i16, TrueVal);
+ SDValue FalseInt = DAG.getBitcast(MVT::i16, FalseVal);
+
+ TrueInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, TrueInt);
+ FalseInt = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, FalseInt);
+
+ // Normalize condition. The AND is an i32 node, so bring the condition to
+ // i32 first in case it reaches here with a different integer type.
+ SDValue One = DAG.getConstant(1, DL, MVT::i32);
+ SDValue CondNorm = DAG.getNode(ISD::AND, DL, MVT::i32,
+ DAG.getZExtOrTrunc(Cond, DL, MVT::i32), One);
+
+ // Create i32 ctselect that will go through normal lowering
+ SDValue Sel =
+ DAG.getNode(ISD::CTSELECT, DL, MVT::i32, CondNorm, TrueInt, FalseInt);
+
+ // Res is pushed into Results as the replacement for N's value, so it must
+ // have type VT again: drop the extended bits and bitcast back.
+ Res = DAG.getBitcast(VT, DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Sel));
+ } else {
+ // For other types, use existing lowering
+ Res = LowerCTSELECT(SDValue(N, 0), DAG);
+ }
+ break;
+ }
}
if (Res.getNode())
Results.push_back(Res);
@@ -13478,6 +13569,64 @@ static SDValue PerformVQDMULHCombine(SDNode *N, SelectionDAG &DAG) {
DAG.getNode(ISD::CONCAT_VECTORS, DL, VecVT, Parts));
}
+// Pre-legalization combine for ISD::CTSELECT on half-precision values.
+//
+// Rewrites a ctselect whose result is f16/bf16, or one of v4f16, v4bf16,
+// v8f16, v8bf16, into a ctselect on the same-width integer type (i16, v4i16
+// or v8i16) wrapped in bitcasts. Returns an empty SDValue for every other
+// type and whenever the combine runs after legalization has started.
+static SDValue PerformCTSELECTCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ if (!DCI.isBeforeLegalize())
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+
+ // Types can still be extended (non-simple) before legalization, and
+ // getSimpleVT() asserts on those, so check isSimple() before switching.
+ if (!VT.isSimple())
+ return SDValue();
+
+ EVT IntVT;
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::f16:
+ case MVT::bf16:
+ // An i16 ctselect - this will be promoted to i32 ctselect naturally
+ IntVT = MVT::i16;
+ break;
+ case MVT::v4f16:
+ case MVT::v4bf16:
+ IntVT = MVT::v4i16;
+ break;
+ case MVT::v8f16:
+ case MVT::v8bf16:
+ IntVT = MVT::v8i16;
+ break;
+ default:
+ return SDValue(); // Not a half-precision type handled here
+ }
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc DL(N);
+
+ SDValue Cond = N->getOperand(0);
+ SDValue TrueInt = DAG.getBitcast(IntVT, N->getOperand(1));
+ SDValue FalseInt = DAG.getBitcast(IntVT, N->getOperand(2));
+
+ SDValue Result =
+ DAG.getNode(ISD::CTSELECT, DL, IntVT, Cond, TrueInt, FalseInt);
+
+ return DAG.getBitcast(VT, Result);
+}
+
static SDValue PerformVSELECTCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
const ARMSubtarget *Subtarget) {
@@ -18981,6 +19130,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::SELECT_CC:
case ISD::SELECT: return PerformSELECTCombine(N, DCI, Subtarget);
case ISD::VSELECT: return PerformVSELECTCombine(N, DCI, Subtarget);
+ case ISD::CTSELECT:
+ return PerformCTSELECTCombine(N, DCI, Subtarget);
case ISD::SETCC: return PerformVSetCCToVCTPCombine(N, DCI, Subtarget);
case ARMISD::ADDE: return PerformADDECombine(N, DCI, Subtarget);
case ARMISD::UMLAL: return PerformUMLALCombine(N, DCI.DAG, Subtarget);
@@ -21394,28 +21545,21 @@ bool ARMTargetLowering::useLoadStackGuardNode(const Module &M) const {
}
void ARMTargetLowering::insertSSPDeclarations(Module &M) const {
- // MSVC CRT provides functionalities for stack protection.
RTLIB::LibcallImpl SecurityCheckCookieLibcall =
getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
-
- RTLIB::LibcallImpl SecurityCookieVar =
- getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
- if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
- SecurityCookieVar != RTLIB::Unsupported) {
- // MSVC CRT has a global variable holding security cookie.
- M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
- PointerType::getUnqual(M.getContext()));
-
- // MSVC CRT has a function to validate security cookie.
- FunctionCallee SecurityCheckCookie =
- M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
- Type::getVoidTy(M.getContext()),
- PointerType::getUnqual(M.getContext()));
- if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
- F->addParamAttr(0, Attribute::AttrKind::InReg);
- }
-
- TargetLowering::insertSSPDeclarations(M);
+ if (SecurityCheckCookieLibcall == RTLIB::Unsupported)
+ return TargetLowering::insertSSPDeclarations(M);
+
+ // MSVC CRT has a global variable holding security cookie.
+ M.getOrInsertGlobal("__security_cookie",
+ PointerType::getUnqual(M.getContext()));
+
+ // MSVC CRT has a function to validate security cookie.
+ FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
+ getLibcallImplName(SecurityCheckCookieLibcall),
+ Type::getVoidTy(M.getContext()), PointerType::getUnqual(M.getContext()));
+ if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee()))
+ F->addParamAttr(0, Attribute::AttrKind::InReg);
}
bool ARMTargetLowering::canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.h b/llvm/lib/Target/ARM/ARMISelLowering.h
index bf3438b0d8803..90aa1bf162694 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.h
+++ b/llvm/lib/Target/ARM/ARMISelLowering.h
@@ -97,6 +97,9 @@ class VectorType;
CMOV, // ARM conditional move instructions.
+ CTSELECT, // ARM constant-time select, implemented with constant-time
+ // bitwise arithmetic instructions.
+
SSAT, // Signed saturation
USAT, // Unsigned saturation
@@ -430,8 +433,12 @@ class VectorType;
const char *getTargetNodeName(unsigned Opcode) const override;
bool isSelectSupported(SelectSupportKind Kind) const override {
- // ARM does not support scalar condition selects on vectors.
- return (Kind != ScalarCondVectorVal);
+ if (Kind == SelectSupportKind::CtSelect) {
+ return true;
+ } else {
+ // ARM does not support scalar condition selects on vectors.
+ return (Kind != SelectSupportKind::ScalarCondVectorVal);
+ }
}
bool isReadOnly(const GlobalValue *GV) const;
@@ -885,6 +892,7 @@ class VectorType;
SDValue LowerUnsignedALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTSELECT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
@@ -1032,6 +1040,7 @@ class VectorType;
MachineBasicBlock *MBB) const;
MachineBasicBlock *EmitLowered__dbzchk(MachineInstr &MI,
MachineBasicBlock *MBB) const;
+
void addMVEVectorTypes(bool HasMVEFP);
void addAllExtLoads(const MVT From, const MVT To, LegalizeAction Action);
void setAllExpand(MVT VT);
diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td
index f7176a65d8163..b63d041081098 100644
--- a/llvm/lib/Target/ARM/ARMInstrInfo.td
+++ b/llvm/lib/Target/ARM/ARMInstrInfo.td
@@ -32,6 +32,13 @@ def SDT_ARMSaveCallPC : SDTypeProfile<0, 1, []>;
def SDT_ARMcall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>;
+def SDT_ARMCtSelect : SDTypeProfile<1, 3, [
+ /* any */ // result
+ SDTCisSameAs<1, 0>, // value on true (first operand built by LowerCTSELECT)
+ SDTCisSameAs<2, 0>, // value on false
+ SDTCisVT<3, i32> // cond
+]>;
+
def SDT_ARMCMov : SDTypeProfile<1, 4, [
/* any */ // result
SDTCisSameAs<1, 0>, // value on false
@@ -188,6 +195,7 @@ def ARMseretglue : SDNode<"ARMISD::SERET_GLUE", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def ARMintretglue : SDNode<"ARMISD::INTRET_GLUE", SDT_ARMcall,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
+def ARMctselect : SDNode<"ARMISD::CTSELECT", SDT_ARMCtSelect>;
def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov>;
def ARMssat : SDNode<"ARMISD::SSAT", SDTIntSatNoShOp, []>;
@@ -5108,6 +5116,183 @@ def : ARMPat<(ARMcmov i32:$false, mod_imm_not:$imm, imm:$cc, CPSR),
def : ARMV6T2Pat<(ARMcmov i32:$false, imm:$src, imm:$cc, CPSR),
(MOVCCi32imm $false, imm:$src, imm:$cc, CPSR)>;
+//===----------------------------------------------------------------------===//
+// Constant-time selection pseudoinstructions.
+// We use a machine pass to lower these pseudos as applicable by subtarget,
+// in order to avoid backend optimizations that could invalidate constant-time
+// guarantees to the source programmer by node merging or other operations that
+// would result in machine code that does not run in constant time.
+let isNotDuplicable = 1, isPseudo = 1, hasNoSchedulingInfo = 1 in {
+
+ // i1, i8, i16, i32, i64
+ def CTSELECTint : ARMPseudoInst<(outs GPR:$dst, GPR:$tmp_mask),
+ (ins GPR:$src1, GPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber $tmp_mask";
+ }
+
+ def CTSELECTf16
+ : ARMPseudoInst<
+ (outs HPR:$dst, GPR:$tmp_mask, GPR:$scratch1, GPR:$scratch2),
+ (ins HPR:$src1, HPR:$src2, GPR:$cond), 4, NoItinerary, []> {
+ let Constraints =
+ "@earlyclobber $dst,@earlyclobber $tmp_mask,@earlyclobber "
+ "$scratch1,@earlyclobber $scratch2";
+ }
+
+ def CTSELECTbf16
+ : ARMPseudoInst<
+ (outs HPR:$dst, GPR:$tmp_mask, GPR:$scratch1, GPR:$scratch2),
+ (ins HPR:$src1, HPR:$src2, GPR:$cond), 4, NoItinerary, []> {
+ let Constraints =
+ "@earlyclobber $dst,@earlyclobber $tmp_mask,@earlyclobber "
+ "$scratch1,@earlyclobber $scratch2";
+ }
+
+ def CTSELECTf32
+ : ARMPseudoInst<
+ (outs SPR:$dst, GPR:$tmp_mask, GPR:$scratch1, GPR:$scratch2),
+ (ins SPR:$src1, SPR:$src2, GPR:$cond), 4, NoItinerary, []> {
+ let Constraints =
+ "@earlyclobber $dst,@earlyclobber $tmp_mask,@earlyclobber "
+ "$scratch1,@earlyclobber $scratch2";
+ }
+
+ let Predicates = [HasDPVFP] in {
+ def CTSELECTf64
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+ }
+
+ let Predicates = [HasNEON] in {
+ // DPR
+ def CTSELECTv8i8
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv4i16
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv2i32
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv1i64
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv2f32
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv4f16
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv4bf16
+ : ARMPseudoInst<(outs DPR:$dst, GPR:$tmp_mask, DPR:$bcast_mask),
+ (ins DPR:$src1, DPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ // QPR
+ def CTSELECTv16i8
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv8i16
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv4i32
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv2i64
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv4f32
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv2f64
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv8f16
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+
+ def CTSELECTv8bf16
+ : ARMPseudoInst<(outs QPR:$dst, GPR:$tmp_mask, QPR:$bcast_mask),
+ (ins QPR:$src1, QPR:$src2, GPR:$cond), 4,
+ NoItinerary, []> {
+ let Constraints = "@earlyclobber $dst,@earlyclobber "
+ "$tmp_mask,@earlyclobber $bcast_mask";
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Atomic operations intrinsics
//
diff --git a/llvm/lib/Target/ARM/ARMTargetMachine.cpp b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
index 590d4c70592f8..abde3ae28a751 100644
--- a/llvm/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetMachine.cpp
@@ -521,13 +521,11 @@ void ARMPassConfig::addPreSched2() {
void ARMPassConfig::addPreEmitPass() {
addPass(createThumb2SizeReductionPass());
- // Unpack bundles for:
+ // Always unpack bundles for:
// - Thumb2: Constant island pass requires unbundled instructions
// - KCFI: KCFI_CHECK pseudo instructions need to be unbundled for AsmPrinter
- addPass(createUnpackMachineBundles([](const MachineFunction &MF) {
- return MF.getSubtarget<ARMSubtarget>().isThumb2() ||
- MF.getFunction().getParent()->getModuleFlag("kcfi");
- }));
+ // - CTSELECT: Post-RA expansion creates bundles that must be unpacked
+ addPass(createUnpackMachineBundles(nullptr));
// Don't optimize barriers or block placement at -O0.
if (getOptLevel() != CodeGenOptLevel::None) {
diff --git a/llvm/test/CodeGen/ARM/ctselect-half.ll b/llvm/test/CodeGen/ARM/ctselect-half.ll
new file mode 100644
index 0000000000000..fed3387ce8f53
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ctselect-half.ll
@@ -0,0 +1,975 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=CT %s
+; RUN: llc < %s -mtriple=armv8.6a-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=BFLOAT-F16-NATIVE %s
+; RUN: llc < %s -mtriple=armv8.2a-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=F16-NATIVE %s
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB1 %s
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB2 %s
+
+define half @ct_half(i1 %cond, half %a, half %b) {
+; CT-LABEL: ct_half:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; BFLOAT-F16-NATIVE-LABEL: ct_half:
+; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry
+; BFLOAT-F16-NATIVE-NEXT: and r3, r0, #1
+; BFLOAT-F16-NATIVE-NEXT: rsb r12, r3, #0
+; BFLOAT-F16-NATIVE-NEXT: and r0, r1, r12
+; BFLOAT-F16-NATIVE-NEXT: bic r12, r2, r12
+; BFLOAT-F16-NATIVE-NEXT: orr r0, r0, r12
+; BFLOAT-F16-NATIVE-NEXT: bx lr
+;
+; F16-NATIVE-LABEL: ct_half:
+; F16-NATIVE: @ %bb.0: @ %entry
+; F16-NATIVE-NEXT: and r3, r0, #1
+; F16-NATIVE-NEXT: rsb r12, r3, #0
+; F16-NATIVE-NEXT: and r0, r1, r12
+; F16-NATIVE-NEXT: bic r12, r2, r12
+; F16-NATIVE-NEXT: orr r0, r0, r12
+; F16-NATIVE-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_half:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_half:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+entry:
+ %sel = call half @llvm.ct.select.f16(i1 %cond, half %a, half %b)
+ ret half %sel
+}
+
+define bfloat @ct_bf16(i1 %cond, bfloat %a, bfloat %b) {
+; CT-LABEL: ct_bf16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; BFLOAT-F16-NATIVE-LABEL: ct_bf16:
+; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry
+; BFLOAT-F16-NATIVE-NEXT: .pad #4
+; BFLOAT-F16-NATIVE-NEXT: sub sp, sp, #4
+; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1
+; BFLOAT-F16-NATIVE-NEXT: rsb r12, r0, #0
+; BFLOAT-F16-NATIVE-NEXT: and r3, r1, r12
+; BFLOAT-F16-NATIVE-NEXT: bic r12, r2, r12
+; BFLOAT-F16-NATIVE-NEXT: orr r3, r3, r12
+; BFLOAT-F16-NATIVE-NEXT: strh r3, [sp, #2]
+; BFLOAT-F16-NATIVE-NEXT: ldrh r0, [sp, #2]
+; BFLOAT-F16-NATIVE-NEXT: add sp, sp, #4
+; BFLOAT-F16-NATIVE-NEXT: bx lr
+;
+; F16-NATIVE-LABEL: ct_bf16:
+; F16-NATIVE: @ %bb.0: @ %entry
+; F16-NATIVE-NEXT: and r3, r0, #1
+; F16-NATIVE-NEXT: rsb r12, r3, #0
+; F16-NATIVE-NEXT: and r0, r1, r12
+; F16-NATIVE-NEXT: bic r12, r2, r12
+; F16-NATIVE-NEXT: orr r0, r0, r12
+; F16-NATIVE-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_bf16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_bf16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+entry:
+ %sel = call bfloat @llvm.ct.select.bf16(i1 %cond, bfloat %a, bfloat %b)
+ ret bfloat %sel
+}
+
+define <4 x half> @ct_v4f16(i1 %cond, <4 x half> %a, <4 x half> %b) {
+; CT-LABEL: ct_v4f16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: .save {r4, r5, r6, lr}
+; CT-NEXT: push {r4, r5, r6, lr}
+; CT-NEXT: ldrh r1, [sp, #20]
+; CT-NEXT: pkhbt r2, r2, r3, lsl #16
+; CT-NEXT: ldrh r12, [sp, #36]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: ldrh lr, [sp, #28]
+; CT-NEXT: ldrh r6, [sp, #24]
+; CT-NEXT: ldrh r4, [sp, #16]
+; CT-NEXT: ldrh r5, [sp, #32]
+; CT-NEXT: orr r6, r6, lr, lsl #16
+; CT-NEXT: orr r1, r4, r1, lsl #16
+; CT-NEXT: orr r3, r5, r12, lsl #16
+; CT-NEXT: vmov d17, r2, r1
+; CT-NEXT: vmov d16, r6, r3
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov.u16 r0, d18[0]
+; CT-NEXT: vmov.u16 r1, d18[1]
+; CT-NEXT: vmov.u16 r2, d18[2]
+; CT-NEXT: vmov.u16 r3, d18[3]
+; CT-NEXT: pop {r4, r5, r6, pc}
+;
+; BFLOAT-F16-NATIVE-LABEL: ct_v4f16:
+; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry
+; BFLOAT-F16-NATIVE-NEXT: .save {r4, r5, r6, lr}
+; BFLOAT-F16-NATIVE-NEXT: push {r4, r5, r6, lr}
+; BFLOAT-F16-NATIVE-NEXT: ldrh r1, [sp, #20]
+; BFLOAT-F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: ldrh r12, [sp, #36]
+; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1
+; BFLOAT-F16-NATIVE-NEXT: ldrh lr, [sp, #28]
+; BFLOAT-F16-NATIVE-NEXT: ldrh r6, [sp, #24]
+; BFLOAT-F16-NATIVE-NEXT: ldrh r4, [sp, #16]
+; BFLOAT-F16-NATIVE-NEXT: ldrh r5, [sp, #32]
+; BFLOAT-F16-NATIVE-NEXT: orr r6, r6, lr, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: orr r1, r4, r1, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: orr r3, r5, r12, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: vmov d17, r2, r1
+; BFLOAT-F16-NATIVE-NEXT: vmov d16, r6, r3
+; BFLOAT-F16-NATIVE-NEXT: rsb r1, r0, #0
+; BFLOAT-F16-NATIVE-NEXT: vdup.32 d19, r1
+; BFLOAT-F16-NATIVE-NEXT: vand d18, d17, d19
+; BFLOAT-F16-NATIVE-NEXT: vbic d19, d16, d19
+; BFLOAT-F16-NATIVE-NEXT: vorr d18, d18, d19
+; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r0, d18[0]
+; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r1, d18[1]
+; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r2, d18[2]
+; BFLOAT-F16-NATIVE-NEXT: vmov.u16 r3, d18[3]
+; BFLOAT-F16-NATIVE-NEXT: pop {r4, r5, r6, pc}
+;
+; F16-NATIVE-LABEL: ct_v4f16:
+; F16-NATIVE: @ %bb.0: @ %entry
+; F16-NATIVE-NEXT: .save {r4, r5, r6, lr}
+; F16-NATIVE-NEXT: push {r4, r5, r6, lr}
+; F16-NATIVE-NEXT: ldrh r1, [sp, #20]
+; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: ldrh r12, [sp, #36]
+; F16-NATIVE-NEXT: and r0, r0, #1
+; F16-NATIVE-NEXT: ldrh lr, [sp, #28]
+; F16-NATIVE-NEXT: ldrh r6, [sp, #24]
+; F16-NATIVE-NEXT: ldrh r4, [sp, #16]
+; F16-NATIVE-NEXT: ldrh r5, [sp, #32]
+; F16-NATIVE-NEXT: orr r6, r6, lr, lsl #16
+; F16-NATIVE-NEXT: orr r1, r4, r1, lsl #16
+; F16-NATIVE-NEXT: orr r3, r5, r12, lsl #16
+; F16-NATIVE-NEXT: vmov d17, r2, r1
+; F16-NATIVE-NEXT: vmov d16, r6, r3
+; F16-NATIVE-NEXT: rsb r1, r0, #0
+; F16-NATIVE-NEXT: vdup.32 d19, r1
+; F16-NATIVE-NEXT: vand d18, d17, d19
+; F16-NATIVE-NEXT: vbic d19, d16, d19
+; F16-NATIVE-NEXT: vorr d18, d18, d19
+; F16-NATIVE-NEXT: vmov.u16 r0, d18[0]
+; F16-NATIVE-NEXT: vmov.u16 r1, d18[1]
+; F16-NATIVE-NEXT: vmov.u16 r2, d18[2]
+; F16-NATIVE-NEXT: vmov.u16 r3, d18[3]
+; F16-NATIVE-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB1-LABEL: ct_v4f16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #32]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #36]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ldr r3, [sp, #40]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r5
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: ldr r6, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v4f16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldrh.w r1, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldrh.w r2, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldrh.w r3, [sp, #16]
+; THUMB2-NEXT: ldrh.w lr, [sp, #32]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r2, r3, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r2, r4
+; THUMB2-NEXT: ldrh.w lr, [sp, #36]
+; THUMB2-NEXT: ldrh.w r4, [sp, #20]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <4 x half> @llvm.ct.select.v4f16(i1 %cond, <4 x half> %a, <4 x half> %b)
+ ret <4 x half> %sel
+}
+
+define <4 x bfloat> @ct_v4bf16(i1 %cond, <4 x bfloat> %a, <4 x bfloat> %b) {
+; CT-LABEL: ct_v4bf16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: .save {r4, r5, r6, lr}
+; CT-NEXT: push {r4, r5, r6, lr}
+; CT-NEXT: ldrh r1, [sp, #20]
+; CT-NEXT: pkhbt r2, r2, r3, lsl #16
+; CT-NEXT: ldrh r12, [sp, #36]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: ldrh lr, [sp, #28]
+; CT-NEXT: ldrh r6, [sp, #24]
+; CT-NEXT: ldrh r4, [sp, #16]
+; CT-NEXT: ldrh r5, [sp, #32]
+; CT-NEXT: orr r6, r6, lr, lsl #16
+; CT-NEXT: orr r1, r4, r1, lsl #16
+; CT-NEXT: orr r3, r5, r12, lsl #16
+; CT-NEXT: vmov d17, r2, r1
+; CT-NEXT: vmov d16, r6, r3
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov.u16 r0, d18[0]
+; CT-NEXT: vmov.u16 r1, d18[1]
+; CT-NEXT: vmov.u16 r2, d18[2]
+; CT-NEXT: vmov.u16 r3, d18[3]
+; CT-NEXT: pop {r4, r5, r6, pc}
+;
+; BFLOAT-F16-NATIVE-LABEL: ct_v4bf16:
+; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry
+; BFLOAT-F16-NATIVE-NEXT: vldr d16, [sp]
+; BFLOAT-F16-NATIVE-NEXT: vmov d17, r2, r3
+; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1
+; BFLOAT-F16-NATIVE-NEXT: rsb r1, r0, #0
+; BFLOAT-F16-NATIVE-NEXT: vdup.32 d19, r1
+; BFLOAT-F16-NATIVE-NEXT: vand d18, d17, d19
+; BFLOAT-F16-NATIVE-NEXT: vbic d19, d16, d19
+; BFLOAT-F16-NATIVE-NEXT: vorr d18, d18, d19
+; BFLOAT-F16-NATIVE-NEXT: vmov r0, r1, d18
+; BFLOAT-F16-NATIVE-NEXT: bx lr
+;
+; F16-NATIVE-LABEL: ct_v4bf16:
+; F16-NATIVE: @ %bb.0: @ %entry
+; F16-NATIVE-NEXT: .save {r4, r5, r6, lr}
+; F16-NATIVE-NEXT: push {r4, r5, r6, lr}
+; F16-NATIVE-NEXT: ldrh r1, [sp, #20]
+; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: ldrh r12, [sp, #36]
+; F16-NATIVE-NEXT: and r0, r0, #1
+; F16-NATIVE-NEXT: ldrh lr, [sp, #28]
+; F16-NATIVE-NEXT: ldrh r6, [sp, #24]
+; F16-NATIVE-NEXT: ldrh r4, [sp, #16]
+; F16-NATIVE-NEXT: ldrh r5, [sp, #32]
+; F16-NATIVE-NEXT: orr r6, r6, lr, lsl #16
+; F16-NATIVE-NEXT: orr r1, r4, r1, lsl #16
+; F16-NATIVE-NEXT: orr r3, r5, r12, lsl #16
+; F16-NATIVE-NEXT: vmov d17, r2, r1
+; F16-NATIVE-NEXT: vmov d16, r6, r3
+; F16-NATIVE-NEXT: rsb r1, r0, #0
+; F16-NATIVE-NEXT: vdup.32 d19, r1
+; F16-NATIVE-NEXT: vand d18, d17, d19
+; F16-NATIVE-NEXT: vbic d19, d16, d19
+; F16-NATIVE-NEXT: vorr d18, d18, d19
+; F16-NATIVE-NEXT: vmov.u16 r0, d18[0]
+; F16-NATIVE-NEXT: vmov.u16 r1, d18[1]
+; F16-NATIVE-NEXT: vmov.u16 r2, d18[2]
+; F16-NATIVE-NEXT: vmov.u16 r3, d18[3]
+; F16-NATIVE-NEXT: pop {r4, r5, r6, pc}
+;
+; THUMB1-LABEL: ct_v4bf16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #32]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #36]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ldr r3, [sp, #40]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r5
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: ldr r6, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v4bf16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldrh.w r1, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldrh.w r2, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldrh.w r3, [sp, #16]
+; THUMB2-NEXT: ldrh.w lr, [sp, #32]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r2, r3, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r2, r4
+; THUMB2-NEXT: ldrh.w lr, [sp, #36]
+; THUMB2-NEXT: ldrh.w r4, [sp, #20]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <4 x bfloat> @llvm.ct.select.v4bf16(i1 %cond, <4 x bfloat> %a, <4 x bfloat> %b)
+ ret <4 x bfloat> %sel
+}
+
+define <8 x half> @ct_v8f16(i1 %cond, <8 x half> %a, <8 x half> %b) {
+; CT-LABEL: ct_v8f16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CT-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CT-NEXT: ldrh r12, [sp, #36]
+; CT-NEXT: pkhbt r2, r2, r3, lsl #16
+; CT-NEXT: ldrh r7, [sp, #32]
+; CT-NEXT: and r1, r1, #1
+; CT-NEXT: ldrh r3, [sp, #52]
+; CT-NEXT: vmov.32 d16[0], r2
+; CT-NEXT: ldrh r2, [sp, #48]
+; CT-NEXT: orr r7, r7, r12, lsl #16
+; CT-NEXT: ldrh r5, [sp, #68]
+; CT-NEXT: orr r2, r2, r3, lsl #16
+; CT-NEXT: vmov.32 d17[0], r7
+; CT-NEXT: ldrh r7, [sp, #64]
+; CT-NEXT: ldrh r3, [sp, #28]
+; CT-NEXT: vmov.32 d18[0], r2
+; CT-NEXT: ldrh r2, [sp, #24]
+; CT-NEXT: orr r7, r7, r5, lsl #16
+; CT-NEXT: ldrh r5, [sp, #76]
+; CT-NEXT: vmov.32 d19[0], r7
+; CT-NEXT: orr r2, r2, r3, lsl #16
+; CT-NEXT: ldrh r7, [sp, #72]
+; CT-NEXT: ldrh lr, [sp, #60]
+; CT-NEXT: vmov.32 d16[1], r2
+; CT-NEXT: orr r2, r7, r5, lsl #16
+; CT-NEXT: ldrh r4, [sp, #56]
+; CT-NEXT: ldrh r8, [sp, #44]
+; CT-NEXT: vmov.32 d19[1], r2
+; CT-NEXT: orr r2, r4, lr, lsl #16
+; CT-NEXT: ldrh r6, [sp, #40]
+; CT-NEXT: vmov.32 d18[1], r2
+; CT-NEXT: orr r2, r6, r8, lsl #16
+; CT-NEXT: vmov.32 d17[1], r2
+; CT-NEXT: rsb r2, r1, #0
+; CT-NEXT: vdup.32 q11, r2
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vst1.64 {d20, d21}, [r0:128]
+; CT-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; BFLOAT-F16-NATIVE-LABEL: ct_v8f16:
+; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry
+; BFLOAT-F16-NATIVE-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; BFLOAT-F16-NATIVE-NEXT: push {r4, r5, r6, r7, r8, lr}
+; BFLOAT-F16-NATIVE-NEXT: ldrh r12, [sp, #36]
+; BFLOAT-F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: ldrh r7, [sp, #32]
+; BFLOAT-F16-NATIVE-NEXT: and r1, r1, #1
+; BFLOAT-F16-NATIVE-NEXT: ldrh r3, [sp, #52]
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d16[0], r2
+; BFLOAT-F16-NATIVE-NEXT: ldrh r2, [sp, #48]
+; BFLOAT-F16-NATIVE-NEXT: orr r7, r7, r12, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: ldrh r5, [sp, #68]
+; BFLOAT-F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d17[0], r7
+; BFLOAT-F16-NATIVE-NEXT: ldrh r7, [sp, #64]
+; BFLOAT-F16-NATIVE-NEXT: ldrh r3, [sp, #28]
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d18[0], r2
+; BFLOAT-F16-NATIVE-NEXT: ldrh r2, [sp, #24]
+; BFLOAT-F16-NATIVE-NEXT: orr r7, r7, r5, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: ldrh r5, [sp, #76]
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d19[0], r7
+; BFLOAT-F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: ldrh r7, [sp, #72]
+; BFLOAT-F16-NATIVE-NEXT: ldrh lr, [sp, #60]
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d16[1], r2
+; BFLOAT-F16-NATIVE-NEXT: orr r2, r7, r5, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: ldrh r4, [sp, #56]
+; BFLOAT-F16-NATIVE-NEXT: ldrh r8, [sp, #44]
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d19[1], r2
+; BFLOAT-F16-NATIVE-NEXT: orr r2, r4, lr, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: ldrh r6, [sp, #40]
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d18[1], r2
+; BFLOAT-F16-NATIVE-NEXT: orr r2, r6, r8, lsl #16
+; BFLOAT-F16-NATIVE-NEXT: vmov.32 d17[1], r2
+; BFLOAT-F16-NATIVE-NEXT: rsb r2, r1, #0
+; BFLOAT-F16-NATIVE-NEXT: vdup.32 q11, r2
+; BFLOAT-F16-NATIVE-NEXT: vand q10, q8, q11
+; BFLOAT-F16-NATIVE-NEXT: vbic q11, q9, q11
+; BFLOAT-F16-NATIVE-NEXT: vorr q10, q10, q11
+; BFLOAT-F16-NATIVE-NEXT: vst1.64 {d20, d21}, [r0:128]
+; BFLOAT-F16-NATIVE-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; F16-NATIVE-LABEL: ct_v8f16:
+; F16-NATIVE: @ %bb.0: @ %entry
+; F16-NATIVE-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; F16-NATIVE-NEXT: push {r4, r5, r6, r7, r8, lr}
+; F16-NATIVE-NEXT: ldrh r12, [sp, #36]
+; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: ldrh r7, [sp, #32]
+; F16-NATIVE-NEXT: and r1, r1, #1
+; F16-NATIVE-NEXT: ldrh r3, [sp, #52]
+; F16-NATIVE-NEXT: vmov.32 d16[0], r2
+; F16-NATIVE-NEXT: ldrh r2, [sp, #48]
+; F16-NATIVE-NEXT: orr r7, r7, r12, lsl #16
+; F16-NATIVE-NEXT: ldrh r5, [sp, #68]
+; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: vmov.32 d17[0], r7
+; F16-NATIVE-NEXT: ldrh r7, [sp, #64]
+; F16-NATIVE-NEXT: ldrh r3, [sp, #28]
+; F16-NATIVE-NEXT: vmov.32 d18[0], r2
+; F16-NATIVE-NEXT: ldrh r2, [sp, #24]
+; F16-NATIVE-NEXT: orr r7, r7, r5, lsl #16
+; F16-NATIVE-NEXT: ldrh r5, [sp, #76]
+; F16-NATIVE-NEXT: vmov.32 d19[0], r7
+; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: ldrh r7, [sp, #72]
+; F16-NATIVE-NEXT: ldrh lr, [sp, #60]
+; F16-NATIVE-NEXT: vmov.32 d16[1], r2
+; F16-NATIVE-NEXT: orr r2, r7, r5, lsl #16
+; F16-NATIVE-NEXT: ldrh r4, [sp, #56]
+; F16-NATIVE-NEXT: ldrh r8, [sp, #44]
+; F16-NATIVE-NEXT: vmov.32 d19[1], r2
+; F16-NATIVE-NEXT: orr r2, r4, lr, lsl #16
+; F16-NATIVE-NEXT: ldrh r6, [sp, #40]
+; F16-NATIVE-NEXT: vmov.32 d18[1], r2
+; F16-NATIVE-NEXT: orr r2, r6, r8, lsl #16
+; F16-NATIVE-NEXT: vmov.32 d17[1], r2
+; F16-NATIVE-NEXT: rsb r2, r1, #0
+; F16-NATIVE-NEXT: vdup.32 q11, r2
+; F16-NATIVE-NEXT: vand q10, q8, q11
+; F16-NATIVE-NEXT: vbic q11, q9, q11
+; F16-NATIVE-NEXT: vorr q10, q10, q11
+; F16-NATIVE-NEXT: vst1.64 {d20, d21}, [r0:128]
+; F16-NATIVE-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; THUMB1-LABEL: ct_v8f16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r1
+; THUMB1-NEXT: ldr r1, [sp, #76]
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #14]
+; THUMB1-NEXT: ldr r1, [sp, #72]
+; THUMB1-NEXT: ldr r5, [sp, #40]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #12]
+; THUMB1-NEXT: ldr r1, [sp, #68]
+; THUMB1-NEXT: ldr r5, [sp, #36]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #10]
+; THUMB1-NEXT: ldr r1, [sp, #64]
+; THUMB1-NEXT: ldr r5, [sp, #32]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #8]
+; THUMB1-NEXT: ldr r1, [sp, #60]
+; THUMB1-NEXT: ldr r5, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #6]
+; THUMB1-NEXT: ldr r1, [sp, #56]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #4]
+; THUMB1-NEXT: ldr r1, [sp, #52]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r5, r3
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: ands r5, r6
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: strh r5, [r0, #2]
+; THUMB1-NEXT: ldr r1, [sp, #48]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r3, r2
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: ands r3, r5
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: strh r3, [r0]
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v8f16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and lr, r1, #1
+; THUMB2-NEXT: ldrh.w r12, [sp, #68]
+; THUMB2-NEXT: ldrh.w r1, [sp, #36]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r4, r1, r5
+; THUMB2-NEXT: bic.w r5, r12, r5
+; THUMB2-NEXT: orrs r4, r5
+; THUMB2-NEXT: strh r4, [r0, #14]
+; THUMB2-NEXT: ldrh.w r12, [sp, #64]
+; THUMB2-NEXT: ldrh.w r5, [sp, #32]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #12]
+; THUMB2-NEXT: ldrh.w r12, [sp, #60]
+; THUMB2-NEXT: ldrh.w r5, [sp, #28]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #10]
+; THUMB2-NEXT: ldrh.w r12, [sp, #56]
+; THUMB2-NEXT: ldrh.w r5, [sp, #24]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #8]
+; THUMB2-NEXT: ldrh.w r12, [sp, #52]
+; THUMB2-NEXT: ldrh.w r5, [sp, #20]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #6]
+; THUMB2-NEXT: ldrh.w r12, [sp, #48]
+; THUMB2-NEXT: ldrh.w r5, [sp, #16]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #4]
+; THUMB2-NEXT: ldrh.w r1, [sp, #44]
+; THUMB2-NEXT: rsb.w r4, lr, #0
+; THUMB2-NEXT: and.w r5, r3, r4
+; THUMB2-NEXT: bic.w r4, r1, r4
+; THUMB2-NEXT: orrs r5, r4
+; THUMB2-NEXT: strh r5, [r0, #2]
+; THUMB2-NEXT: ldrh.w r1, [sp, #40]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r3, r2, r5
+; THUMB2-NEXT: bic.w r5, r1, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: strh r3, [r0]
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <8 x half> @llvm.ct.select.v8f16(i1 %cond, <8 x half> %a, <8 x half> %b)
+ ret <8 x half> %sel
+}
+
+define <8 x bfloat> @ct_v8bf16(i1 %cond, <8 x bfloat> %a, <8 x bfloat> %b) {
+; CT-LABEL: ct_v8bf16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; CT-NEXT: push {r4, r5, r6, r7, r8, lr}
+; CT-NEXT: ldrh r12, [sp, #36]
+; CT-NEXT: pkhbt r2, r2, r3, lsl #16
+; CT-NEXT: ldrh r7, [sp, #32]
+; CT-NEXT: and r1, r1, #1
+; CT-NEXT: ldrh r3, [sp, #52]
+; CT-NEXT: vmov.32 d16[0], r2
+; CT-NEXT: ldrh r2, [sp, #48]
+; CT-NEXT: orr r7, r7, r12, lsl #16
+; CT-NEXT: ldrh r5, [sp, #68]
+; CT-NEXT: orr r2, r2, r3, lsl #16
+; CT-NEXT: vmov.32 d17[0], r7
+; CT-NEXT: ldrh r7, [sp, #64]
+; CT-NEXT: ldrh r3, [sp, #28]
+; CT-NEXT: vmov.32 d18[0], r2
+; CT-NEXT: ldrh r2, [sp, #24]
+; CT-NEXT: orr r7, r7, r5, lsl #16
+; CT-NEXT: ldrh r5, [sp, #76]
+; CT-NEXT: vmov.32 d19[0], r7
+; CT-NEXT: orr r2, r2, r3, lsl #16
+; CT-NEXT: ldrh r7, [sp, #72]
+; CT-NEXT: ldrh lr, [sp, #60]
+; CT-NEXT: vmov.32 d16[1], r2
+; CT-NEXT: orr r2, r7, r5, lsl #16
+; CT-NEXT: ldrh r4, [sp, #56]
+; CT-NEXT: ldrh r8, [sp, #44]
+; CT-NEXT: vmov.32 d19[1], r2
+; CT-NEXT: orr r2, r4, lr, lsl #16
+; CT-NEXT: ldrh r6, [sp, #40]
+; CT-NEXT: vmov.32 d18[1], r2
+; CT-NEXT: orr r2, r6, r8, lsl #16
+; CT-NEXT: vmov.32 d17[1], r2
+; CT-NEXT: rsb r2, r1, #0
+; CT-NEXT: vdup.32 q11, r2
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vst1.64 {d20, d21}, [r0:128]
+; CT-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; BFLOAT-F16-NATIVE-LABEL: ct_v8bf16:
+; BFLOAT-F16-NATIVE: @ %bb.0: @ %entry
+; BFLOAT-F16-NATIVE-NEXT: vldr d17, [sp]
+; BFLOAT-F16-NATIVE-NEXT: add r1, sp, #8
+; BFLOAT-F16-NATIVE-NEXT: vmov d16, r2, r3
+; BFLOAT-F16-NATIVE-NEXT: vld1.64 {d18, d19}, [r1]
+; BFLOAT-F16-NATIVE-NEXT: and r0, r0, #1
+; BFLOAT-F16-NATIVE-NEXT: rsb r1, r0, #0
+; BFLOAT-F16-NATIVE-NEXT: vdup.32 q11, r1
+; BFLOAT-F16-NATIVE-NEXT: vand q10, q8, q11
+; BFLOAT-F16-NATIVE-NEXT: vbic q11, q9, q11
+; BFLOAT-F16-NATIVE-NEXT: vorr q10, q10, q11
+; BFLOAT-F16-NATIVE-NEXT: vmov r0, r1, d20
+; BFLOAT-F16-NATIVE-NEXT: vmov r2, r3, d21
+; BFLOAT-F16-NATIVE-NEXT: bx lr
+;
+; F16-NATIVE-LABEL: ct_v8bf16:
+; F16-NATIVE: @ %bb.0: @ %entry
+; F16-NATIVE-NEXT: .save {r4, r5, r6, r7, r8, lr}
+; F16-NATIVE-NEXT: push {r4, r5, r6, r7, r8, lr}
+; F16-NATIVE-NEXT: ldrh r12, [sp, #36]
+; F16-NATIVE-NEXT: pkhbt r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: ldrh r7, [sp, #32]
+; F16-NATIVE-NEXT: and r1, r1, #1
+; F16-NATIVE-NEXT: ldrh r3, [sp, #52]
+; F16-NATIVE-NEXT: vmov.32 d16[0], r2
+; F16-NATIVE-NEXT: ldrh r2, [sp, #48]
+; F16-NATIVE-NEXT: orr r7, r7, r12, lsl #16
+; F16-NATIVE-NEXT: ldrh r5, [sp, #68]
+; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: vmov.32 d17[0], r7
+; F16-NATIVE-NEXT: ldrh r7, [sp, #64]
+; F16-NATIVE-NEXT: ldrh r3, [sp, #28]
+; F16-NATIVE-NEXT: vmov.32 d18[0], r2
+; F16-NATIVE-NEXT: ldrh r2, [sp, #24]
+; F16-NATIVE-NEXT: orr r7, r7, r5, lsl #16
+; F16-NATIVE-NEXT: ldrh r5, [sp, #76]
+; F16-NATIVE-NEXT: vmov.32 d19[0], r7
+; F16-NATIVE-NEXT: orr r2, r2, r3, lsl #16
+; F16-NATIVE-NEXT: ldrh r7, [sp, #72]
+; F16-NATIVE-NEXT: ldrh lr, [sp, #60]
+; F16-NATIVE-NEXT: vmov.32 d16[1], r2
+; F16-NATIVE-NEXT: orr r2, r7, r5, lsl #16
+; F16-NATIVE-NEXT: ldrh r4, [sp, #56]
+; F16-NATIVE-NEXT: ldrh r8, [sp, #44]
+; F16-NATIVE-NEXT: vmov.32 d19[1], r2
+; F16-NATIVE-NEXT: orr r2, r4, lr, lsl #16
+; F16-NATIVE-NEXT: ldrh r6, [sp, #40]
+; F16-NATIVE-NEXT: vmov.32 d18[1], r2
+; F16-NATIVE-NEXT: orr r2, r6, r8, lsl #16
+; F16-NATIVE-NEXT: vmov.32 d17[1], r2
+; F16-NATIVE-NEXT: rsb r2, r1, #0
+; F16-NATIVE-NEXT: vdup.32 q11, r2
+; F16-NATIVE-NEXT: vand q10, q8, q11
+; F16-NATIVE-NEXT: vbic q11, q9, q11
+; F16-NATIVE-NEXT: vorr q10, q10, q11
+; F16-NATIVE-NEXT: vst1.64 {d20, d21}, [r0:128]
+; F16-NATIVE-NEXT: pop {r4, r5, r6, r7, r8, pc}
+;
+; THUMB1-LABEL: ct_v8bf16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r1
+; THUMB1-NEXT: ldr r1, [sp, #76]
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #14]
+; THUMB1-NEXT: ldr r1, [sp, #72]
+; THUMB1-NEXT: ldr r5, [sp, #40]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #12]
+; THUMB1-NEXT: ldr r1, [sp, #68]
+; THUMB1-NEXT: ldr r5, [sp, #36]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #10]
+; THUMB1-NEXT: ldr r1, [sp, #64]
+; THUMB1-NEXT: ldr r5, [sp, #32]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #8]
+; THUMB1-NEXT: ldr r1, [sp, #60]
+; THUMB1-NEXT: ldr r5, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #6]
+; THUMB1-NEXT: ldr r1, [sp, #56]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #4]
+; THUMB1-NEXT: ldr r1, [sp, #52]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r5, r3
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: ands r5, r6
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: strh r5, [r0, #2]
+; THUMB1-NEXT: ldr r1, [sp, #48]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r3, r2
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: ands r3, r5
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: strh r3, [r0]
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v8bf16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and lr, r1, #1
+; THUMB2-NEXT: ldrh.w r12, [sp, #68]
+; THUMB2-NEXT: ldrh.w r1, [sp, #36]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r4, r1, r5
+; THUMB2-NEXT: bic.w r5, r12, r5
+; THUMB2-NEXT: orrs r4, r5
+; THUMB2-NEXT: strh r4, [r0, #14]
+; THUMB2-NEXT: ldrh.w r12, [sp, #64]
+; THUMB2-NEXT: ldrh.w r5, [sp, #32]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #12]
+; THUMB2-NEXT: ldrh.w r12, [sp, #60]
+; THUMB2-NEXT: ldrh.w r5, [sp, #28]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #10]
+; THUMB2-NEXT: ldrh.w r12, [sp, #56]
+; THUMB2-NEXT: ldrh.w r5, [sp, #24]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #8]
+; THUMB2-NEXT: ldrh.w r12, [sp, #52]
+; THUMB2-NEXT: ldrh.w r5, [sp, #20]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #6]
+; THUMB2-NEXT: ldrh.w r12, [sp, #48]
+; THUMB2-NEXT: ldrh.w r5, [sp, #16]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #4]
+; THUMB2-NEXT: ldrh.w r1, [sp, #44]
+; THUMB2-NEXT: rsb.w r4, lr, #0
+; THUMB2-NEXT: and.w r5, r3, r4
+; THUMB2-NEXT: bic.w r4, r1, r4
+; THUMB2-NEXT: orrs r5, r4
+; THUMB2-NEXT: strh r5, [r0, #2]
+; THUMB2-NEXT: ldrh.w r1, [sp, #40]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r3, r2, r5
+; THUMB2-NEXT: bic.w r5, r1, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: strh r3, [r0]
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <8 x bfloat> @llvm.ct.select.v8bf16(i1 %cond, <8 x bfloat> %a, <8 x bfloat> %b)
+ ret <8 x bfloat> %sel
+}
diff --git a/llvm/test/CodeGen/ARM/ctselect-vector.ll b/llvm/test/CodeGen/ARM/ctselect-vector.ll
new file mode 100644
index 0000000000000..22619735c4535
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ctselect-vector.ll
@@ -0,0 +1,2179 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=CT %s
+; RUN: llc < %s -mtriple=armv6 -verify-machineinstrs | FileCheck --check-prefix=DEFAULT %s
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB1 %s
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB2 %s
+
+define <8 x i8> @ct_v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b) {
+; CT-LABEL: ct_v8i8:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v8i8:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and lr, r1, #1
+; DEFAULT-NEXT: ldrb r12, [sp, #68]
+; DEFAULT-NEXT: ldrb r1, [sp, #36]
+; DEFAULT-NEXT: rsb r5, lr, #0
+; DEFAULT-NEXT: and r4, r1, r5
+; DEFAULT-NEXT: bic r5, r12, r5
+; DEFAULT-NEXT: orr r4, r4, r5
+; DEFAULT-NEXT: strb r4, [r0, #7]
+; DEFAULT-NEXT: ldrb r12, [sp, #64]
+; DEFAULT-NEXT: ldrb r5, [sp, #32]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #6]
+; DEFAULT-NEXT: ldrb r12, [sp, #60]
+; DEFAULT-NEXT: ldrb r5, [sp, #28]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #5]
+; DEFAULT-NEXT: ldrb r12, [sp, #56]
+; DEFAULT-NEXT: ldrb r5, [sp, #24]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #4]
+; DEFAULT-NEXT: ldrb r12, [sp, #52]
+; DEFAULT-NEXT: ldrb r5, [sp, #20]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #3]
+; DEFAULT-NEXT: ldrb r12, [sp, #48]
+; DEFAULT-NEXT: ldrb r5, [sp, #16]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #2]
+; DEFAULT-NEXT: ldrb r1, [sp, #44]
+; DEFAULT-NEXT: rsb r4, lr, #0
+; DEFAULT-NEXT: and r5, r3, r4
+; DEFAULT-NEXT: bic r4, r1, r4
+; DEFAULT-NEXT: orr r5, r5, r4
+; DEFAULT-NEXT: strb r5, [r0, #1]
+; DEFAULT-NEXT: ldrb r1, [sp, #40]
+; DEFAULT-NEXT: rsb r5, lr, #0
+; DEFAULT-NEXT: and r3, r2, r5
+; DEFAULT-NEXT: bic r5, r1, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: strb r3, [r0]
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v8i8:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r1
+; THUMB1-NEXT: ldr r1, [sp, #76]
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #7]
+; THUMB1-NEXT: ldr r1, [sp, #72]
+; THUMB1-NEXT: ldr r5, [sp, #40]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #6]
+; THUMB1-NEXT: ldr r1, [sp, #68]
+; THUMB1-NEXT: ldr r5, [sp, #36]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #5]
+; THUMB1-NEXT: ldr r1, [sp, #64]
+; THUMB1-NEXT: ldr r5, [sp, #32]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #4]
+; THUMB1-NEXT: ldr r1, [sp, #60]
+; THUMB1-NEXT: ldr r5, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #3]
+; THUMB1-NEXT: ldr r1, [sp, #56]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #2]
+; THUMB1-NEXT: ldr r1, [sp, #52]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r5, r3
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: ands r5, r6
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: strb r5, [r0, #1]
+; THUMB1-NEXT: ldr r1, [sp, #48]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r3, r2
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: ands r3, r5
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: strb r3, [r0]
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v8i8:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and lr, r1, #1
+; THUMB2-NEXT: ldrb.w r12, [sp, #68]
+; THUMB2-NEXT: ldrb.w r1, [sp, #36]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r4, r1, r5
+; THUMB2-NEXT: bic.w r5, r12, r5
+; THUMB2-NEXT: orrs r4, r5
+; THUMB2-NEXT: strb r4, [r0, #7]
+; THUMB2-NEXT: ldrb.w r12, [sp, #64]
+; THUMB2-NEXT: ldrb.w r5, [sp, #32]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #6]
+; THUMB2-NEXT: ldrb.w r12, [sp, #60]
+; THUMB2-NEXT: ldrb.w r5, [sp, #28]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #5]
+; THUMB2-NEXT: ldrb.w r12, [sp, #56]
+; THUMB2-NEXT: ldrb.w r5, [sp, #24]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #4]
+; THUMB2-NEXT: ldrb.w r12, [sp, #52]
+; THUMB2-NEXT: ldrb.w r5, [sp, #20]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #3]
+; THUMB2-NEXT: ldrb.w r12, [sp, #48]
+; THUMB2-NEXT: ldrb.w r5, [sp, #16]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #2]
+; THUMB2-NEXT: ldrb.w r1, [sp, #44]
+; THUMB2-NEXT: rsb.w r4, lr, #0
+; THUMB2-NEXT: and.w r5, r3, r4
+; THUMB2-NEXT: bic.w r4, r1, r4
+; THUMB2-NEXT: orrs r5, r4
+; THUMB2-NEXT: strb r5, [r0, #1]
+; THUMB2-NEXT: ldrb.w r1, [sp, #40]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r3, r2, r5
+; THUMB2-NEXT: bic.w r5, r1, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: strb r3, [r0]
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <8 x i8> @llvm.ct.select.v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b)
+ ret <8 x i8> %sel
+}
+
+define <4 x i16> @ct_v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b) {
+; CT-LABEL: ct_v4i16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v4i16:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldrh r1, [sp, #24]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldrh r2, [sp, #28]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: ldrh r3, [sp, #16]
+; DEFAULT-NEXT: ldrh lr, [sp, #32]
+; DEFAULT-NEXT: rsb r4, r12, #0
+; DEFAULT-NEXT: and r2, r3, r4
+; DEFAULT-NEXT: bic r4, lr, r4
+; DEFAULT-NEXT: orr r2, r2, r4
+; DEFAULT-NEXT: ldrh lr, [sp, #36]
+; DEFAULT-NEXT: ldrh r4, [sp, #20]
+; DEFAULT-NEXT: rsb r5, r12, #0
+; DEFAULT-NEXT: and r3, r4, r5
+; DEFAULT-NEXT: bic r5, lr, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v4i16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #32]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #36]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ldr r3, [sp, #40]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r5
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: ldr r6, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v4i16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldrh.w r1, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldrh.w r2, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldrh.w r3, [sp, #16]
+; THUMB2-NEXT: ldrh.w lr, [sp, #32]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r2, r3, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r2, r4
+; THUMB2-NEXT: ldrh.w lr, [sp, #36]
+; THUMB2-NEXT: ldrh.w r4, [sp, #20]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <4 x i16> @llvm.ct.select.v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b)
+ ret <4 x i16> %sel
+}
+
+define <2 x i32> @ct_v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b) {
+; CT-LABEL: ct_v2i32:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v2i32:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #8]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #12]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: pop {r11, pc}
+;
+; THUMB1-LABEL: ct_v2i32:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #16]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #20]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB2-LABEL: ct_v2i32:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r7, lr}
+; THUMB2-NEXT: push {r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #8]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #12]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: pop {r7, pc}
+entry:
+ %sel = call <2 x i32> @llvm.ct.select.v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b)
+ ret <2 x i32> %sel
+}
+
+define <1 x i64> @ct_v1i64(i1 %cond, <1 x i64> %a, <1 x i64> %b) {
+; CT-LABEL: ct_v1i64:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v1i64:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #8]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #12]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: pop {r11, pc}
+;
+; THUMB1-LABEL: ct_v1i64:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #16]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #20]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB2-LABEL: ct_v1i64:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r7, lr}
+; THUMB2-NEXT: push {r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #8]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #12]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: pop {r7, pc}
+entry:
+ %sel = call <1 x i64> @llvm.ct.select.v1i64(i1 %cond, <1 x i64> %a, <1 x i64> %b)
+ ret <1 x i64> %sel
+}
+
+define <2 x float> @ct_v2f32(i1 %cond, <2 x float> %a, <2 x float> %b) {
+; CT-LABEL: ct_v2f32:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v2f32:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #8]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #12]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: pop {r11, pc}
+;
+; THUMB1-LABEL: ct_v2f32:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #16]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #20]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB2-LABEL: ct_v2f32:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r7, lr}
+; THUMB2-NEXT: push {r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #8]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #12]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: pop {r7, pc}
+entry:
+ %sel = call <2 x float> @llvm.ct.select.v2f32(i1 %cond, <2 x float> %a, <2 x float> %b)
+ ret <2 x float> %sel
+}
+
+define <16 x i8> @ct_v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b) {
+; CT-LABEL: ct_v16i8:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d17, [sp]
+; CT-NEXT: add r1, sp, #8
+; CT-NEXT: vmov d16, r2, r3
+; CT-NEXT: vld1.64 {d18, d19}, [r1]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 q11, r1
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vmov r0, r1, d20
+; CT-NEXT: vmov r2, r3, d21
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v16i8:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and lr, r1, #1
+; DEFAULT-NEXT: ldrb r12, [sp, #132]
+; DEFAULT-NEXT: ldrb r1, [sp, #68]
+; DEFAULT-NEXT: rsb r5, lr, #0
+; DEFAULT-NEXT: and r4, r1, r5
+; DEFAULT-NEXT: bic r5, r12, r5
+; DEFAULT-NEXT: orr r4, r4, r5
+; DEFAULT-NEXT: strb r4, [r0, #15]
+; DEFAULT-NEXT: ldrb r12, [sp, #128]
+; DEFAULT-NEXT: ldrb r5, [sp, #64]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #14]
+; DEFAULT-NEXT: ldrb r12, [sp, #124]
+; DEFAULT-NEXT: ldrb r5, [sp, #60]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #13]
+; DEFAULT-NEXT: ldrb r12, [sp, #120]
+; DEFAULT-NEXT: ldrb r5, [sp, #56]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #12]
+; DEFAULT-NEXT: ldrb r12, [sp, #116]
+; DEFAULT-NEXT: ldrb r5, [sp, #52]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #11]
+; DEFAULT-NEXT: ldrb r12, [sp, #112]
+; DEFAULT-NEXT: ldrb r5, [sp, #48]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #10]
+; DEFAULT-NEXT: ldrb r12, [sp, #108]
+; DEFAULT-NEXT: ldrb r5, [sp, #44]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #9]
+; DEFAULT-NEXT: ldrb r12, [sp, #104]
+; DEFAULT-NEXT: ldrb r5, [sp, #40]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #8]
+; DEFAULT-NEXT: ldrb r12, [sp, #100]
+; DEFAULT-NEXT: ldrb r5, [sp, #36]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #7]
+; DEFAULT-NEXT: ldrb r12, [sp, #96]
+; DEFAULT-NEXT: ldrb r5, [sp, #32]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #6]
+; DEFAULT-NEXT: ldrb r12, [sp, #92]
+; DEFAULT-NEXT: ldrb r5, [sp, #28]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #5]
+; DEFAULT-NEXT: ldrb r12, [sp, #88]
+; DEFAULT-NEXT: ldrb r5, [sp, #24]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #4]
+; DEFAULT-NEXT: ldrb r12, [sp, #84]
+; DEFAULT-NEXT: ldrb r5, [sp, #20]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #3]
+; DEFAULT-NEXT: ldrb r12, [sp, #80]
+; DEFAULT-NEXT: ldrb r5, [sp, #16]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strb r4, [r0, #2]
+; DEFAULT-NEXT: ldrb r1, [sp, #76]
+; DEFAULT-NEXT: rsb r4, lr, #0
+; DEFAULT-NEXT: and r5, r3, r4
+; DEFAULT-NEXT: bic r4, r1, r4
+; DEFAULT-NEXT: orr r5, r5, r4
+; DEFAULT-NEXT: strb r5, [r0, #1]
+; DEFAULT-NEXT: ldrb r1, [sp, #72]
+; DEFAULT-NEXT: rsb r5, lr, #0
+; DEFAULT-NEXT: and r3, r2, r5
+; DEFAULT-NEXT: bic r5, r1, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: strb r3, [r0]
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v16i8:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r1
+; THUMB1-NEXT: ldr r1, [sp, #140]
+; THUMB1-NEXT: ldr r5, [sp, #76]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #15]
+; THUMB1-NEXT: ldr r1, [sp, #136]
+; THUMB1-NEXT: ldr r5, [sp, #72]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #14]
+; THUMB1-NEXT: ldr r1, [sp, #132]
+; THUMB1-NEXT: ldr r5, [sp, #68]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #13]
+; THUMB1-NEXT: ldr r1, [sp, #128]
+; THUMB1-NEXT: ldr r5, [sp, #64]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #12]
+; THUMB1-NEXT: ldr r1, [sp, #124]
+; THUMB1-NEXT: ldr r5, [sp, #60]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #11]
+; THUMB1-NEXT: ldr r1, [sp, #120]
+; THUMB1-NEXT: ldr r5, [sp, #56]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #10]
+; THUMB1-NEXT: ldr r1, [sp, #116]
+; THUMB1-NEXT: ldr r5, [sp, #52]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #9]
+; THUMB1-NEXT: ldr r1, [sp, #112]
+; THUMB1-NEXT: ldr r5, [sp, #48]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #8]
+; THUMB1-NEXT: ldr r1, [sp, #108]
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #7]
+; THUMB1-NEXT: ldr r1, [sp, #104]
+; THUMB1-NEXT: ldr r5, [sp, #40]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #6]
+; THUMB1-NEXT: ldr r1, [sp, #100]
+; THUMB1-NEXT: ldr r5, [sp, #36]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #5]
+; THUMB1-NEXT: ldr r1, [sp, #96]
+; THUMB1-NEXT: ldr r5, [sp, #32]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #4]
+; THUMB1-NEXT: ldr r1, [sp, #92]
+; THUMB1-NEXT: ldr r5, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #3]
+; THUMB1-NEXT: ldr r1, [sp, #88]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strb r6, [r0, #2]
+; THUMB1-NEXT: ldr r1, [sp, #84]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r5, r3
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: ands r5, r6
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: strb r5, [r0, #1]
+; THUMB1-NEXT: ldr r1, [sp, #80]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r3, r2
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: ands r3, r5
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: strb r3, [r0]
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v16i8:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and lr, r1, #1
+; THUMB2-NEXT: ldrb.w r12, [sp, #132]
+; THUMB2-NEXT: ldrb.w r1, [sp, #68]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r4, r1, r5
+; THUMB2-NEXT: bic.w r5, r12, r5
+; THUMB2-NEXT: orrs r4, r5
+; THUMB2-NEXT: strb r4, [r0, #15]
+; THUMB2-NEXT: ldrb.w r12, [sp, #128]
+; THUMB2-NEXT: ldrb.w r5, [sp, #64]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #14]
+; THUMB2-NEXT: ldrb.w r12, [sp, #124]
+; THUMB2-NEXT: ldrb.w r5, [sp, #60]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #13]
+; THUMB2-NEXT: ldrb.w r12, [sp, #120]
+; THUMB2-NEXT: ldrb.w r5, [sp, #56]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #12]
+; THUMB2-NEXT: ldrb.w r12, [sp, #116]
+; THUMB2-NEXT: ldrb.w r5, [sp, #52]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #11]
+; THUMB2-NEXT: ldrb.w r12, [sp, #112]
+; THUMB2-NEXT: ldrb.w r5, [sp, #48]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #10]
+; THUMB2-NEXT: ldrb.w r12, [sp, #108]
+; THUMB2-NEXT: ldrb.w r5, [sp, #44]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #9]
+; THUMB2-NEXT: ldrb.w r12, [sp, #104]
+; THUMB2-NEXT: ldrb.w r5, [sp, #40]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #8]
+; THUMB2-NEXT: ldrb.w r12, [sp, #100]
+; THUMB2-NEXT: ldrb.w r5, [sp, #36]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #7]
+; THUMB2-NEXT: ldrb.w r12, [sp, #96]
+; THUMB2-NEXT: ldrb.w r5, [sp, #32]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #6]
+; THUMB2-NEXT: ldrb.w r12, [sp, #92]
+; THUMB2-NEXT: ldrb.w r5, [sp, #28]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #5]
+; THUMB2-NEXT: ldrb.w r12, [sp, #88]
+; THUMB2-NEXT: ldrb.w r5, [sp, #24]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #4]
+; THUMB2-NEXT: ldrb.w r12, [sp, #84]
+; THUMB2-NEXT: ldrb.w r5, [sp, #20]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #3]
+; THUMB2-NEXT: ldrb.w r12, [sp, #80]
+; THUMB2-NEXT: ldrb.w r5, [sp, #16]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strb r4, [r0, #2]
+; THUMB2-NEXT: ldrb.w r1, [sp, #76]
+; THUMB2-NEXT: rsb.w r4, lr, #0
+; THUMB2-NEXT: and.w r5, r3, r4
+; THUMB2-NEXT: bic.w r4, r1, r4
+; THUMB2-NEXT: orrs r5, r4
+; THUMB2-NEXT: strb r5, [r0, #1]
+; THUMB2-NEXT: ldrb.w r1, [sp, #72]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r3, r2, r5
+; THUMB2-NEXT: bic.w r5, r1, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: strb r3, [r0]
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <16 x i8> @llvm.ct.select.v16i8(i1 %cond, <16 x i8> %a, <16 x i8> %b)
+ ret <16 x i8> %sel
+}
+
+define <8 x i16> @ct_v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) {
+; CT-LABEL: ct_v8i16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d17, [sp]
+; CT-NEXT: add r1, sp, #8
+; CT-NEXT: vmov d16, r2, r3
+; CT-NEXT: vld1.64 {d18, d19}, [r1]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 q11, r1
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vmov r0, r1, d20
+; CT-NEXT: vmov r2, r3, d21
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v8i16:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and lr, r1, #1
+; DEFAULT-NEXT: ldrh r12, [sp, #68]
+; DEFAULT-NEXT: ldrh r1, [sp, #36]
+; DEFAULT-NEXT: rsb r5, lr, #0
+; DEFAULT-NEXT: and r4, r1, r5
+; DEFAULT-NEXT: bic r5, r12, r5
+; DEFAULT-NEXT: orr r4, r4, r5
+; DEFAULT-NEXT: strh r4, [r0, #14]
+; DEFAULT-NEXT: ldrh r12, [sp, #64]
+; DEFAULT-NEXT: ldrh r5, [sp, #32]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strh r4, [r0, #12]
+; DEFAULT-NEXT: ldrh r12, [sp, #60]
+; DEFAULT-NEXT: ldrh r5, [sp, #28]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strh r4, [r0, #10]
+; DEFAULT-NEXT: ldrh r12, [sp, #56]
+; DEFAULT-NEXT: ldrh r5, [sp, #24]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strh r4, [r0, #8]
+; DEFAULT-NEXT: ldrh r12, [sp, #52]
+; DEFAULT-NEXT: ldrh r5, [sp, #20]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strh r4, [r0, #6]
+; DEFAULT-NEXT: ldrh r12, [sp, #48]
+; DEFAULT-NEXT: ldrh r5, [sp, #16]
+; DEFAULT-NEXT: rsb r1, lr, #0
+; DEFAULT-NEXT: and r4, r5, r1
+; DEFAULT-NEXT: bic r1, r12, r1
+; DEFAULT-NEXT: orr r4, r4, r1
+; DEFAULT-NEXT: strh r4, [r0, #4]
+; DEFAULT-NEXT: ldrh r1, [sp, #44]
+; DEFAULT-NEXT: rsb r4, lr, #0
+; DEFAULT-NEXT: and r5, r3, r4
+; DEFAULT-NEXT: bic r4, r1, r4
+; DEFAULT-NEXT: orr r5, r5, r4
+; DEFAULT-NEXT: strh r5, [r0, #2]
+; DEFAULT-NEXT: ldrh r1, [sp, #40]
+; DEFAULT-NEXT: rsb r5, lr, #0
+; DEFAULT-NEXT: and r3, r2, r5
+; DEFAULT-NEXT: bic r5, r1, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: strh r3, [r0]
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v8i16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r1
+; THUMB1-NEXT: ldr r1, [sp, #76]
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #14]
+; THUMB1-NEXT: ldr r1, [sp, #72]
+; THUMB1-NEXT: ldr r5, [sp, #40]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #12]
+; THUMB1-NEXT: ldr r1, [sp, #68]
+; THUMB1-NEXT: ldr r5, [sp, #36]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #10]
+; THUMB1-NEXT: ldr r1, [sp, #64]
+; THUMB1-NEXT: ldr r5, [sp, #32]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #8]
+; THUMB1-NEXT: ldr r1, [sp, #60]
+; THUMB1-NEXT: ldr r5, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #6]
+; THUMB1-NEXT: ldr r1, [sp, #56]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r6, r5
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: ands r6, r7
+; THUMB1-NEXT: eors r6, r1
+; THUMB1-NEXT: strh r6, [r0, #4]
+; THUMB1-NEXT: ldr r1, [sp, #52]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r5, r3
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: ands r5, r6
+; THUMB1-NEXT: eors r5, r1
+; THUMB1-NEXT: strh r5, [r0, #2]
+; THUMB1-NEXT: ldr r1, [sp, #48]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r3, r2
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: ands r3, r5
+; THUMB1-NEXT: eors r3, r1
+; THUMB1-NEXT: strh r3, [r0]
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v8i16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and lr, r1, #1
+; THUMB2-NEXT: ldrh.w r12, [sp, #68]
+; THUMB2-NEXT: ldrh.w r1, [sp, #36]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r4, r1, r5
+; THUMB2-NEXT: bic.w r5, r12, r5
+; THUMB2-NEXT: orrs r4, r5
+; THUMB2-NEXT: strh r4, [r0, #14]
+; THUMB2-NEXT: ldrh.w r12, [sp, #64]
+; THUMB2-NEXT: ldrh.w r5, [sp, #32]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #12]
+; THUMB2-NEXT: ldrh.w r12, [sp, #60]
+; THUMB2-NEXT: ldrh.w r5, [sp, #28]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #10]
+; THUMB2-NEXT: ldrh.w r12, [sp, #56]
+; THUMB2-NEXT: ldrh.w r5, [sp, #24]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #8]
+; THUMB2-NEXT: ldrh.w r12, [sp, #52]
+; THUMB2-NEXT: ldrh.w r5, [sp, #20]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #6]
+; THUMB2-NEXT: ldrh.w r12, [sp, #48]
+; THUMB2-NEXT: ldrh.w r5, [sp, #16]
+; THUMB2-NEXT: rsb.w r1, lr, #0
+; THUMB2-NEXT: and.w r4, r5, r1
+; THUMB2-NEXT: bic.w r1, r12, r1
+; THUMB2-NEXT: orrs r4, r1
+; THUMB2-NEXT: strh r4, [r0, #4]
+; THUMB2-NEXT: ldrh.w r1, [sp, #44]
+; THUMB2-NEXT: rsb.w r4, lr, #0
+; THUMB2-NEXT: and.w r5, r3, r4
+; THUMB2-NEXT: bic.w r4, r1, r4
+; THUMB2-NEXT: orrs r5, r4
+; THUMB2-NEXT: strh r5, [r0, #2]
+; THUMB2-NEXT: ldrh.w r1, [sp, #40]
+; THUMB2-NEXT: rsb.w r5, lr, #0
+; THUMB2-NEXT: and.w r3, r2, r5
+; THUMB2-NEXT: bic.w r5, r1, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: strh r3, [r0]
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <8 x i16> @llvm.ct.select.v8i16(i1 %cond, <8 x i16> %a, <8 x i16> %b) ; constant-time select on <8 x i16>; the checks above mask-blend the operands (no branches), taking %a when bit 0 of %cond is set, else %b
+ ret <8 x i16> %sel
+}
+
+define <4 x i32> @ct_v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) {
+; CT-LABEL: ct_v4i32:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d17, [sp]
+; CT-NEXT: add r1, sp, #8
+; CT-NEXT: vmov d16, r2, r3
+; CT-NEXT: vld1.64 {d18, d19}, [r1]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 q11, r1
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vmov r0, r1, d20
+; CT-NEXT: vmov r2, r3, d21
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v4i32:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #24]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #28]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: ldr r3, [sp, #16]
+; DEFAULT-NEXT: ldr lr, [sp, #32]
+; DEFAULT-NEXT: rsb r4, r12, #0
+; DEFAULT-NEXT: and r2, r3, r4
+; DEFAULT-NEXT: bic r4, lr, r4
+; DEFAULT-NEXT: orr r2, r2, r4
+; DEFAULT-NEXT: ldr lr, [sp, #36]
+; DEFAULT-NEXT: ldr r4, [sp, #20]
+; DEFAULT-NEXT: rsb r5, r12, #0
+; DEFAULT-NEXT: and r3, r4, r5
+; DEFAULT-NEXT: bic r5, lr, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v4i32:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #32]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #36]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ldr r3, [sp, #40]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r5
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: ldr r6, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v4i32:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldr r3, [sp, #16]
+; THUMB2-NEXT: ldr.w lr, [sp, #32]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r2, r3, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r2, r4
+; THUMB2-NEXT: ldr.w lr, [sp, #36]
+; THUMB2-NEXT: ldr r4, [sp, #20]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <4 x i32> @llvm.ct.select.v4i32(i1 %cond, <4 x i32> %a, <4 x i32> %b) ; constant-time select on <4 x i32>; the checks above mask-blend the operands (no branches), taking %a when bit 0 of %cond is set, else %b
+ ret <4 x i32> %sel
+}
+
+define <2 x i64> @ct_v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) {
+; CT-LABEL: ct_v2i64:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d17, [sp]
+; CT-NEXT: add r1, sp, #8
+; CT-NEXT: vmov d16, r2, r3
+; CT-NEXT: vld1.64 {d18, d19}, [r1]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 q11, r1
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vmov r0, r1, d20
+; CT-NEXT: vmov r2, r3, d21
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v2i64:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #24]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #28]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: ldr r3, [sp, #16]
+; DEFAULT-NEXT: ldr lr, [sp, #32]
+; DEFAULT-NEXT: rsb r4, r12, #0
+; DEFAULT-NEXT: and r2, r3, r4
+; DEFAULT-NEXT: bic r4, lr, r4
+; DEFAULT-NEXT: orr r2, r2, r4
+; DEFAULT-NEXT: ldr lr, [sp, #36]
+; DEFAULT-NEXT: ldr r4, [sp, #20]
+; DEFAULT-NEXT: rsb r5, r12, #0
+; DEFAULT-NEXT: and r3, r4, r5
+; DEFAULT-NEXT: bic r5, lr, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v2i64:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #32]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #36]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ldr r3, [sp, #40]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r5
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: ldr r6, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v2i64:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldr r3, [sp, #16]
+; THUMB2-NEXT: ldr.w lr, [sp, #32]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r2, r3, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r2, r4
+; THUMB2-NEXT: ldr.w lr, [sp, #36]
+; THUMB2-NEXT: ldr r4, [sp, #20]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <2 x i64> @llvm.ct.select.v2i64(i1 %cond, <2 x i64> %a, <2 x i64> %b) ; constant-time select on <2 x i64>; the checks above mask-blend the operands (no branches), taking %a when bit 0 of %cond is set, else %b
+ ret <2 x i64> %sel
+}
+
+define <4 x float> @ct_v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) {
+; CT-LABEL: ct_v4f32:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d17, [sp]
+; CT-NEXT: add r1, sp, #8
+; CT-NEXT: vmov d16, r2, r3
+; CT-NEXT: vld1.64 {d18, d19}, [r1]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 q11, r1
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vmov r0, r1, d20
+; CT-NEXT: vmov r2, r3, d21
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v4f32:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #24]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #28]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: ldr r3, [sp, #16]
+; DEFAULT-NEXT: ldr lr, [sp, #32]
+; DEFAULT-NEXT: rsb r4, r12, #0
+; DEFAULT-NEXT: and r2, r3, r4
+; DEFAULT-NEXT: bic r4, lr, r4
+; DEFAULT-NEXT: orr r2, r2, r4
+; DEFAULT-NEXT: ldr lr, [sp, #36]
+; DEFAULT-NEXT: ldr r4, [sp, #20]
+; DEFAULT-NEXT: rsb r5, r12, #0
+; DEFAULT-NEXT: and r3, r4, r5
+; DEFAULT-NEXT: bic r5, lr, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v4f32:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #32]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #36]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ldr r3, [sp, #40]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r5
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: ldr r6, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v4f32:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldr r3, [sp, #16]
+; THUMB2-NEXT: ldr.w lr, [sp, #32]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r2, r3, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r2, r4
+; THUMB2-NEXT: ldr.w lr, [sp, #36]
+; THUMB2-NEXT: ldr r4, [sp, #20]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <4 x float> @llvm.ct.select.v4f32(i1 %cond, <4 x float> %a, <4 x float> %b) ; constant-time select on <4 x float>; the checks above blend the raw bits with integer/bitwise ops only, taking %a when bit 0 of %cond is set, else %b
+ ret <4 x float> %sel
+}
+
+define <2 x double> @ct_v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) {
+; CT-LABEL: ct_v2f64:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d17, [sp]
+; CT-NEXT: add r1, sp, #8
+; CT-NEXT: vmov d16, r2, r3
+; CT-NEXT: vld1.64 {d18, d19}, [r1]
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 q11, r1
+; CT-NEXT: vand q10, q8, q11
+; CT-NEXT: vbic q11, q9, q11
+; CT-NEXT: vorr q10, q10, q11
+; CT-NEXT: vmov r0, r1, d20
+; CT-NEXT: vmov r2, r3, d21
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v2f64:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #24]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #28]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: ldr r3, [sp, #16]
+; DEFAULT-NEXT: ldr lr, [sp, #32]
+; DEFAULT-NEXT: rsb r4, r12, #0
+; DEFAULT-NEXT: and r2, r3, r4
+; DEFAULT-NEXT: bic r4, lr, r4
+; DEFAULT-NEXT: orr r2, r2, r4
+; DEFAULT-NEXT: ldr lr, [sp, #36]
+; DEFAULT-NEXT: ldr r4, [sp, #20]
+; DEFAULT-NEXT: rsb r5, r12, #0
+; DEFAULT-NEXT: and r3, r4, r5
+; DEFAULT-NEXT: bic r5, lr, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v2f64:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: .pad #4
+; THUMB1-NEXT: sub sp, #4
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #32]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #36]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ldr r3, [sp, #40]
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r5
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r3
+; THUMB1-NEXT: ldr r5, [sp, #44]
+; THUMB1-NEXT: ldr r6, [sp, #28]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: add sp, #4
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v2f64:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldr r3, [sp, #16]
+; THUMB2-NEXT: ldr.w lr, [sp, #32]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r2, r3, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r2, r4
+; THUMB2-NEXT: ldr.w lr, [sp, #36]
+; THUMB2-NEXT: ldr r4, [sp, #20]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <2 x double> @llvm.ct.select.v2f64(i1 %cond, <2 x double> %a, <2 x double> %b) ; constant-time select on <2 x double>; the checks above blend the raw bits with integer/bitwise ops only, taking %a when bit 0 of %cond is set, else %b
+ ret <2 x double> %sel
+}
+
+;
+; Tiny vector type edge cases follow. These should be scalarised.
+;
+define <1 x i8> @ct_v1i8(i1 %cond, <1 x i8> %a, <1 x i8> %b) {
+; CT-LABEL: ct_v1i8:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v1i8:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_v1i8:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_v1i8:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+entry:
+ %sel = call <1 x i8> @llvm.ct.select.v1i8(i1 %cond, <1 x i8> %a, <1 x i8> %b) ; single-lane vector overload, so the mangled suffix is .v1i8 (not the scalar .i8); every run configuration above lowers it to one scalar mask-blend
+ ret <1 x i8> %sel
+}
+
+define <2 x i8> @ct_v2i8(i1 %cond, <2 x i8> %a, <2 x i8> %b) { ; <2 x i8> edge case, the CT checks expect one NEON D-register vand/vbic/vorr, the other prefixes expect two per-lane core-register sequences
+; CT-LABEL: ct_v2i8:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v2i8:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r1, lr
+; DEFAULT-NEXT: bic lr, r3, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldrb r3, [sp, #8]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r2, lr
+; DEFAULT-NEXT: bic lr, r3, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: pop {r11, pc}
+;
+; THUMB1-LABEL: ct_v2i8:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r3
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r3
+; THUMB1-NEXT: ldr r3, [sp, #16]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r2
+; THUMB1-NEXT: eors r1, r3
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r3
+; THUMB1-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB2-LABEL: ct_v2i8:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r7, lr}
+; THUMB2-NEXT: push {r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r1, lr
+; THUMB2-NEXT: bic.w lr, r3, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldrb.w r3, [sp, #8]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r2, lr
+; THUMB2-NEXT: bic.w lr, r3, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: pop {r7, pc}
+entry:
+ %sel = call <2 x i8> @llvm.ct.select.v2i8(i1 %cond, <2 x i8> %a, <2 x i8> %b)
+ ret <2 x i8> %sel
+}
+
+define <4 x i8> @ct_v4i8(i1 %cond, <4 x i8> %a, <4 x i8> %b) { ; <4 x i8> edge case, the CT checks expect one NEON D-register vand/vbic/vorr, the other prefixes expect four per-lane core-register sequences
+; CT-LABEL: ct_v4i8:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v4i8:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r4, r5, r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldrb lr, [sp, #20]
+; DEFAULT-NEXT: rsb r4, r12, #0
+; DEFAULT-NEXT: and r0, r1, r4
+; DEFAULT-NEXT: bic r4, lr, r4
+; DEFAULT-NEXT: orr r0, r0, r4
+; DEFAULT-NEXT: ldrb r4, [sp, #24]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r2, lr
+; DEFAULT-NEXT: bic lr, r4, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: ldrb r4, [sp, #28]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r2, r3, lr
+; DEFAULT-NEXT: bic lr, r4, lr
+; DEFAULT-NEXT: orr r2, r2, lr
+; DEFAULT-NEXT: ldrb lr, [sp, #32]
+; DEFAULT-NEXT: ldrb r4, [sp, #16]
+; DEFAULT-NEXT: rsb r5, r12, #0
+; DEFAULT-NEXT: and r3, r4, r5
+; DEFAULT-NEXT: bic r5, lr, r5
+; DEFAULT-NEXT: orr r3, r3, r5
+; DEFAULT-NEXT: pop {r4, r5, r11, pc}
+;
+; THUMB1-LABEL: ct_v4i8:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r6, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r5, [sp, #24]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r5
+; THUMB1-NEXT: ands r0, r6
+; THUMB1-NEXT: eors r0, r5
+; THUMB1-NEXT: ldr r5, [sp, #28]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r1, r2
+; THUMB1-NEXT: eors r1, r5
+; THUMB1-NEXT: ands r1, r6
+; THUMB1-NEXT: eors r1, r5
+; THUMB1-NEXT: ldr r5, [sp, #32]
+; THUMB1-NEXT: mov r6, r4
+; THUMB1-NEXT: lsls r6, r6, #31
+; THUMB1-NEXT: asrs r6, r6, #31
+; THUMB1-NEXT: mov r2, r3
+; THUMB1-NEXT: eors r2, r5
+; THUMB1-NEXT: ands r2, r6
+; THUMB1-NEXT: eors r2, r5
+; THUMB1-NEXT: ldr r5, [sp, #36]
+; THUMB1-NEXT: ldr r6, [sp, #20]
+; THUMB1-NEXT: mov r7, r4
+; THUMB1-NEXT: lsls r7, r7, #31
+; THUMB1-NEXT: asrs r7, r7, #31
+; THUMB1-NEXT: mov r3, r6
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: ands r3, r7
+; THUMB1-NEXT: eors r3, r5
+; THUMB1-NEXT: pop {r4, r5, r6, r7, pc}
+;
+; THUMB2-LABEL: ct_v4i8:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r4, r5, r7, lr}
+; THUMB2-NEXT: push {r4, r5, r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldrb.w lr, [sp, #20]
+; THUMB2-NEXT: rsb.w r4, r12, #0
+; THUMB2-NEXT: and.w r0, r1, r4
+; THUMB2-NEXT: bic.w r4, lr, r4
+; THUMB2-NEXT: orrs r0, r4
+; THUMB2-NEXT: ldrb.w r4, [sp, #24]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r2, lr
+; THUMB2-NEXT: bic.w lr, r4, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: ldrb.w r4, [sp, #28]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r2, r3, lr
+; THUMB2-NEXT: bic.w lr, r4, lr
+; THUMB2-NEXT: orr.w r2, r2, lr
+; THUMB2-NEXT: ldrb.w lr, [sp, #32]
+; THUMB2-NEXT: ldrb.w r4, [sp, #16]
+; THUMB2-NEXT: rsb.w r5, r12, #0
+; THUMB2-NEXT: and.w r3, r4, r5
+; THUMB2-NEXT: bic.w r5, lr, r5
+; THUMB2-NEXT: orrs r3, r5
+; THUMB2-NEXT: pop {r4, r5, r7, pc}
+entry:
+ %sel = call <4 x i8> @llvm.ct.select.v4i8(i1 %cond, <4 x i8> %a, <4 x i8> %b)
+ ret <4 x i8> %sel
+}
+
+define <1 x i16> @ct_v1i16(i1 %cond, <1 x i16> %a, <1 x i16> %b) { ; <1 x i16> edge case, every check prefix expects one branch-free mask-and-merge on core registers
+; CT-LABEL: ct_v1i16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v1i16:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_v1i16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_v1i16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+entry:
+ %sel = call <1 x i16> @llvm.ct.select.v1i16(i1 %cond, <1 x i16> %a, <1 x i16> %b)
+ ret <1 x i16> %sel
+}
+
+define <2 x i16> @ct_v2i16(i1 %cond, <2 x i16> %a, <2 x i16> %b) { ; <2 x i16> edge case, the CT checks expect one NEON D-register vand/vbic/vorr, the other prefixes expect two per-lane core-register sequences
+; CT-LABEL: ct_v2i16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v2i16:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r1, lr
+; DEFAULT-NEXT: bic lr, r3, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldrh r3, [sp, #8]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r2, lr
+; DEFAULT-NEXT: bic lr, r3, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: pop {r11, pc}
+;
+; THUMB1-LABEL: ct_v2i16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r3
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r3
+; THUMB1-NEXT: ldr r3, [sp, #16]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r2
+; THUMB1-NEXT: eors r1, r3
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r3
+; THUMB1-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB2-LABEL: ct_v2i16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r7, lr}
+; THUMB2-NEXT: push {r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r1, lr
+; THUMB2-NEXT: bic.w lr, r3, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldrh.w r3, [sp, #8]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r2, lr
+; THUMB2-NEXT: bic.w lr, r3, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: pop {r7, pc}
+entry:
+ %sel = call <2 x i16> @llvm.ct.select.v2i16(i1 %cond, <2 x i16> %a, <2 x i16> %b)
+ ret <2 x i16> %sel
+}
+
+define <1 x i32> @ct_v1i32(i1 %cond, <1 x i32> %a, <1 x i32> %b) { ; <1 x i32> edge case, every check prefix expects one branch-free mask-and-merge on core registers
+; CT-LABEL: ct_v1i32:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v1i32:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_v1i32:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_v1i32:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+entry:
+ %sel = call <1 x i32> @llvm.ct.select.v1i32(i1 %cond, <1 x i32> %a, <1 x i32> %b)
+ ret <1 x i32> %sel
+}
+
+define <1 x float> @ct_v1f32(i1 %cond, <1 x float> %a, <1 x float> %b) { ; <1 x float> edge case, the CT checks move both operands through s-registers around a core-register mask-and-merge, the other prefixes stay in core registers
+; CT-LABEL: ct_v1f32:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: vmov s0, r2
+; CT-NEXT: vmov s2, r1
+; CT-NEXT: vmov r2, s2
+; CT-NEXT: vmov r3, s0
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: and r2, r2, r1
+; CT-NEXT: bic r1, r3, r1
+; CT-NEXT: orr r2, r2, r1
+; CT-NEXT: vmov s4, r2
+; CT-NEXT: vmov r0, s4
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_v1f32:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_v1f32:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_v1f32:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+entry:
+ %sel = call <1 x float> @llvm.ct.select.v1f32(i1 %cond, <1 x float> %a, <1 x float> %b)
+ ret <1 x float> %sel
+}
diff --git a/llvm/test/CodeGen/ARM/ctselect.ll b/llvm/test/CodeGen/ARM/ctselect.ll
new file mode 100644
index 0000000000000..7e64c90a2a9b1
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/ctselect.ll
@@ -0,0 +1,555 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=armv7-none-eabi -verify-machineinstrs | FileCheck --check-prefixes=CT %s
+; RUN: llc < %s -mtriple=armv6 -verify-machineinstrs | FileCheck --check-prefix=DEFAULT %s
+; RUN: llc < %s -mtriple=thumbv6m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB1 %s
+; RUN: llc < %s -mtriple=thumbv7m-none-eabi -verify-machineinstrs | FileCheck --check-prefix=THUMB2 %s
+; RUN: llc < %s -mtriple=thumbv7-linux-gnueabihf -mcpu=cortex-a9 -verify-machineinstrs | FileCheck --check-prefix=CORTEXA9 %s
+; RUN: llc < %s -mtriple=armv7-linux-gnueabihf -mcpu=cortex-a9 -verify-machineinstrs | FileCheck --check-prefix=CORTEX-NOTHUMB %s
+
+define i1 @ct_i1(i1 %cond, i1 %a, i1 %b) { ; i1 select, all six prefixes expect one branch-free core-register mask-and-merge (and/rsb/and/bic/orr, or lsls/asrs/eors/ands/eors on Thumb1)
+; CT-LABEL: ct_i1:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_i1:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_i1:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_i1:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+;
+; CORTEXA9-LABEL: ct_i1:
+; CORTEXA9: @ %bb.0: @ %entry
+; CORTEXA9-NEXT: and r3, r0, #1
+; CORTEXA9-NEXT: rsb.w r12, r3, #0
+; CORTEXA9-NEXT: and.w r0, r1, r12
+; CORTEXA9-NEXT: bic.w r12, r2, r12
+; CORTEXA9-NEXT: orr.w r0, r0, r12
+; CORTEXA9-NEXT: bx lr
+;
+; CORTEX-NOTHUMB-LABEL: ct_i1:
+; CORTEX-NOTHUMB: @ %bb.0: @ %entry
+; CORTEX-NOTHUMB-NEXT: and r3, r0, #1
+; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0
+; CORTEX-NOTHUMB-NEXT: and r0, r1, r12
+; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12
+; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12
+; CORTEX-NOTHUMB-NEXT: bx lr
+entry:
+ %sel = call i1 @llvm.ct.select.i1(i1 %cond, i1 %a, i1 %b)
+ ret i1 %sel
+}
+
+define i8 @ct_int8(i1 %cond, i8 %a, i8 %b) { ; i8 select, all six prefixes expect one branch-free core-register mask-and-merge (and/rsb/and/bic/orr, or lsls/asrs/eors/ands/eors on Thumb1)
+; CT-LABEL: ct_int8:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_int8:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_int8:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_int8:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+;
+; CORTEXA9-LABEL: ct_int8:
+; CORTEXA9: @ %bb.0: @ %entry
+; CORTEXA9-NEXT: and r3, r0, #1
+; CORTEXA9-NEXT: rsb.w r12, r3, #0
+; CORTEXA9-NEXT: and.w r0, r1, r12
+; CORTEXA9-NEXT: bic.w r12, r2, r12
+; CORTEXA9-NEXT: orr.w r0, r0, r12
+; CORTEXA9-NEXT: bx lr
+;
+; CORTEX-NOTHUMB-LABEL: ct_int8:
+; CORTEX-NOTHUMB: @ %bb.0: @ %entry
+; CORTEX-NOTHUMB-NEXT: and r3, r0, #1
+; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0
+; CORTEX-NOTHUMB-NEXT: and r0, r1, r12
+; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12
+; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12
+; CORTEX-NOTHUMB-NEXT: bx lr
+entry:
+ %sel = call i8 @llvm.ct.select.i8(i1 %cond, i8 %a, i8 %b)
+ ret i8 %sel
+}
+
+define i16 @ct_int16(i1 %cond, i16 %a, i16 %b) { ; i16 select, all six prefixes expect one branch-free core-register mask-and-merge (and/rsb/and/bic/orr, or lsls/asrs/eors/ands/eors on Thumb1)
+; CT-LABEL: ct_int16:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_int16:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_int16:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_int16:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+;
+; CORTEXA9-LABEL: ct_int16:
+; CORTEXA9: @ %bb.0: @ %entry
+; CORTEXA9-NEXT: and r3, r0, #1
+; CORTEXA9-NEXT: rsb.w r12, r3, #0
+; CORTEXA9-NEXT: and.w r0, r1, r12
+; CORTEXA9-NEXT: bic.w r12, r2, r12
+; CORTEXA9-NEXT: orr.w r0, r0, r12
+; CORTEXA9-NEXT: bx lr
+;
+; CORTEX-NOTHUMB-LABEL: ct_int16:
+; CORTEX-NOTHUMB: @ %bb.0: @ %entry
+; CORTEX-NOTHUMB-NEXT: and r3, r0, #1
+; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0
+; CORTEX-NOTHUMB-NEXT: and r0, r1, r12
+; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12
+; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12
+; CORTEX-NOTHUMB-NEXT: bx lr
+entry:
+ %sel = call i16 @llvm.ct.select.i16(i1 %cond, i16 %a, i16 %b)
+ ret i16 %sel
+}
+
+define i32 @ct_int32(i1 %cond, i32 %a, i32 %b) { ; i32 select, all six prefixes expect one branch-free core-register mask-and-merge (and/rsb/and/bic/orr, or lsls/asrs/eors/ands/eors on Thumb1)
+; CT-LABEL: ct_int32:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r3, r0, #1
+; CT-NEXT: rsb r12, r3, #0
+; CT-NEXT: and r0, r1, r12
+; CT-NEXT: bic r12, r2, r12
+; CT-NEXT: orr r0, r0, r12
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_int32:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_int32:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_int32:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+;
+; CORTEXA9-LABEL: ct_int32:
+; CORTEXA9: @ %bb.0: @ %entry
+; CORTEXA9-NEXT: and r3, r0, #1
+; CORTEXA9-NEXT: rsb.w r12, r3, #0
+; CORTEXA9-NEXT: and.w r0, r1, r12
+; CORTEXA9-NEXT: bic.w r12, r2, r12
+; CORTEXA9-NEXT: orr.w r0, r0, r12
+; CORTEXA9-NEXT: bx lr
+;
+; CORTEX-NOTHUMB-LABEL: ct_int32:
+; CORTEX-NOTHUMB: @ %bb.0: @ %entry
+; CORTEX-NOTHUMB-NEXT: and r3, r0, #1
+; CORTEX-NOTHUMB-NEXT: rsb r12, r3, #0
+; CORTEX-NOTHUMB-NEXT: and r0, r1, r12
+; CORTEX-NOTHUMB-NEXT: bic r12, r2, r12
+; CORTEX-NOTHUMB-NEXT: orr r0, r0, r12
+; CORTEX-NOTHUMB-NEXT: bx lr
+entry:
+ %sel = call i32 @llvm.ct.select.i32(i1 %cond, i32 %a, i32 %b)
+ ret i32 %sel
+}
+
+define i64 @ct_int64(i1 %cond, i64 %a, i64 %b) { ; i64 select, all six prefixes expect two 32-bit core-register mask-and-merge sequences with %b loaded from the stack
+; CT-LABEL: ct_int64:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: .save {r4, lr}
+; CT-NEXT: push {r4, lr}
+; CT-NEXT: ldr r1, [sp, #8]
+; CT-NEXT: and lr, r0, #1
+; CT-NEXT: ldr r12, [sp, #12]
+; CT-NEXT: rsb r4, lr, #0
+; CT-NEXT: and r0, r2, r4
+; CT-NEXT: bic r4, r1, r4
+; CT-NEXT: orr r0, r0, r4
+; CT-NEXT: rsb r2, lr, #0
+; CT-NEXT: and r1, r3, r2
+; CT-NEXT: bic r2, r12, r2
+; CT-NEXT: orr r1, r1, r2
+; CT-NEXT: pop {r4, pc}
+;
+; DEFAULT-LABEL: ct_int64:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #8]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #12]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: pop {r11, pc}
+;
+; THUMB1-LABEL: ct_int64:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #16]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #20]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB2-LABEL: ct_int64:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r7, lr}
+; THUMB2-NEXT: push {r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #8]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #12]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: pop {r7, pc}
+;
+; CORTEXA9-LABEL: ct_int64:
+; CORTEXA9: @ %bb.0: @ %entry
+; CORTEXA9-NEXT: .save {r4, lr}
+; CORTEXA9-NEXT: push {r4, lr}
+; CORTEXA9-NEXT: ldrd r1, r12, [sp, #8]
+; CORTEXA9-NEXT: and lr, r0, #1
+; CORTEXA9-NEXT: rsb.w r4, lr, #0
+; CORTEXA9-NEXT: and.w r0, r2, r4
+; CORTEXA9-NEXT: bic.w r4, r1, r4
+; CORTEXA9-NEXT: orrs r0, r4
+; CORTEXA9-NEXT: rsb.w r2, lr, #0
+; CORTEXA9-NEXT: and.w r1, r3, r2
+; CORTEXA9-NEXT: bic.w r2, r12, r2
+; CORTEXA9-NEXT: orr.w r1, r1, r2
+; CORTEXA9-NEXT: pop {r4, pc}
+;
+; CORTEX-NOTHUMB-LABEL: ct_int64:
+; CORTEX-NOTHUMB: @ %bb.0: @ %entry
+; CORTEX-NOTHUMB-NEXT: .save {r4, lr}
+; CORTEX-NOTHUMB-NEXT: push {r4, lr}
+; CORTEX-NOTHUMB-NEXT: ldr r12, [sp, #12]
+; CORTEX-NOTHUMB-NEXT: and lr, r0, #1
+; CORTEX-NOTHUMB-NEXT: ldr r1, [sp, #8]
+; CORTEX-NOTHUMB-NEXT: rsb r4, lr, #0
+; CORTEX-NOTHUMB-NEXT: and r0, r2, r4
+; CORTEX-NOTHUMB-NEXT: bic r4, r1, r4
+; CORTEX-NOTHUMB-NEXT: orr r0, r0, r4
+; CORTEX-NOTHUMB-NEXT: rsb r2, lr, #0
+; CORTEX-NOTHUMB-NEXT: and r1, r3, r2
+; CORTEX-NOTHUMB-NEXT: bic r2, r12, r2
+; CORTEX-NOTHUMB-NEXT: orr r1, r1, r2
+; CORTEX-NOTHUMB-NEXT: pop {r4, pc}
+entry:
+ %sel = call i64 @llvm.ct.select.i64(i1 %cond, i64 %a, i64 %b)
+ ret i64 %sel
+}
+
+define float @ct_float(i1 %cond, float %a, float %b) { ; float select, the mask-and-merge always runs on core registers, the CT and gnueabihf checks add vmov transfers to and from s-registers around it
+; CT-LABEL: ct_float:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: vmov s0, r2
+; CT-NEXT: vmov s2, r1
+; CT-NEXT: vmov r2, s2
+; CT-NEXT: vmov r3, s0
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: and r2, r2, r1
+; CT-NEXT: bic r1, r3, r1
+; CT-NEXT: orr r2, r2, r1
+; CT-NEXT: vmov s4, r2
+; CT-NEXT: vmov r0, s4
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_float:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: and r3, r0, #1
+; DEFAULT-NEXT: rsb r12, r3, #0
+; DEFAULT-NEXT: and r0, r1, r12
+; DEFAULT-NEXT: bic r12, r2, r12
+; DEFAULT-NEXT: orr r0, r0, r12
+; DEFAULT-NEXT: bx lr
+;
+; THUMB1-LABEL: ct_float:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, lr}
+; THUMB1-NEXT: push {r4, lr}
+; THUMB1-NEXT: movs r3, #1
+; THUMB1-NEXT: ands r3, r0
+; THUMB1-NEXT: mov r4, r3
+; THUMB1-NEXT: lsls r4, r4, #31
+; THUMB1-NEXT: asrs r4, r4, #31
+; THUMB1-NEXT: mov r0, r1
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: ands r0, r4
+; THUMB1-NEXT: eors r0, r2
+; THUMB1-NEXT: pop {r4, pc}
+;
+; THUMB2-LABEL: ct_float:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: and r3, r0, #1
+; THUMB2-NEXT: rsb.w r12, r3, #0
+; THUMB2-NEXT: and.w r0, r1, r12
+; THUMB2-NEXT: bic.w r12, r2, r12
+; THUMB2-NEXT: orr.w r0, r0, r12
+; THUMB2-NEXT: bx lr
+;
+; CORTEXA9-LABEL: ct_float:
+; CORTEXA9: @ %bb.0: @ %entry
+; CORTEXA9-NEXT: and r0, r0, #1
+; CORTEXA9-NEXT: vmov r2, s0
+; CORTEXA9-NEXT: vmov r3, s1
+; CORTEXA9-NEXT: rsbs r1, r0, #0
+; CORTEXA9-NEXT: ands r2, r1
+; CORTEXA9-NEXT: bic.w r1, r3, r1
+; CORTEXA9-NEXT: orrs r2, r1
+; CORTEXA9-NEXT: vmov s2, r2
+; CORTEXA9-NEXT: vmov.f32 s0, s2
+; CORTEXA9-NEXT: bx lr
+;
+; CORTEX-NOTHUMB-LABEL: ct_float:
+; CORTEX-NOTHUMB: @ %bb.0: @ %entry
+; CORTEX-NOTHUMB-NEXT: and r0, r0, #1
+; CORTEX-NOTHUMB-NEXT: vmov r2, s0
+; CORTEX-NOTHUMB-NEXT: vmov r3, s1
+; CORTEX-NOTHUMB-NEXT: rsb r1, r0, #0
+; CORTEX-NOTHUMB-NEXT: and r2, r2, r1
+; CORTEX-NOTHUMB-NEXT: bic r1, r3, r1
+; CORTEX-NOTHUMB-NEXT: orr r2, r2, r1
+; CORTEX-NOTHUMB-NEXT: vmov s2, r2
+; CORTEX-NOTHUMB-NEXT: vmov.f32 s0, s2
+; CORTEX-NOTHUMB-NEXT: bx lr
+entry:
+ %sel = call float @llvm.ct.select.f32(i1 %cond, float %a, float %b)
+ ret float %sel
+}
+
+define double @ct_f64(i1 %cond, double %a, double %b) { ; double select, the CT and gnueabihf checks expect one D-register vdup.32/vand/vbic/vorr, the remaining prefixes expect two 32-bit core-register halves
+; CT-LABEL: ct_f64:
+; CT: @ %bb.0: @ %entry
+; CT-NEXT: vldr d16, [sp]
+; CT-NEXT: vmov d17, r2, r3
+; CT-NEXT: and r0, r0, #1
+; CT-NEXT: rsb r1, r0, #0
+; CT-NEXT: vdup.32 d19, r1
+; CT-NEXT: vand d18, d17, d19
+; CT-NEXT: vbic d19, d16, d19
+; CT-NEXT: vorr d18, d18, d19
+; CT-NEXT: vmov r0, r1, d18
+; CT-NEXT: bx lr
+;
+; DEFAULT-LABEL: ct_f64:
+; DEFAULT: @ %bb.0: @ %entry
+; DEFAULT-NEXT: push {r11, lr}
+; DEFAULT-NEXT: and r12, r0, #1
+; DEFAULT-NEXT: ldr r1, [sp, #8]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r0, r2, lr
+; DEFAULT-NEXT: bic lr, r1, lr
+; DEFAULT-NEXT: orr r0, r0, lr
+; DEFAULT-NEXT: ldr r2, [sp, #12]
+; DEFAULT-NEXT: rsb lr, r12, #0
+; DEFAULT-NEXT: and r1, r3, lr
+; DEFAULT-NEXT: bic lr, r2, lr
+; DEFAULT-NEXT: orr r1, r1, lr
+; DEFAULT-NEXT: pop {r11, pc}
+;
+; THUMB1-LABEL: ct_f64:
+; THUMB1: @ %bb.0: @ %entry
+; THUMB1-NEXT: .save {r4, r5, r7, lr}
+; THUMB1-NEXT: push {r4, r5, r7, lr}
+; THUMB1-NEXT: movs r4, #1
+; THUMB1-NEXT: ands r4, r0
+; THUMB1-NEXT: ldr r1, [sp, #16]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r0, r2
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ands r0, r5
+; THUMB1-NEXT: eors r0, r1
+; THUMB1-NEXT: ldr r2, [sp, #20]
+; THUMB1-NEXT: mov r5, r4
+; THUMB1-NEXT: lsls r5, r5, #31
+; THUMB1-NEXT: asrs r5, r5, #31
+; THUMB1-NEXT: mov r1, r3
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: ands r1, r5
+; THUMB1-NEXT: eors r1, r2
+; THUMB1-NEXT: pop {r4, r5, r7, pc}
+;
+; THUMB2-LABEL: ct_f64:
+; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: .save {r7, lr}
+; THUMB2-NEXT: push {r7, lr}
+; THUMB2-NEXT: and r12, r0, #1
+; THUMB2-NEXT: ldr r1, [sp, #8]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r0, r2, lr
+; THUMB2-NEXT: bic.w lr, r1, lr
+; THUMB2-NEXT: orr.w r0, r0, lr
+; THUMB2-NEXT: ldr r2, [sp, #12]
+; THUMB2-NEXT: rsb.w lr, r12, #0
+; THUMB2-NEXT: and.w r1, r3, lr
+; THUMB2-NEXT: bic.w lr, r2, lr
+; THUMB2-NEXT: orr.w r1, r1, lr
+; THUMB2-NEXT: pop {r7, pc}
+;
+; CORTEXA9-LABEL: ct_f64:
+; CORTEXA9: @ %bb.0: @ %entry
+; CORTEXA9-NEXT: and r0, r0, #1
+; CORTEXA9-NEXT: rsbs r1, r0, #0
+; CORTEXA9-NEXT: vdup.32 d17, r1
+; CORTEXA9-NEXT: vand d16, d0, d17
+; CORTEXA9-NEXT: vbic d17, d1, d17
+; CORTEXA9-NEXT: vorr d16, d16, d17
+; CORTEXA9-NEXT: vmov.f64 d0, d16
+; CORTEXA9-NEXT: bx lr
+;
+; CORTEX-NOTHUMB-LABEL: ct_f64:
+; CORTEX-NOTHUMB: @ %bb.0: @ %entry
+; CORTEX-NOTHUMB-NEXT: and r0, r0, #1
+; CORTEX-NOTHUMB-NEXT: rsb r1, r0, #0
+; CORTEX-NOTHUMB-NEXT: vdup.32 d17, r1
+; CORTEX-NOTHUMB-NEXT: vand d16, d0, d17
+; CORTEX-NOTHUMB-NEXT: vbic d17, d1, d17
+; CORTEX-NOTHUMB-NEXT: vorr d16, d16, d17
+; CORTEX-NOTHUMB-NEXT: vmov.f64 d0, d16
+; CORTEX-NOTHUMB-NEXT: bx lr
+entry:
+ %sel = call double @llvm.ct.select.f64(i1 %cond, double %a, double %b)
+ ret double %sel
+}
More information about the llvm-branch-commits
mailing list