[llvm] [X86][CodeGen] Support lowering for CCMP/CTEST (PR #91747)
Shengchen Kan via llvm-commits
llvm-commits at lists.llvm.org
Mon May 20 20:36:36 PDT 2024
https://github.com/KanRobert updated https://github.com/llvm/llvm-project/pull/91747
>From 5759bb63681fd2061eb4113a99db25170e883114 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Thu, 9 May 2024 03:48:46 +0800
Subject: [PATCH 1/5] [X86][CodeGen] Support lowering for CCMP/CTEST +
transform CTESTrr+ANDrr/rm -> CTESTrr/CTESTmr
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 5 +-
llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 36 +-
llvm/lib/Target/X86/X86ISelLowering.cpp | 206 +++-
llvm/lib/Target/X86/X86ISelLowering.h | 4 +
.../Target/X86/X86InstrConditionalCompare.td | 55 ++
llvm/lib/Target/X86/X86InstrFragments.td | 6 +
llvm/lib/Target/X86/X86InstrInfo.cpp | 57 ++
llvm/lib/Target/X86/X86InstrInfo.h | 3 +
llvm/test/CodeGen/X86/apx/ccmp.ll | 926 ++++++++++++++++++
llvm/test/CodeGen/X86/apx/ctest.ll | 698 +++++++++++++
10 files changed, 1981 insertions(+), 15 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/ccmp.ll
create mode 100644 llvm/test/CodeGen/X86/apx/ctest.ll
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 2b181cd3ab1db..96ca22678e0ba 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1801,11 +1801,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
if (N->getNumValues() == RV->getNumValues())
DAG.ReplaceAllUsesWith(N, RV.getNode());
- else {
- assert(N->getValueType(0) == RV.getValueType() &&
- N->getNumValues() == 1 && "Type mismatch");
+ else
DAG.ReplaceAllUsesWith(N, &RV);
- }
// Push the new node and any users onto the worklist. Omit this if the
// new node is the EntryToken (e.g. if a store managed to get optimized
diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7f76324fa5705..ad1febea56202 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1557,7 +1557,12 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
case X86::TEST8rr:
case X86::TEST16rr:
case X86::TEST32rr:
- case X86::TEST64rr: {
+ case X86::TEST64rr:
+ // CTESTrr+ANDrr/rm -> CTESTrr/CTESTmr
+ case X86::CTEST8rr:
+ case X86::CTEST16rr:
+ case X86::CTEST32rr:
+ case X86::CTEST64rr: {
auto &Op0 = N->getOperand(0);
if (Op0 != N->getOperand(1) || !Op0->hasNUsesOfValue(2, Op0.getResNo()) ||
!Op0.isMachineOpcode())
@@ -1575,8 +1580,11 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
CASE_ND(AND64rr) {
if (And->hasAnyUseOfValue(1))
continue;
- MachineSDNode *Test = CurDAG->getMachineNode(
- Opc, SDLoc(N), MVT::i32, And.getOperand(0), And.getOperand(1));
+ SmallVector<SDValue> Ops(N->op_values());
+ Ops[0] = And.getOperand(0);
+ Ops[1] = And.getOperand(1);
+ MachineSDNode *Test =
+ CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i32, Ops);
ReplaceUses(N, Test);
MadeChange = true;
continue;
@@ -1588,8 +1596,9 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
if (And->hasAnyUseOfValue(1))
continue;
unsigned NewOpc;
+ unsigned NumOps = N->getNumOperands();
#define FROM_TO(A, B) \
- CASE_ND(A) NewOpc = X86::B; \
+ CASE_ND(A) NewOpc = NumOps > 2 ? X86::C##B : X86::B; \
break;
switch (And.getMachineOpcode()) {
FROM_TO(AND8rm, TEST8mr);
@@ -1600,10 +1609,21 @@ void X86DAGToDAGISel::PostprocessISelDAG() {
#undef FROM_TO
#undef CASE_ND
// Need to swap the memory and register operand.
- SDValue Ops[] = {And.getOperand(1), And.getOperand(2),
- And.getOperand(3), And.getOperand(4),
- And.getOperand(5), And.getOperand(0),
- And.getOperand(6) /* Chain */};
+ SmallVector<SDValue> Ops = {And.getOperand(1), And.getOperand(2),
+ And.getOperand(3), And.getOperand(4),
+ And.getOperand(5), And.getOperand(0)};
+ bool IsCTESTCC = X86::isCTESTCC(Opc);
+ // CC, Cflags.
+ if (IsCTESTCC) {
+ Ops.push_back(N->getOperand(2));
+ Ops.push_back(N->getOperand(3));
+ }
+ // Chain
+ Ops.push_back(And.getOperand(6));
+ // Glue
+ if (IsCTESTCC)
+ Ops.push_back(N->getOperand(4));
+
MachineSDNode *Test = CurDAG->getMachineNode(
NewOpc, SDLoc(N), MVT::i32, MVT::Other, Ops);
CurDAG->setNodeMemRefs(
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5d0846453685f..2b404619c93e1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3412,6 +3412,9 @@ X86TargetLowering::getJumpConditionMergingParams(Instruction::BinaryOps Opc,
const Value *Rhs) const {
using namespace llvm::PatternMatch;
int BaseCost = BrMergingBaseCostThresh.getValue();
+ // With CCMP, branches can be merged in a more efficient way.
+ if (BaseCost >= 0 && Subtarget.hasCCMP())
+ BaseCost += 6;
// a == b && a == c is a fast pattern on x86.
ICmpInst::Predicate Pred;
if (BaseCost >= 0 && Opc == Instruction::And &&
@@ -33970,6 +33973,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(TESTUI)
NODE_NAME_CASE(FP80_ADD)
NODE_NAME_CASE(STRICT_FP80_ADD)
+ NODE_NAME_CASE(CCMP)
+ NODE_NAME_CASE(CTEST)
}
return nullptr;
#undef NODE_NAME_CASE
@@ -54605,7 +54610,187 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
return true;
}
+static SDValue combineX86SubCmpToCcmpCtestHelper(
+ SDNode *N, SDValue Flag, SDValue SetCC0, SDValue SetCC1, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI, unsigned NewOpc) {
+ SDValue LHS = N->getOperand(0);
+ SDValue Sub = SetCC1.getOperand(1);
+
+ SDNode *BrCond = *Flag->uses().begin();
+ if (BrCond->getOpcode() != X86ISD::BRCOND)
+ return SDValue();
+ unsigned CondNo = 2;
+ if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
+ X86::COND_NE)
+ return SDValue();
+
+ X86::CondCode CC0 =
+ static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
+ // CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
+ if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
+ return SDValue();
+
+ bool IsOR = LHS.getOpcode() == ISD::OR;
+
+ SDValue SCC =
+ IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
+ SDLoc(SetCC0.getOperand(0)), MVT::i8)
+ : SetCC0.getOperand(0);
+
+ SDValue CC1N = SetCC1.getOperand(0);
+ X86::CondCode CC1 =
+ static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
+ X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
+ X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
+ SDValue CFlags = DAG.getTargetConstant(
+ X86::getCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
+ SDValue CCMP = (NewOpc == X86ISD::CCMP)
+ ? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
+ {Sub.getOperand(0), Sub.getOperand(1),
+ CFlags, SCC, SetCC0.getOperand(1)})
+ : DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
+ {Sub.getOperand(0), Sub.getOperand(0),
+ CFlags, SCC, SetCC0.getOperand(1)});
+ DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
+
+ SmallVector<SDValue> Ops(BrCond->op_values());
+ if (isNullConstant(N->getOperand(1)) && Ops[CondNo] != CC1N)
+ Ops[CondNo] = CC1N;
+ else if (isOneConstant(N->getOperand(1)))
+ Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
+
+ SDValue NewBrCond =
+ DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
+ if (BrCond != NewBrCond.getNode()) {
+ DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
+ DCI.recursivelyDeleteUnusedNodes(BrCond);
+ }
+ return CCMP;
+}
+
+static SDValue combineX86SubCmpToCcmp(SDNode *N, SDValue Flag,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &ST) {
+ // cmp(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
+ // brcond ne
+ //
+ // ->
+ //
+ // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond cc1
+ //
+ //
+ // sub(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
+ // brcond ne
+ //
+ // ->
+ //
+ // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond ~cc1
+ //
+ // if only flag has users, where cflags is determined by cc1.
+
+ SDValue LHS = N->getOperand(0);
+
+ if (!ST.hasCCMP() ||
+ (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
+ !Flag.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC0 = LHS.getOperand(0);
+ SDValue SetCC1 = LHS.getOperand(1);
+ if (SetCC0.getOpcode() != X86ISD::SETCC ||
+ SetCC1.getOpcode() != X86ISD::SETCC)
+ return SDValue();
+
+ // and/or is commutable. Try to commute the operands and then test again.
+ if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB) {
+ std::swap(SetCC0, SetCC1);
+ if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB)
+ return SDValue();
+ }
+
+ return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
+ X86ISD::CCMP);
+}
+
+static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &ST) {
+ // cmp(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 0)
+ // brcond ne
+ //
+ // ->
+ //
+ // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond cc1
+ //
+ //
+ // sub(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 1)
+ // brcond ne
+ //
+ // ->
+ //
+ // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond ~cc1
+ //
+ // if only flag has users, where cflags is determined by cc1.
+
+ SDValue LHS = N->getOperand(0);
+
+ if (!ST.hasCCMP() ||
+ (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
+ !Flag.hasOneUse())
+ return SDValue();
+
+ SDValue SetCC0 = LHS.getOperand(0);
+ SDValue SetCC1 = LHS.getOperand(1);
+ if (SetCC0.getOpcode() != X86ISD::SETCC ||
+ SetCC1.getOpcode() != X86ISD::SETCC)
+ return SDValue();
+
+ auto IsOp1CmpZero = [&](SDValue V) {
+ SDValue Op = V.getOperand(1);
+ return Op.getOpcode() == X86ISD::CMP && isNullConstant(Op.getOperand(1));
+ };
+ // and/or is commutable. Try to commute the operands and then test again.
+ if (!IsOp1CmpZero(SetCC1)) {
+ std::swap(SetCC0, SetCC1);
+ if (!IsOp1CmpZero(SetCC1))
+ return SDValue();
+ }
+
+ return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
+ X86ISD::CTEST);
+}
+
+static bool isOnlyFlagUsedX86SubOne(SDNode *N) {
+ return N->getOpcode() == X86ISD::SUB && isOneConstant(N->getOperand(1)) &&
+ !N->hasAnyUseOfValue(0);
+}
+
+static SDValue combineX86SubToCcmp(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &ST) {
+ if (!isOnlyFlagUsedX86SubOne(N))
+ return SDValue();
+
+ return combineX86SubCmpToCcmp(N, SDValue(N, 1), DAG, DCI, ST);
+}
+
+static SDValue combineX86SubToCtest(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &ST) {
+ if (!isOnlyFlagUsedX86SubOne(N))
+ return SDValue();
+
+ return combineX86SubCmpToCtest(N, SDValue(N, 1), DAG, DCI, ST);
+}
+
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
// Only handle test patterns.
if (!isNullConstant(N->getOperand(1)))
@@ -54620,6 +54805,14 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
EVT VT = Op.getValueType();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (SDValue CCMP =
+ combineX86SubCmpToCcmp(N, SDValue(N, 0), DAG, DCI, Subtarget))
+ return CCMP;
+
+ if (SDValue CTEST =
+ combineX86SubCmpToCtest(N, SDValue(N, 0), DAG, DCI, Subtarget))
+ return CTEST;
+
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
@@ -54748,7 +54941,8 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
}
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI) {
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &ST) {
assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
"Expected X86ISD::ADD or X86ISD::SUB");
@@ -54759,6 +54953,12 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
bool IsSub = X86ISD::SUB == N->getOpcode();
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
+ if (SDValue CCMP = combineX86SubToCcmp(N, DAG, DCI, ST))
+ return CCMP;
+
+ if (SDValue CTEST = combineX86SubToCtest(N, DAG, DCI, ST))
+ return CTEST;
+
// If we don't use the flag result, simplify back to a generic ADD/SUB.
if (!N->hasAnyUseOfValue(1)) {
SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
@@ -57058,11 +57258,11 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
- case X86ISD::CMP: return combineCMP(N, DAG, Subtarget);
+ case X86ISD::CMP: return combineCMP(N, DAG, DCI, Subtarget);
case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
case X86ISD::ADD:
- case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
+ case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI, Subtarget);
case X86ISD::SBB: return combineSBB(N, DAG);
case X86ISD::ADC: return combineADC(N, DAG, DCI);
case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index ade54f73bff09..d8596dbdddcb7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -789,6 +789,10 @@ namespace llvm {
// Perform an FP80 add after changing precision control in FPCW.
STRICT_FP80_ADD,
+ // Conditional compare instructions
+ CCMP,
+ CTEST,
+
// WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
// non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.
diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
index e5c1143eba87f..e09a1f50ab9ee 100644
--- a/llvm/lib/Target/X86/X86InstrConditionalCompare.td
+++ b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
@@ -78,6 +78,34 @@ let mayLoad = 1 in {
}
}
+def : Pat<(X86ccmp GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP8rr GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP16rr GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP64rr GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
+
+def : Pat<(X86ccmp GR8:$src1, (i8 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP8ri GR8:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR16:$src1, (i16 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP16ri GR16:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR32:$src1, (i32 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP32ri GR32:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR64:$src1, (i64 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP64ri32 GR64:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+
+def : Pat<(X86ccmp GR8:$src1, (loadi8 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP8rm GR8:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR16:$src1, (loadi16 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP16rm GR16:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR32:$src1, (loadi32 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP32rm GR32:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ccmp GR64:$src1, (loadi64 addr:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CCMP64rm GR64:$src1, addr:$src2, timm:$dcf, timm:$cond)>;
+
+
//===----------------------------------------------------------------------===//
// CTEST Instructions
//
@@ -108,3 +136,30 @@ let mayLoad = 1 in {
def CTEST64mr: Ctest<0x85, MRMDestMem, Xi64, i64mem, GR64>;
}
}
+
+def : Pat<(X86ctest GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST8rr GR8:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST16rr GR16:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST32rr GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST64rr GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
+
+def : Pat<(X86ctest GR8:$src1, (i8 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST8ri GR8:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest GR16:$src1, (i16 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST16ri GR16:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest GR32:$src1, (i32 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST32ri GR32:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest GR64:$src1, (i64 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST64ri32 GR64:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
+
+def : Pat<(X86ctest (loadi8 addr:$src1), GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST8mr addr:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest (loadi16 addr:$src1), GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST16mr addr:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest (loadi32 addr:$src1), GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST32mr addr:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
+def : Pat<(X86ctest (loadi64 addr:$src1), GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
+ (CTEST64mr addr:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index f14c7200af968..664998e76353b 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -12,6 +12,9 @@ def SDTX86CmpTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisInt<1>,
def SDTX86FCmp : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisFP<1>,
SDTCisSameAs<1, 2>]>;
+def SDTX86Ccmp : SDTypeProfile<1, 5,
+ [SDTCisVT<3, i8>, SDTCisVT<4, i8>, SDTCisVT<5, i32>]>;
+
def SDTX86Cmov : SDTypeProfile<1, 4,
[SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>, SDTCisVT<4, i32>]>;
@@ -138,6 +141,9 @@ def X86strict_fcmp : SDNode<"X86ISD::STRICT_FCMP", SDTX86FCmp, [SDNPHasChain]>;
def X86strict_fcmps : SDNode<"X86ISD::STRICT_FCMPS", SDTX86FCmp, [SDNPHasChain]>;
def X86bt : SDNode<"X86ISD::BT", SDTX86CmpTest>;
+def X86ccmp : SDNode<"X86ISD::CCMP", SDTX86Ccmp>;
+def X86ctest : SDNode<"X86ISD::CTEST", SDTX86Ccmp>;
+
def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
[SDNPHasChain]>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 26c68ce3c1a2d..c7016ef67a1c8 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3164,6 +3164,63 @@ X86::CondCode X86::getCondFromCCMP(const MachineInstr &MI) {
: X86::COND_INVALID;
}
+int X86::getCondFlagsFromCondCode(X86::CondCode CC) {
+ // CCMP/CTEST has two conditional operands:
+ // - SCC: source conditonal code (same as CMOV)
+ // - DCF: destination conditional flags, which has 4 valid bits
+ //
+ // +----+----+----+----+
+ // | OF | SF | ZF | CF |
+ // +----+----+----+----+
+ //
+ // If SCC (source conditional code) evaluates to false, CCMP/CTEST will update
+ // the conditional flags as follows:
+ //
+ // OF = DCF.OF
+ // SF = DCF.SF
+ // ZF = DCF.ZF
+ // CF = DCF.CF
+ // PF = DCF.CF
+ // AF = 0 (Auxiliary Carry Flag)
+ //
+ // Otherwise, the CMP or TEST is executed and it updates the
+ // CSPAZO flags normally.
+ //
+ // NOTE:
+ // If SCC = P, then SCC evaluates to true regardless of the CSPAZO value.
+ // If SCC = NP, then SCC evaluates to false regardless of the CSPAZO value.
+
+ enum { CF = 1, ZF = 2, SF = 4, OF = 8, PF = CF };
+
+ switch (CC) {
+ default:
+ llvm_unreachable("Illegal condition code!");
+ case X86::COND_NO:
+ case X86::COND_NE:
+ case X86::COND_GE:
+ case X86::COND_G:
+ case X86::COND_AE:
+ case X86::COND_A:
+ case X86::COND_NS:
+ case X86::COND_NP:
+ return 0;
+ case X86::COND_O:
+ return OF;
+ case X86::COND_B:
+ case X86::COND_BE:
+ return CF;
+ break;
+ case X86::COND_E:
+ case X86::COND_LE:
+ return ZF;
+ case X86::COND_S:
+ case X86::COND_L:
+ return SF;
+ case X86::COND_P:
+ return PF;
+ }
+}
+
/// Return the inverse of the specified condition,
/// e.g. turning COND_E to COND_NE.
X86::CondCode X86::GetOppositeBranchCondition(X86::CondCode CC) {
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index 55deca73b1f3a..da27027fc23a0 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -74,6 +74,9 @@ CondCode getCondFromCFCMov(const MachineInstr &MI);
// Turn CCMP instruction into condition code.
CondCode getCondFromCCMP(const MachineInstr &MI);
+// Turn condition code into condition flags for CCMP/CTEST.
+int getCondFlagsFromCondCode(CondCode CC);
+
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
CondCode GetOppositeBranchCondition(CondCode CC);
diff --git a/llvm/test/CodeGen/X86/apx/ccmp.ll b/llvm/test/CodeGen/X86/apx/ccmp.ll
new file mode 100644
index 0000000000000..c7a0868976f95
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/ccmp.ll
@@ -0,0 +1,926 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ccmp -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ccmp,+ndd -verify-machineinstrs | FileCheck %s --check-prefix=NDD
+
+define void @ccmp8rr_zf(i8 noundef %a, i8 noundef %b, i8 noundef %c) {
+; CHECK-LABEL: ccmp8rr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpb %dl, %dil
+; CHECK-NEXT: ccmpneb {dfv=zf} %dl, %sil
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB0_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8rr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpb %dl, %dil
+; NDD-NEXT: ccmpneb {dfv=zf} %dl, %sil
+; NDD-NEXT: jne .LBB0_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB0_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp eq i8 %a, %c
+ %cmp1 = icmp eq i8 %b, %c
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp8rr_cf(i8 noundef %a, i8 noundef %b) {
+; CHECK-LABEL: ccmp8rr_cf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpb $2, %dil
+; CHECK-NEXT: ccmpgeb {dfv=cf} $2, %sil
+; CHECK-NEXT: jb .LBB1_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB1_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8rr_cf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpb $2, %dil
+; NDD-NEXT: ccmpgeb {dfv=cf} $2, %sil
+; NDD-NEXT: jb .LBB1_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB1_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp sgt i8 %a, 1
+ %tobool = icmp ugt i8 %b, 1
+ %or.cond = and i1 %cmp, %tobool
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define i8 @ccmp8rr_sf(i8 %a, i8 %b, i8* nocapture %c) {
+; CHECK-LABEL: ccmp8rr_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ccmpneb {dfv=sf} $2, %sil
+; CHECK-NEXT: jl .LBB2_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: movb %dil, (%rdx)
+; CHECK-NEXT: .LBB2_2: # %if.end
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8rr_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ccmpneb {dfv=sf} $2, %sil
+; NDD-NEXT: jl .LBB2_2
+; NDD-NEXT: # %bb.1: # %if.then
+; NDD-NEXT: movb %dil, (%rdx)
+; NDD-NEXT: .LBB2_2: # %if.end
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: retq
+entry:
+ %tobool = icmp ne i8 %a, 0
+ %cmp = icmp sgt i8 %b, 1
+ %or.cond = select i1 %tobool, i1 %cmp, i1 false
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ store i8 %a, i8* %c, align 4
+ br label %if.end
+
+if.end:
+ ret i8 0
+}
+
+define i8 @ccmp8rr_none(i8 %a, i8 %b, i8* nocapture %c) {
+; CHECK-LABEL: ccmp8rr_none:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ccmpeb {dfv=} $2, %sil
+; CHECK-NEXT: jl .LBB3_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: movb %dil, (%rdx)
+; CHECK-NEXT: .LBB3_2: # %if.end
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8rr_none:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ccmpeb {dfv=} $2, %sil
+; NDD-NEXT: jl .LBB3_2
+; NDD-NEXT: # %bb.1: # %if.then
+; NDD-NEXT: movb %dil, (%rdx)
+; NDD-NEXT: .LBB3_2: # %if.end
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: retq
+entry:
+ %tobool = icmp ne i8 %a, 0
+ %cmp = icmp sgt i8 %b, 1
+ %or.cond = select i1 %tobool, i1 true, i1 %cmp
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ store i8 %a, i8* %c, align 4
+ br label %if.end
+
+if.end:
+ ret i8 0
+}
+
+define void @ccmp16rr_sf(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+; CHECK-LABEL: ccmp16rr_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpw %dx, %di
+; CHECK-NEXT: ccmplew {dfv=sf} %dx, %si
+; CHECK-NEXT: jge .LBB4_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB4_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp16rr_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpw %dx, %di
+; NDD-NEXT: ccmplew {dfv=sf} %dx, %si
+; NDD-NEXT: jge .LBB4_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB4_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp sgt i16 %a, %c
+ %cmp1 = icmp slt i16 %b, %c
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp32rr_cf(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+; CHECK-LABEL: ccmp32rr_cf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %edx, %edi
+; CHECK-NEXT: ccmpbl {dfv=cf} %edx, %esi
+; CHECK-NEXT: ja .LBB5_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB5_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp32rr_cf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl %edx, %edi
+; NDD-NEXT: ccmpbl {dfv=cf} %edx, %esi
+; NDD-NEXT: ja .LBB5_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB5_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp uge i32 %a, %c
+ %cmp1 = icmp ule i32 %b, %c
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp64rr_of(i64 %a, i64 %b, i64 %c) {
+; CHECK-LABEL: ccmp64rr_of:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: cmpq %rdx, %rdi
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: cmpq %rsi, %rdi
+; CHECK-NEXT: setno %cl
+; CHECK-NEXT: testb %cl, %al
+; CHECK-NEXT: jne .LBB6_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB6_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp64rr_of:
+; NDD: # %bb.0: # %bb
+; NDD-NEXT: cmpq %rdx, %rdi
+; NDD-NEXT: setb %al
+; NDD-NEXT: cmpq %rsi, %rdi
+; NDD-NEXT: setno %cl
+; NDD-NEXT: testb %cl, %al
+; NDD-NEXT: jne .LBB6_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB6_1: # %if.end
+; NDD-NEXT: retq
+bb:
+ %cmp = icmp uge i64 %a, %c
+ %smul = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+ %obit = extractvalue {i64, i1} %smul, 1
+ %or.cond = or i1 %cmp, %obit
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp64rr_of_crossbb(i64 %a, i64 %b) {
+; CHECK-LABEL: ccmp64rr_of_crossbb:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: je .LBB7_2
+; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: cmpq %rsi, %rdi
+; CHECK-NEXT: .LBB7_2: # %bb3
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp64rr_of_crossbb:
+; NDD: # %bb.0: # %bb
+; NDD-NEXT: testq %rdi, %rdi
+; NDD-NEXT: je .LBB7_2
+; NDD-NEXT: # %bb.1: # %bb1
+; NDD-NEXT: cmpq %rsi, %rdi
+; NDD-NEXT: .LBB7_2: # %bb3
+; NDD-NEXT: retq
+bb:
+ %cond1 = icmp eq i64 %a, 0
+ br i1 %cond1, label %bb3, label %bb1
+
+bb1: ; preds = %bb
+ %smul = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b)
+ %obit = extractvalue {i64, i1} %smul, 1
+ br i1 %obit, label %bb3, label %bb2
+
+bb2: ; preds = %bb1
+ %tmp = ptrtoint ptr null to i64
+ br label %bb3
+
+bb3: ; preds = %bb2, %bb1, %bb
+ ret void
+}
+
+define void @ccmp8ri_zf(i8 noundef %a, i8 noundef %b, i8 noundef %c) {
+; CHECK-LABEL: ccmp8ri_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpb %dl, %dil
+; CHECK-NEXT: ccmpleb {dfv=zf} $123, %sil
+; CHECK-NEXT: jne .LBB8_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB8_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8ri_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpb %dl, %dil
+; NDD-NEXT: ccmpleb {dfv=zf} $123, %sil
+; NDD-NEXT: jne .LBB8_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB8_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp sgt i8 %a, %c
+ %cmp1 = icmp eq i8 %b, 123
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp16ri8_zf(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+; CHECK-LABEL: ccmp16ri8_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpw %dx, %di
+; CHECK-NEXT: ccmplew {dfv=sf} $123, %si
+; CHECK-NEXT: jge .LBB9_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB9_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp16ri8_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpw %dx, %di
+; NDD-NEXT: ccmplew {dfv=sf} $123, %si
+; NDD-NEXT: jge .LBB9_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB9_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp sgt i16 %a, %c
+ %cmp1 = icmp slt i16 %b, 123
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp16ri_zf(i16 noundef %a, i16 noundef %b, i16 noundef %c) {
+; CHECK-LABEL: ccmp16ri_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpw %dx, %di
+; CHECK-NEXT: movswl %si, %eax
+; CHECK-NEXT: ccmpael {dfv=sf} $1234, %eax # imm = 0x4D2
+; CHECK-NEXT: jge .LBB10_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB10_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp16ri_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpw %dx, %di
+; NDD-NEXT: movswl %si, %eax
+; NDD-NEXT: ccmpael {dfv=sf} $1234, %eax # imm = 0x4D2
+; NDD-NEXT: jge .LBB10_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB10_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp ult i16 %a, %c
+ %cmp1 = icmp slt i16 %b, 1234
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp32ri_cf(i32 noundef %a, i32 noundef %b, i32 noundef %c) {
+; CHECK-LABEL: ccmp32ri_cf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %edx, %edi
+; CHECK-NEXT: ccmpbl {dfv=cf} $123457, %esi # imm = 0x1E241
+; CHECK-NEXT: jae .LBB11_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB11_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp32ri_cf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl %edx, %edi
+; NDD-NEXT: ccmpbl {dfv=cf} $123457, %esi # imm = 0x1E241
+; NDD-NEXT: jae .LBB11_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB11_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp uge i32 %a, %c
+ %cmp1 = icmp ule i32 %b, 123456
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp64ri32_zf(i64 noundef %a, i64 noundef %b, i64 noundef %c) {
+; CHECK-LABEL: ccmp64ri32_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpq %rdx, %rdi
+; CHECK-NEXT: ccmpbeq {dfv=sf} $123456, %rsi # imm = 0x1E240
+; CHECK-NEXT: jge .LBB12_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB12_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp64ri32_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpq %rdx, %rdi
+; NDD-NEXT: ccmpbeq {dfv=sf} $123456, %rsi # imm = 0x1E240
+; NDD-NEXT: jge .LBB12_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB12_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp ugt i64 %a, %c
+ %cmp1 = icmp slt i64 %b, 123456
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp8rm_zf(i8 noundef %a, i8 noundef %b, i8 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp8rm_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpb %dl, %dil
+; CHECK-NEXT: ccmpneb {dfv=zf} (%rcx), %sil
+; CHECK-NEXT: jne .LBB13_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB13_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8rm_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpb %dl, %dil
+; NDD-NEXT: ccmpneb {dfv=zf} (%rcx), %sil
+; NDD-NEXT: jne .LBB13_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB13_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %d = load i8, ptr %ptr
+ %cmp = icmp eq i8 %a, %c
+ %cmp1 = icmp eq i8 %b, %d
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp16rm_sf(i16 noundef %a, i16 noundef %b, i16 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp16rm_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpw %dx, %di
+; CHECK-NEXT: ccmplew {dfv=sf} (%rcx), %si
+; CHECK-NEXT: jge .LBB14_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB14_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp16rm_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpw %dx, %di
+; NDD-NEXT: ccmplew {dfv=sf} (%rcx), %si
+; NDD-NEXT: jge .LBB14_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB14_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %d = load i16, ptr %ptr
+ %cmp = icmp sgt i16 %a, %c
+ %cmp1 = icmp slt i16 %b, %d
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp32rm_cf(i32 noundef %a, i32 noundef %b, i32 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp32rm_cf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %edx, %edi
+; CHECK-NEXT: ccmpgl {dfv=cf} (%rcx), %esi
+; CHECK-NEXT: ja .LBB15_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB15_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp32rm_cf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl %edx, %edi
+; NDD-NEXT: ccmpgl {dfv=cf} (%rcx), %esi
+; NDD-NEXT: ja .LBB15_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB15_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %d = load i32, ptr %ptr
+ %cmp = icmp sle i32 %a, %c
+ %cmp1 = icmp ule i32 %b, %d
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp64rm_sf(i64 noundef %a, i64 noundef %b, i64 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp64rm_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpq %rdx, %rdi
+; CHECK-NEXT: ccmpleq {dfv=sf} (%rcx), %rsi
+; CHECK-NEXT: jge .LBB16_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB16_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp64rm_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpq %rdx, %rdi
+; NDD-NEXT: ccmpleq {dfv=sf} (%rcx), %rsi
+; NDD-NEXT: jge .LBB16_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB16_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %d = load i64, ptr %ptr
+ %cmp = icmp sgt i64 %a, %c
+ %cmp1 = icmp slt i64 %b, %d
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp8mr_zf(i8 noundef %a, i8 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp8mr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpb %sil, %dil
+; CHECK-NEXT: ccmpgeb {dfv=zf} %sil, (%rdx)
+; CHECK-NEXT: jne .LBB17_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB17_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8mr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpb %sil, %dil
+; NDD-NEXT: ccmpgeb {dfv=zf} %sil, (%rdx)
+; NDD-NEXT: jne .LBB17_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB17_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i8, ptr %ptr
+ %cmp = icmp slt i8 %a, %c
+ %cmp1 = icmp eq i8 %b, %c
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp16mr_sf(i16 noundef %a, i16 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp16mr_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpw %si, %di
+; CHECK-NEXT: ccmplew {dfv=sf} %si, (%rdx)
+; CHECK-NEXT: jge .LBB18_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB18_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp16mr_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpw %si, %di
+; NDD-NEXT: ccmplew {dfv=sf} %si, (%rdx)
+; NDD-NEXT: jge .LBB18_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB18_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i16, ptr %ptr
+ %cmp = icmp sgt i16 %a, %c
+ %cmp1 = icmp slt i16 %b, %c
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp32mr_cf(i32 noundef %a, i32 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp32mr_cf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: ccmpll {dfv=cf} %esi, (%rdx)
+; CHECK-NEXT: ja .LBB19_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB19_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp32mr_cf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl %esi, %edi
+; NDD-NEXT: ccmpll {dfv=cf} %esi, (%rdx)
+; NDD-NEXT: ja .LBB19_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB19_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i32, ptr %ptr
+ %cmp = icmp sge i32 %a, %c
+ %cmp1 = icmp ule i32 %b, %c
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp64mr_sf(i64 noundef %a, i64 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp64mr_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpq %rsi, %rdi
+; CHECK-NEXT: ccmpleq {dfv=sf} %rsi, (%rdx)
+; CHECK-NEXT: jge .LBB20_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB20_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp64mr_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpq %rsi, %rdi
+; NDD-NEXT: ccmpleq {dfv=sf} %rsi, (%rdx)
+; NDD-NEXT: jge .LBB20_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB20_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i64, ptr %ptr
+ %cmp = icmp sgt i64 %a, %c
+ %cmp1 = icmp slt i64 %b, %c
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp8mi_zf(i8 noundef %a, i8 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp8mi_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpb %sil, %dil
+; CHECK-NEXT: ccmpneb {dfv=zf} $123, (%rdx)
+; CHECK-NEXT: jne .LBB21_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB21_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp8mi_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpb %sil, %dil
+; NDD-NEXT: ccmpneb {dfv=zf} $123, (%rdx)
+; NDD-NEXT: jne .LBB21_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB21_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i8, ptr %ptr
+ %cmp = icmp eq i8 %a, %c
+ %cmp1 = icmp eq i8 %b, 123
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp16mi_zf(i16 noundef %a, i16 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp16mi_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpw %si, %di
+; CHECK-NEXT: ccmplew {dfv=sf} $1234, (%rdx) # imm = 0x4D2
+; CHECK-NEXT: jge .LBB22_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB22_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp16mi_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpw %si, %di
+; NDD-NEXT: ccmplew {dfv=sf} $1234, (%rdx) # imm = 0x4D2
+; NDD-NEXT: jge .LBB22_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB22_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i16, ptr %ptr
+ %cmp = icmp sgt i16 %a, %c
+ %cmp1 = icmp slt i16 %b, 1234
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp32mi_cf(i32 noundef %a, i32 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp32mi_cf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpl %esi, %edi
+; CHECK-NEXT: ccmpnel {dfv=cf} $123457, (%rdx) # imm = 0x1E241
+; CHECK-NEXT: jae .LBB23_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB23_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp32mi_cf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpl %esi, %edi
+; NDD-NEXT: ccmpnel {dfv=cf} $123457, (%rdx) # imm = 0x1E241
+; NDD-NEXT: jae .LBB23_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB23_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i32, ptr %ptr
+ %cmp = icmp eq i32 %a, %c
+ %cmp1 = icmp ule i32 %b, 123456
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ccmp64mi32_zf(i64 noundef %a, i64 noundef %c, ptr %ptr) {
+; CHECK-LABEL: ccmp64mi32_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: cmpq %rsi, %rdi
+; CHECK-NEXT: ccmpleq {dfv=sf} $123456, (%rdx) # imm = 0x1E240
+; CHECK-NEXT: jge .LBB24_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB24_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ccmp64mi32_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: cmpq %rsi, %rdi
+; NDD-NEXT: ccmpleq {dfv=sf} $123456, (%rdx) # imm = 0x1E240
+; NDD-NEXT: jge .LBB24_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB24_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i64, ptr %ptr
+ %cmp = icmp sgt i64 %a, %c
+ %cmp1 = icmp slt i64 %b, 123456
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare dso_local void @foo(...)
+declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/apx/ctest.ll b/llvm/test/CodeGen/X86/apx/ctest.ll
new file mode 100644
index 0000000000000..9c03570eee308
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/ctest.ll
@@ -0,0 +1,698 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ccmp -verify-machineinstrs | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+ccmp,+ndd -verify-machineinstrs | FileCheck %s --check-prefix=NDD
+
+define void @ctest8rr_zf(i8 noundef %a, i8 noundef %b) {
+; CHECK-LABEL: ctest8rr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ctestneb {dfv=zf} %sil, %sil
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB0_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest8rr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ctestneb {dfv=zf} %sil, %sil
+; NDD-NEXT: jne .LBB0_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB0_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp eq i8 %a, 0
+ %cmp1 = icmp eq i8 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest8rr_sf(i8 noundef %a, i8 noundef %b) {
+; CHECK-LABEL: ctest8rr_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ctesteb {dfv=sf} %sil, %sil
+; CHECK-NEXT: js .LBB1_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB1_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest8rr_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ctesteb {dfv=sf} %sil, %sil
+; NDD-NEXT: js .LBB1_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB1_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp ule i8 %a, 0
+ %tobool = icmp sge i8 %b, 0
+ %or.cond = and i1 %cmp, %tobool
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret void
+}
+
+define i8 @ctest8rr_sf_2(i8 %a, i8 %b, ptr nocapture %c) {
+; CHECK-LABEL: ctest8rr_sf_2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ctestleb {dfv=sf} %sil, %sil
+; CHECK-NEXT: jns .LBB2_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: movb %dil, (%rdx)
+; CHECK-NEXT: .LBB2_2: # %if.end
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest8rr_sf_2:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ctestleb {dfv=sf} %sil, %sil
+; NDD-NEXT: jns .LBB2_2
+; NDD-NEXT: # %bb.1: # %if.then
+; NDD-NEXT: movb %dil, (%rdx)
+; NDD-NEXT: .LBB2_2: # %if.end
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: retq
+entry:
+ %tobool = icmp sgt i8 %a, 0
+ %cmp = icmp slt i8 %b, 0
+ %or.cond = select i1 %tobool, i1 true, i1 %cmp
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ store i8 %a, ptr %c, align 4
+ br label %if.end
+
+if.end:
+ ret i8 0
+}
+
+define i8 @ctest8rr_none(i8 %a, i8 %b, ptr nocapture %c) {
+; CHECK-LABEL: ctest8rr_none:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ctestneb {dfv=} %sil, %sil
+; CHECK-NEXT: jne .LBB3_2
+; CHECK-NEXT: # %bb.1: # %if.then
+; CHECK-NEXT: movb %dil, (%rdx)
+; CHECK-NEXT: .LBB3_2: # %if.end
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest8rr_none:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ctestneb {dfv=} %sil, %sil
+; NDD-NEXT: jne .LBB3_2
+; NDD-NEXT: # %bb.1: # %if.then
+; NDD-NEXT: movb %dil, (%rdx)
+; NDD-NEXT: .LBB3_2: # %if.end
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: retq
+entry:
+ %tobool = icmp ne i8 %a, 0
+ %cmp = icmp eq i8 %b, 0
+ %or.cond = select i1 %tobool, i1 %cmp, i1 false
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then:
+ store i8 %a, ptr %c, align 4
+ br label %if.end
+
+if.end:
+ ret i8 0
+}
+
+define void @ctest16rr_sf(i16 noundef %a, i16 noundef %b) {
+; CHECK-LABEL: ctest16rr_sf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testw %di, %di
+; CHECK-NEXT: ctestlew {dfv=sf} %si, %si
+; CHECK-NEXT: jns .LBB4_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB4_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest16rr_sf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testw %di, %di
+; NDD-NEXT: ctestlew {dfv=sf} %si, %si
+; NDD-NEXT: jns .LBB4_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB4_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp sgt i16 %a, 0
+ %cmp1 = icmp slt i16 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest32rr_zf(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: ctest32rr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: ctestsl {dfv=zf} %esi, %esi
+; CHECK-NEXT: jne .LBB5_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB5_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest32rr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: testl %edi, %edi
+; NDD-NEXT: ctestsl {dfv=zf} %esi, %esi
+; NDD-NEXT: jne .LBB5_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB5_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp sge i32 %a, 0
+ %cmp1 = icmp eq i32 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest8ri_zf(i8 noundef %a, i8 noundef %b) {
+; CHECK-LABEL: ctest8ri_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andb $123, %sil
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ctestneb {dfv=zf} %sil, %sil
+; CHECK-NEXT: jne .LBB6_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB6_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest8ri_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: andb $123, %sil, %al
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ctestneb {dfv=zf} %al, %al
+; NDD-NEXT: jne .LBB6_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB6_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp eq i8 %a, 0
+ %and = and i8 %b, 123
+ %cmp1 = icmp eq i8 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest16ri_zf(i16 noundef %a, i16 noundef %b) {
+; CHECK-LABEL: ctest16ri_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andl $1234, %esi # imm = 0x4D2
+; CHECK-NEXT: testw %di, %di
+; CHECK-NEXT: ctestnew {dfv=zf} %si, %si
+; CHECK-NEXT: jne .LBB7_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB7_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest16ri_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: andl $1234, %esi, %eax # imm = 0x4D2
+; NDD-NEXT: testw %di, %di
+; NDD-NEXT: ctestnew {dfv=zf} %ax, %ax
+; NDD-NEXT: jne .LBB7_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB7_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp eq i16 %a, 0
+ %and = and i16 %b, 1234
+ %cmp1 = icmp eq i16 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest32ri_zf(i32 noundef %a, i32 noundef %b) {
+; CHECK-LABEL: ctest32ri_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andl $12345, %esi # imm = 0x3039
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: ctestnel {dfv=zf} %esi, %esi
+; CHECK-NEXT: jne .LBB8_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB8_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest32ri_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: andl $12345, %esi, %eax # imm = 0x3039
+; NDD-NEXT: testl %edi, %edi
+; NDD-NEXT: ctestnel {dfv=zf} %eax, %eax
+; NDD-NEXT: jne .LBB8_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB8_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp eq i32 %a, 0
+ %and = and i32 %b, 12345
+ %cmp1 = icmp eq i32 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest64ri32_zf(i64 noundef %a, i64 noundef %b) {
+; CHECK-LABEL: ctest64ri32_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andl $123456, %esi # imm = 0x1E240
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: ctestneq {dfv=zf} %rsi, %rsi
+; CHECK-NEXT: jne .LBB9_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB9_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest64ri32_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: andl $123456, %esi, %eax # imm = 0x1E240
+; NDD-NEXT: testq %rdi, %rdi
+; NDD-NEXT: ctestneq {dfv=zf} %rax, %rax
+; NDD-NEXT: jne .LBB9_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB9_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %cmp = icmp eq i64 %a, 0
+ %and = and i64 %b, 123456
+ %cmp1 = icmp eq i64 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest8mr_zf(i8 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest8mr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzbl (%rsi), %eax
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ctestneb {dfv=zf} %al, %al
+; CHECK-NEXT: jne .LBB10_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB10_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest8mr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: movzbl (%rsi), %eax
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ctestneb {dfv=zf} %al, %al
+; NDD-NEXT: jne .LBB10_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB10_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i8, ptr %ptr
+ %cmp = icmp eq i8 %a, 0
+ %cmp1 = icmp eq i8 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest16mr_zf(i16 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest16mr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzwl (%rsi), %eax
+; CHECK-NEXT: testw %di, %di
+; CHECK-NEXT: ctestnew {dfv=zf} %ax, %ax
+; CHECK-NEXT: jne .LBB11_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB11_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest16mr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: movzwl (%rsi), %eax
+; NDD-NEXT: testw %di, %di
+; NDD-NEXT: ctestnew {dfv=zf} %ax, %ax
+; NDD-NEXT: jne .LBB11_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB11_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i16, ptr %ptr
+ %cmp = icmp eq i16 %a, 0
+ %cmp1 = icmp eq i16 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest32mr_zf(i32 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest32mr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl (%rsi), %eax
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: ctestnel {dfv=zf} %eax, %eax
+; CHECK-NEXT: jne .LBB12_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB12_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest32mr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: movl (%rsi), %eax
+; NDD-NEXT: testl %edi, %edi
+; NDD-NEXT: ctestnel {dfv=zf} %eax, %eax
+; NDD-NEXT: jne .LBB12_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB12_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i32, ptr %ptr
+ %cmp = icmp eq i32 %a, 0
+ %cmp1 = icmp eq i32 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest64mr_zf(i64 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest64mr_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq (%rsi), %rax
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: ctestneq {dfv=zf} %rax, %rax
+; CHECK-NEXT: jne .LBB13_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB13_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest64mr_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: movq (%rsi), %rax
+; NDD-NEXT: testq %rdi, %rdi
+; NDD-NEXT: ctestneq {dfv=zf} %rax, %rax
+; NDD-NEXT: jne .LBB13_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB13_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i64, ptr %ptr
+ %cmp = icmp eq i64 %a, 0
+ %cmp1 = icmp eq i64 %b, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest8mi_zf(i8 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest8mi_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzbl (%rsi), %eax
+; CHECK-NEXT: andb $123, %al
+; CHECK-NEXT: testb %dil, %dil
+; CHECK-NEXT: ctestneb {dfv=zf} %al, %al
+; CHECK-NEXT: jne .LBB14_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB14_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest8mi_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: andb $123, (%rsi), %al
+; NDD-NEXT: testb %dil, %dil
+; NDD-NEXT: ctestneb {dfv=zf} %al, %al
+; NDD-NEXT: jne .LBB14_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB14_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i8, ptr %ptr
+ %cmp = icmp eq i8 %a, 0
+ %and = and i8 %b, 123
+ %cmp1 = icmp eq i8 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest16mi_zf(i16 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest16mi_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzwl (%rsi), %eax
+; CHECK-NEXT: andl $1234, %eax # imm = 0x4D2
+; CHECK-NEXT: testw %di, %di
+; CHECK-NEXT: ctestnew {dfv=zf} %ax, %ax
+; CHECK-NEXT: jne .LBB15_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB15_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest16mi_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: movzwl (%rsi), %eax
+; NDD-NEXT: andl $1234, %eax # imm = 0x4D2
+; NDD-NEXT: testw %di, %di
+; NDD-NEXT: ctestnew {dfv=zf} %ax, %ax
+; NDD-NEXT: jne .LBB15_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB15_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i16, ptr %ptr
+ %cmp = icmp eq i16 %a, 0
+ %and = and i16 %b, 1234
+ %cmp1 = icmp eq i16 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest32mi_zf(i32 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest32mi_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movzwl (%rsi), %eax
+; CHECK-NEXT: andl $12345, %eax # imm = 0x3039
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: ctestnew {dfv=zf} %ax, %ax
+; CHECK-NEXT: jne .LBB16_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB16_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest32mi_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: movzwl (%rsi), %eax
+; NDD-NEXT: andl $12345, %eax # imm = 0x3039
+; NDD-NEXT: testl %edi, %edi
+; NDD-NEXT: ctestnew {dfv=zf} %ax, %ax
+; NDD-NEXT: jne .LBB16_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB16_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i32, ptr %ptr
+ %cmp = icmp eq i32 %a, 0
+ %and = and i32 %b, 12345
+ %cmp1 = icmp eq i32 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+define void @ctest64mi32_zf(i64 noundef %a, ptr %ptr) {
+; CHECK-LABEL: ctest64mi32_zf:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testq %rdi, %rdi
+; CHECK-NEXT: movl $123456, %eax # imm = 0x1E240
+; CHECK-NEXT: ctestnel {dfv=zf} %eax, (%rsi)
+; CHECK-NEXT: jne .LBB17_1
+; CHECK-NEXT: # %bb.2: # %if.then
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: jmp foo # TAILCALL
+; CHECK-NEXT: .LBB17_1: # %if.end
+; CHECK-NEXT: retq
+;
+; NDD-LABEL: ctest64mi32_zf:
+; NDD: # %bb.0: # %entry
+; NDD-NEXT: andl $123456, (%rsi), %eax # imm = 0x1E240
+; NDD-NEXT: testq %rdi, %rdi
+; NDD-NEXT: ctestnel {dfv=zf} %eax, %eax
+; NDD-NEXT: jne .LBB17_1
+; NDD-NEXT: # %bb.2: # %if.then
+; NDD-NEXT: xorl %eax, %eax
+; NDD-NEXT: jmp foo # TAILCALL
+; NDD-NEXT: .LBB17_1: # %if.end
+; NDD-NEXT: retq
+entry:
+ %b = load i64, ptr %ptr
+ %cmp = icmp eq i64 %a, 0
+ %and = and i64 %b, 123456
+ %cmp1 = icmp eq i64 %and, 0
+ %or.cond = or i1 %cmp, %cmp1
+ br i1 %or.cond, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ tail call void (...) @foo()
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ ret void
+}
+
+declare dso_local void @foo(...)
>From 55121ff13a6148d579d6ed12b536b0da832beec4 Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Sun, 19 May 2024 00:41:45 +0800
Subject: [PATCH 2/5] add more pattern for ctest
---
.../Target/X86/X86InstrConditionalCompare.td | 17 +++-------
llvm/lib/Target/X86/X86InstrFragments.td | 13 +++++++-
llvm/test/CodeGen/X86/apx/ctest.ll | 31 ++++++-------------
3 files changed, 26 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrConditionalCompare.td b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
index e09a1f50ab9ee..3d296773103b5 100644
--- a/llvm/lib/Target/X86/X86InstrConditionalCompare.td
+++ b/llvm/lib/Target/X86/X86InstrConditionalCompare.td
@@ -146,20 +146,11 @@ def : Pat<(X86ctest GR32:$src1, GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
def : Pat<(X86ctest GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
(CTEST64rr GR64:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
-def : Pat<(X86ctest GR8:$src1, (i8 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+def : Pat<(X86ctestpat GR8:$src1, imm:$src2, timm:$dcf, timm:$cond),
(CTEST8ri GR8:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
-def : Pat<(X86ctest GR16:$src1, (i16 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+def : Pat<(X86ctestpat GR16:$src1, imm:$src2, timm:$dcf, timm:$cond),
(CTEST16ri GR16:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
-def : Pat<(X86ctest GR32:$src1, (i32 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+def : Pat<(X86ctestpat GR32:$src1, imm:$src2, timm:$dcf, timm:$cond),
(CTEST32ri GR32:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
-def : Pat<(X86ctest GR64:$src1, (i64 imm:$src2), timm:$dcf, timm:$cond, EFLAGS),
+def : Pat<(X86ctestpat GR64:$src1, imm:$src2, timm:$dcf, timm:$cond),
(CTEST64ri32 GR64:$src1, imm:$src2, timm:$dcf, timm:$cond)>;
-
-def : Pat<(X86ctest (loadi8 addr:$src1), GR8:$src2, timm:$dcf, timm:$cond, EFLAGS),
- (CTEST8mr addr:$src1, GR8:$src2, timm:$dcf, timm:$cond)>;
-def : Pat<(X86ctest (loadi16 addr:$src1), GR16:$src2, timm:$dcf, timm:$cond, EFLAGS),
- (CTEST16mr addr:$src1, GR16:$src2, timm:$dcf, timm:$cond)>;
-def : Pat<(X86ctest (loadi32 addr:$src1), GR32:$src2, timm:$dcf, timm:$cond, EFLAGS),
- (CTEST32mr addr:$src1, GR32:$src2, timm:$dcf, timm:$cond)>;
-def : Pat<(X86ctest (loadi64 addr:$src1), GR64:$src2, timm:$dcf, timm:$cond, EFLAGS),
- (CTEST64mr addr:$src1, GR64:$src2, timm:$dcf, timm:$cond)>;
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index 664998e76353b..99ada3f711b53 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -583,6 +583,14 @@ def add_su : binop_oneuse<add>;
def and_su : binop_oneuse<and>;
def srl_su : binop_oneuse<srl>;
+class binop_twouses<SDPatternOperator operator>
+ : PatFrag<(ops node:$A, node:$B),
+ (operator node:$A, node:$B), [{
+ return N->hasNUsesOfValue(2, 0);
+}]>;
+
+def and_du : binop_twouses<and>;
+
// unary op with only one user
class unop_oneuse<SDPatternOperator operator>
: PatFrag<(ops node:$A),
@@ -607,7 +615,10 @@ def X86sub_flag_nocf : PatFrag<(ops node:$lhs, node:$rhs),
def X86testpat : PatFrag<(ops node:$lhs, node:$rhs),
(X86cmp (and_su node:$lhs, node:$rhs), 0)>;
-
+def X86ctestpat : PatFrag<(ops node:$lhs, node:$rhs, node:$dcf, node:$cond),
+ (X86ctest (and_du node:$lhs, node:$rhs),
+ (and_du node:$lhs, node:$rhs), node:$dcf,
+ node:$cond, EFLAGS)>;
def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(X86strict_fcmp node:$lhs, node:$rhs),
diff --git a/llvm/test/CodeGen/X86/apx/ctest.ll b/llvm/test/CodeGen/X86/apx/ctest.ll
index 9c03570eee308..d0bd43d070795 100644
--- a/llvm/test/CodeGen/X86/apx/ctest.ll
+++ b/llvm/test/CodeGen/X86/apx/ctest.ll
@@ -221,9 +221,8 @@ if.end: ; preds = %entry, %if.then
define void @ctest8ri_zf(i8 noundef %a, i8 noundef %b) {
; CHECK-LABEL: ctest8ri_zf:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andb $123, %sil
; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: ctestneb {dfv=zf} %sil, %sil
+; CHECK-NEXT: ctestneb {dfv=zf} $123, %sil
; CHECK-NEXT: jne .LBB6_1
; CHECK-NEXT: # %bb.2: # %if.then
; CHECK-NEXT: xorl %eax, %eax
@@ -233,9 +232,8 @@ define void @ctest8ri_zf(i8 noundef %a, i8 noundef %b) {
;
; NDD-LABEL: ctest8ri_zf:
; NDD: # %bb.0: # %entry
-; NDD-NEXT: andb $123, %sil, %al
; NDD-NEXT: testb %dil, %dil
-; NDD-NEXT: ctestneb {dfv=zf} %al, %al
+; NDD-NEXT: ctestneb {dfv=zf} $123, %sil
; NDD-NEXT: jne .LBB6_1
; NDD-NEXT: # %bb.2: # %if.then
; NDD-NEXT: xorl %eax, %eax
@@ -299,9 +297,8 @@ if.end: ; preds = %entry, %if.then
define void @ctest32ri_zf(i32 noundef %a, i32 noundef %b) {
; CHECK-LABEL: ctest32ri_zf:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andl $12345, %esi # imm = 0x3039
; CHECK-NEXT: testl %edi, %edi
-; CHECK-NEXT: ctestnel {dfv=zf} %esi, %esi
+; CHECK-NEXT: ctestnel {dfv=zf} $12345, %esi # imm = 0x3039
; CHECK-NEXT: jne .LBB8_1
; CHECK-NEXT: # %bb.2: # %if.then
; CHECK-NEXT: xorl %eax, %eax
@@ -311,9 +308,8 @@ define void @ctest32ri_zf(i32 noundef %a, i32 noundef %b) {
;
; NDD-LABEL: ctest32ri_zf:
; NDD: # %bb.0: # %entry
-; NDD-NEXT: andl $12345, %esi, %eax # imm = 0x3039
; NDD-NEXT: testl %edi, %edi
-; NDD-NEXT: ctestnel {dfv=zf} %eax, %eax
+; NDD-NEXT: ctestnel {dfv=zf} $12345, %esi # imm = 0x3039
; NDD-NEXT: jne .LBB8_1
; NDD-NEXT: # %bb.2: # %if.then
; NDD-NEXT: xorl %eax, %eax
@@ -338,9 +334,8 @@ if.end: ; preds = %entry, %if.then
define void @ctest64ri32_zf(i64 noundef %a, i64 noundef %b) {
; CHECK-LABEL: ctest64ri32_zf:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: andl $123456, %esi # imm = 0x1E240
; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: ctestneq {dfv=zf} %rsi, %rsi
+; CHECK-NEXT: ctestneq {dfv=zf} $123456, %rsi # imm = 0x1E240
; CHECK-NEXT: jne .LBB9_1
; CHECK-NEXT: # %bb.2: # %if.then
; CHECK-NEXT: xorl %eax, %eax
@@ -350,9 +345,8 @@ define void @ctest64ri32_zf(i64 noundef %a, i64 noundef %b) {
;
; NDD-LABEL: ctest64ri32_zf:
; NDD: # %bb.0: # %entry
-; NDD-NEXT: andl $123456, %esi, %eax # imm = 0x1E240
; NDD-NEXT: testq %rdi, %rdi
-; NDD-NEXT: ctestneq {dfv=zf} %rax, %rax
+; NDD-NEXT: ctestneq {dfv=zf} $123456, %rsi # imm = 0x1E240
; NDD-NEXT: jne .LBB9_1
; NDD-NEXT: # %bb.2: # %if.then
; NDD-NEXT: xorl %eax, %eax
@@ -533,10 +527,8 @@ if.end: ; preds = %entry, %if.then
define void @ctest8mi_zf(i8 noundef %a, ptr %ptr) {
; CHECK-LABEL: ctest8mi_zf:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movzbl (%rsi), %eax
-; CHECK-NEXT: andb $123, %al
; CHECK-NEXT: testb %dil, %dil
-; CHECK-NEXT: ctestneb {dfv=zf} %al, %al
+; CHECK-NEXT: ctestneb {dfv=zf} $123, (%rsi)
; CHECK-NEXT: jne .LBB14_1
; CHECK-NEXT: # %bb.2: # %if.then
; CHECK-NEXT: xorl %eax, %eax
@@ -546,9 +538,8 @@ define void @ctest8mi_zf(i8 noundef %a, ptr %ptr) {
;
; NDD-LABEL: ctest8mi_zf:
; NDD: # %bb.0: # %entry
-; NDD-NEXT: andb $123, (%rsi), %al
; NDD-NEXT: testb %dil, %dil
-; NDD-NEXT: ctestneb {dfv=zf} %al, %al
+; NDD-NEXT: ctestneb {dfv=zf} $123, (%rsi)
; NDD-NEXT: jne .LBB14_1
; NDD-NEXT: # %bb.2: # %if.then
; NDD-NEXT: xorl %eax, %eax
@@ -659,8 +650,7 @@ define void @ctest64mi32_zf(i64 noundef %a, ptr %ptr) {
; CHECK-LABEL: ctest64mi32_zf:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: testq %rdi, %rdi
-; CHECK-NEXT: movl $123456, %eax # imm = 0x1E240
-; CHECK-NEXT: ctestnel {dfv=zf} %eax, (%rsi)
+; CHECK-NEXT: ctestnel {dfv=zf} $123456, (%rsi) # imm = 0x1E240
; CHECK-NEXT: jne .LBB17_1
; CHECK-NEXT: # %bb.2: # %if.then
; CHECK-NEXT: xorl %eax, %eax
@@ -670,9 +660,8 @@ define void @ctest64mi32_zf(i64 noundef %a, ptr %ptr) {
;
; NDD-LABEL: ctest64mi32_zf:
; NDD: # %bb.0: # %entry
-; NDD-NEXT: andl $123456, (%rsi), %eax # imm = 0x1E240
; NDD-NEXT: testq %rdi, %rdi
-; NDD-NEXT: ctestnel {dfv=zf} %eax, %eax
+; NDD-NEXT: ctestnel {dfv=zf} $123456, (%rsi) # imm = 0x1E240
; NDD-NEXT: jne .LBB17_1
; NDD-NEXT: # %bb.2: # %if.then
; NDD-NEXT: xorl %eax, %eax
>From 9cfd8e14852e1416c8a743b1e96e9d793baed00d Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Sun, 19 May 2024 11:16:33 +0800
Subject: [PATCH 3/5] rename function
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
llvm/lib/Target/X86/X86InstrInfo.cpp | 2 +-
llvm/lib/Target/X86/X86InstrInfo.h | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2b404619c93e1..98b0c967c06a6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54643,7 +54643,7 @@ static SDValue combineX86SubCmpToCcmpCtestHelper(
X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
SDValue CFlags = DAG.getTargetConstant(
- X86::getCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
+ X86::getCCMPCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
SDValue CCMP = (NewOpc == X86ISD::CCMP)
? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
{Sub.getOperand(0), Sub.getOperand(1),
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index c7016ef67a1c8..7d05f950b6fe9 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -3164,7 +3164,7 @@ X86::CondCode X86::getCondFromCCMP(const MachineInstr &MI) {
: X86::COND_INVALID;
}
-int X86::getCondFlagsFromCondCode(X86::CondCode CC) {
+int X86::getCCMPCondFlagsFromCondCode(X86::CondCode CC) {
// CCMP/CTEST has two conditional operands:
// - SCC: source conditonal code (same as CMOV)
// - DCF: destination conditional flags, which has 4 valid bits
diff --git a/llvm/lib/Target/X86/X86InstrInfo.h b/llvm/lib/Target/X86/X86InstrInfo.h
index da27027fc23a0..295fac60c6e40 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.h
+++ b/llvm/lib/Target/X86/X86InstrInfo.h
@@ -75,7 +75,7 @@ CondCode getCondFromCFCMov(const MachineInstr &MI);
CondCode getCondFromCCMP(const MachineInstr &MI);
// Turn condition code into condition flags for CCMP/CTEST.
-int getCondFlagsFromCondCode(CondCode CC);
+int getCCMPCondFlagsFromCondCode(CondCode CC);
/// GetOppositeBranchCondition - Return the inverse of the specified cond,
/// e.g. turning COND_E to COND_NE.
>From e598224251a50cbfaf1e8566c88081d281e0c28e Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Mon, 20 May 2024 14:50:25 +0800
Subject: [PATCH 4/5] simplify code
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 195 +++++++++---------------
1 file changed, 68 insertions(+), 127 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 98b0c967c06a6..a8618234e9574 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54610,68 +54610,10 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
return true;
}
-static SDValue combineX86SubCmpToCcmpCtestHelper(
- SDNode *N, SDValue Flag, SDValue SetCC0, SDValue SetCC1, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI, unsigned NewOpc) {
- SDValue LHS = N->getOperand(0);
- SDValue Sub = SetCC1.getOperand(1);
-
- SDNode *BrCond = *Flag->uses().begin();
- if (BrCond->getOpcode() != X86ISD::BRCOND)
- return SDValue();
- unsigned CondNo = 2;
- if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
- X86::COND_NE)
- return SDValue();
-
- X86::CondCode CC0 =
- static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
- // CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
- if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
- return SDValue();
-
- bool IsOR = LHS.getOpcode() == ISD::OR;
-
- SDValue SCC =
- IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
- SDLoc(SetCC0.getOperand(0)), MVT::i8)
- : SetCC0.getOperand(0);
-
- SDValue CC1N = SetCC1.getOperand(0);
- X86::CondCode CC1 =
- static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
- X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
- X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
- SDValue CFlags = DAG.getTargetConstant(
- X86::getCCMPCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
- SDValue CCMP = (NewOpc == X86ISD::CCMP)
- ? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
- {Sub.getOperand(0), Sub.getOperand(1),
- CFlags, SCC, SetCC0.getOperand(1)})
- : DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
- {Sub.getOperand(0), Sub.getOperand(0),
- CFlags, SCC, SetCC0.getOperand(1)});
- DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
-
- SmallVector<SDValue> Ops(BrCond->op_values());
- if (isNullConstant(N->getOperand(1)) && Ops[CondNo] != CC1N)
- Ops[CondNo] = CC1N;
- else if (isOneConstant(N->getOperand(1)))
- Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
-
- SDValue NewBrCond =
- DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
- if (BrCond != NewBrCond.getNode()) {
- DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
- DCI.recursivelyDeleteUnusedNodes(BrCond);
- }
- return CCMP;
-}
-
-static SDValue combineX86SubCmpToCcmp(SDNode *N, SDValue Flag,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &ST) {
+static SDValue combineX86SubCmpToCcmpCtest(SDNode *N, SDValue Flag,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &ST) {
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
// brcond ne
//
@@ -54688,37 +54630,7 @@ static SDValue combineX86SubCmpToCcmp(SDNode *N, SDValue Flag,
//
// ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
// brcond ~cc1
- //
- // if only flag has users, where cflags is determined by cc1.
-
- SDValue LHS = N->getOperand(0);
-
- if (!ST.hasCCMP() ||
- (LHS.getOpcode() != ISD::AND && LHS.getOpcode() != ISD::OR) ||
- !Flag.hasOneUse())
- return SDValue();
- SDValue SetCC0 = LHS.getOperand(0);
- SDValue SetCC1 = LHS.getOperand(1);
- if (SetCC0.getOpcode() != X86ISD::SETCC ||
- SetCC1.getOpcode() != X86ISD::SETCC)
- return SDValue();
-
- // and/or is commutable. Try to commute the operands and then test again.
- if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB) {
- std::swap(SetCC0, SetCC1);
- if (SetCC1.getOperand(1).getOpcode() != X86ISD::SUB)
- return SDValue();
- }
-
- return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
- X86ISD::CCMP);
-}
-
-static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
- SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &ST) {
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 0)
// brcond ne
//
@@ -54735,7 +54647,7 @@ static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
//
// ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
// brcond ~cc1
- //
+
// if only flag has users, where cflags is determined by cc1.
SDValue LHS = N->getOperand(0);
@@ -54751,42 +54663,75 @@ static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
SetCC1.getOpcode() != X86ISD::SETCC)
return SDValue();
- auto IsOp1CmpZero = [&](SDValue V) {
+ auto GetCombineToOpc = [&](SDValue V) {
SDValue Op = V.getOperand(1);
- return Op.getOpcode() == X86ISD::CMP && isNullConstant(Op.getOperand(1));
+ unsigned Opc = Op.getOpcode();
+ return (Opc == X86ISD::SUB) ? X86ISD::CCMP
+ : (Opc == X86ISD::CMP && isNullConstant(Op.getOperand(1)))
+ ? X86ISD::CTEST
+ : 0U;
};
+
+ unsigned NewOpc = 0;
+
// and/or is commutable. Try to commute the operands and then test again.
- if (!IsOp1CmpZero(SetCC1)) {
+ if (!(NewOpc = GetCombineToOpc(SetCC1))) {
std::swap(SetCC0, SetCC1);
- if (!IsOp1CmpZero(SetCC1))
+ if (!(NewOpc = GetCombineToOpc(SetCC1)))
return SDValue();
}
- return combineX86SubCmpToCcmpCtestHelper(N, Flag, SetCC0, SetCC1, DAG, DCI,
- X86ISD::CTEST);
-}
-
-static bool isOnlyFlagUsedX86SubOne(SDNode *N) {
- return N->getOpcode() == X86ISD::SUB && isOneConstant(N->getOperand(1)) &&
- !N->hasAnyUseOfValue(0);
-}
+ SDValue Sub = SetCC1.getOperand(1);
+ SDNode *BrCond = *Flag->uses().begin();
+ if (BrCond->getOpcode() != X86ISD::BRCOND)
+ return SDValue();
+ unsigned CondNo = 2;
+ if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
+ X86::COND_NE)
+ return SDValue();
-static SDValue combineX86SubToCcmp(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &ST) {
- if (!isOnlyFlagUsedX86SubOne(N))
+ X86::CondCode CC0 =
+ static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
+ // CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
+ if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
return SDValue();
- return combineX86SubCmpToCcmp(N, SDValue(N, 1), DAG, DCI, ST);
-}
+ bool IsOR = LHS.getOpcode() == ISD::OR;
-static SDValue combineX86SubToCtest(SDNode *N, SelectionDAG &DAG,
- TargetLowering::DAGCombinerInfo &DCI,
- const X86Subtarget &ST) {
- if (!isOnlyFlagUsedX86SubOne(N))
- return SDValue();
+ SDValue SCC =
+ IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
+ SDLoc(SetCC0.getOperand(0)), MVT::i8)
+ : SetCC0.getOperand(0);
+
+ SDValue CC1N = SetCC1.getOperand(0);
+ X86::CondCode CC1 =
+ static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
+ X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
+ X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
+ SDValue CFlags = DAG.getTargetConstant(
+ X86::getCCMPCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
+ SDValue CCMP = (NewOpc == X86ISD::CCMP)
+ ? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
+ {Sub.getOperand(0), Sub.getOperand(1),
+ CFlags, SCC, SetCC0.getOperand(1)})
+ : DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
+ {Sub.getOperand(0), Sub.getOperand(0),
+ CFlags, SCC, SetCC0.getOperand(1)});
+ DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
- return combineX86SubCmpToCtest(N, SDValue(N, 1), DAG, DCI, ST);
+ SmallVector<SDValue> Ops(BrCond->op_values());
+ if (isNullConstant(N->getOperand(1)) && Ops[CondNo] != CC1N)
+ Ops[CondNo] = CC1N;
+ else if (isOneConstant(N->getOperand(1)))
+ Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
+
+ SDValue NewBrCond =
+ DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
+ if (BrCond != NewBrCond.getNode()) {
+ DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
+ DCI.recursivelyDeleteUnusedNodes(BrCond);
+ }
+ return CCMP;
}
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
@@ -54806,13 +54751,9 @@ static SDValue combineCMP(SDNode *N, SelectionDAG &DAG,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (SDValue CCMP =
- combineX86SubCmpToCcmp(N, SDValue(N, 0), DAG, DCI, Subtarget))
+ combineX86SubCmpToCcmpCtest(N, SDValue(N, 0), DAG, DCI, Subtarget))
return CCMP;
- if (SDValue CTEST =
- combineX86SubCmpToCtest(N, SDValue(N, 0), DAG, DCI, Subtarget))
- return CTEST;
-
// If we have a constant logical shift that's only used in a comparison
// against zero turn it into an equivalent AND. This allows turning it into
// a TEST instruction later.
@@ -54953,11 +54894,11 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
bool IsSub = X86ISD::SUB == N->getOpcode();
unsigned GenericOpc = IsSub ? ISD::SUB : ISD::ADD;
- if (SDValue CCMP = combineX86SubToCcmp(N, DAG, DCI, ST))
- return CCMP;
-
- if (SDValue CTEST = combineX86SubToCtest(N, DAG, DCI, ST))
- return CTEST;
+ if (IsSub && isOneConstant(N->getOperand(1)) && !N->hasAnyUseOfValue(0)) {
+ if (SDValue CCMP =
+ combineX86SubCmpToCcmpCtest(N, SDValue(N, 1), DAG, DCI, ST))
+ return CCMP;
+ }
// If we don't use the flag result, simplify back to a generic ADD/SUB.
if (!N->hasAnyUseOfValue(1)) {
>From 8213c9caa412f46417b6217e0335d87d8607861a Mon Sep 17 00:00:00 2001
From: Shengchen Kan <shengchen.kan at intel.com>
Date: Tue, 21 May 2024 11:25:18 +0800
Subject: [PATCH 5/5] address review comment: refine comments
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 60 ++++++++++++++-----------
1 file changed, 33 insertions(+), 27 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a8618234e9574..1fbe8ece40c3f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54616,38 +54616,28 @@ static SDValue combineX86SubCmpToCcmpCtest(SDNode *N, SDValue Flag,
const X86Subtarget &ST) {
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
// brcond ne
- //
// ->
- //
- // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
- // brcond cc1
- //
+ // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond cc1
//
// sub(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
// brcond ne
+ // ->
+ // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond ~cc1
//
- // ->
- //
- // ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
- // brcond ~cc1
-
// cmp(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 0)
// brcond ne
- //
// ->
- //
- // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
- // brcond cc1
- //
+ // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond cc1
//
// sub(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 1)
// brcond ne
- //
// ->
+ // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond ~cc1
//
- // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
- // brcond ~cc1
-
// if only flag has users, where cflags is determined by cc1.
SDValue LHS = N->getOperand(0);
@@ -54663,24 +54653,27 @@ static SDValue combineX86SubCmpToCcmpCtest(SDNode *N, SDValue Flag,
SetCC1.getOpcode() != X86ISD::SETCC)
return SDValue();
- auto GetCombineToOpc = [&](SDValue V) {
+ auto GetCombineToOpc = [&](SDValue V) -> unsigned {
SDValue Op = V.getOperand(1);
unsigned Opc = Op.getOpcode();
- return (Opc == X86ISD::SUB) ? X86ISD::CCMP
- : (Opc == X86ISD::CMP && isNullConstant(Op.getOperand(1)))
- ? X86ISD::CTEST
- : 0U;
+ if (Opc == X86ISD::SUB)
+ return X86ISD::CCMP;
+ if (Opc == X86ISD::CMP && isNullConstant(Op.getOperand(1)))
+ return X86ISD::CTEST;
+ return 0U;
};
unsigned NewOpc = 0;
- // and/or is commutable. Try to commute the operands and then test again.
+ // AND/OR is commutable. Canonicalize the operands to make SETCC with SUB/CMP
+ // appear on the right.
if (!(NewOpc = GetCombineToOpc(SetCC1))) {
std::swap(SetCC0, SetCC1);
if (!(NewOpc = GetCombineToOpc(SetCC1)))
return SDValue();
}
+  // Check that the only user of the flag is `brcond ne`.
SDValue Sub = SetCC1.getOperand(1);
SDNode *BrCond = *Flag->uses().begin();
if (BrCond->getOpcode() != X86ISD::BRCOND)
@@ -54698,11 +54691,13 @@ static SDValue combineX86SubCmpToCcmpCtest(SDNode *N, SDValue Flag,
bool IsOR = LHS.getOpcode() == ISD::OR;
+ // CMP/TEST is executed and updates the EFLAGS normally only when SCC
+  // evaluates to true. So we need to invert CC0 as SCC when the logic operator
+ // is OR. Similar for CC1.
SDValue SCC =
IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
SDLoc(SetCC0.getOperand(0)), MVT::i8)
: SetCC0.getOperand(0);
-
SDValue CC1N = SetCC1.getOperand(0);
X86::CondCode CC1 =
static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
@@ -54710,6 +54705,9 @@ static SDValue combineX86SubCmpToCcmpCtest(SDNode *N, SDValue Flag,
X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
SDValue CFlags = DAG.getTargetConstant(
X86::getCCMPCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
+
+ // Replace any uses of the old flag produced by SUB/CMP with the new one
+ // produced by CCMP/CTEST.
SDValue CCMP = (NewOpc == X86ISD::CCMP)
? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
{Sub.getOperand(0), Sub.getOperand(1),
@@ -54717,16 +54715,24 @@ static SDValue combineX86SubCmpToCcmpCtest(SDNode *N, SDValue Flag,
: DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
{Sub.getOperand(0), Sub.getOperand(0),
CFlags, SCC, SetCC0.getOperand(1)});
+ // Replace API is called manually here b/c the number of results may change.
DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
+ // Update CC for the consumer of the flag.
+ // The old CC is `ne`. Hence, when comparing the result with 0, we are
+ // checking if the second condition evaluates to true. When comparing the
+  // result with 1, we are checking if the second condition evaluates to false.
SmallVector<SDValue> Ops(BrCond->op_values());
- if (isNullConstant(N->getOperand(1)) && Ops[CondNo] != CC1N)
+ if (isNullConstant(N->getOperand(1)))
Ops[CondNo] = CC1N;
else if (isOneConstant(N->getOperand(1)))
Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
SDValue NewBrCond =
DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
+ // Avoid self-assign error b/c CC1 can be `e/ne`.
+ // Replace API is called manually here b/c we're updating the user of the node
+ // being visited instead of the node itself.
if (BrCond != NewBrCond.getNode()) {
DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
DCI.recursivelyDeleteUnusedNodes(BrCond);
More information about the llvm-commits
mailing list