[llvm] [X86][CodeGen] Support lowering for CCMP/CTEST (PR #91747)
Shengchen Kan via llvm-commits
llvm-commits at lists.llvm.org
Sat May 18 20:27:01 PDT 2024
================
@@ -54605,7 +54610,187 @@ static bool onlyZeroFlagUsed(SDValue Flags) {
return true;
}
+/// Shared rewrite used by the CCMP and CTEST combines.
+///
+/// \p N is the cmp/sub node whose first operand is and/or(SetCC0, SetCC1) and
+/// whose second operand is the constant (0 or 1) the result is compared with.
+/// \p Flag is the flags value whose single user must be a `brcond ne`; all of
+/// its uses are redirected to the newly created node. \p SetCC1 is the
+/// X86ISD::SETCC whose flag operand provides the operands of the new node, and
+/// \p SetCC0 provides the source condition and the incoming flags. \p NewOpc
+/// selects between X86ISD::CCMP and X86ISD::CTEST.
+///
+/// \returns the new CCMP/CTEST node, or an empty SDValue if the pattern does
+/// not match (in which case the DAG is left untouched).
+static SDValue combineX86SubCmpToCcmpCtestHelper(
+    SDNode *N, SDValue Flag, SDValue SetCC0, SDValue SetCC1, SelectionDAG &DAG,
+    TargetLowering::DAGCombinerInfo &DCI, unsigned NewOpc) {
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+  SDValue Sub = SetCC1.getOperand(1);
+
+  // The branch condition can only be rewritten when the and/or result is
+  // compared against 0 or 1. Check this before touching the DAG; otherwise
+  // the flags would be replaced while the BRCOND kept its stale `ne`.
+  const bool CmpWithZero = isNullConstant(RHS);
+  const bool CmpWithOne = isOneConstant(RHS);
+  if (!CmpWithZero && !CmpWithOne)
+    return SDValue();
+
+  // The rewrite below looks only at the first user of the flags, so it is
+  // valid only if that is the sole user.
+  if (!Flag.hasOneUse())
+    return SDValue();
+
+  SDNode *BrCond = *Flag->uses().begin();
+  if (BrCond->getOpcode() != X86ISD::BRCOND)
+    return SDValue();
+  unsigned CondNo = 2;
+  if (static_cast<X86::CondCode>(BrCond->getConstantOperandVal(CondNo)) !=
+      X86::COND_NE)
+    return SDValue();
+
+  X86::CondCode CC0 =
+      static_cast<X86::CondCode>(SetCC0.getConstantOperandVal(0));
+  // CCMP/CTEST is not conditional when the source condition is COND_P/COND_NP.
+  if (CC0 == X86::COND_P || CC0 == X86::COND_NP)
+    return SDValue();
+
+  bool IsOR = LHS.getOpcode() == ISD::OR;
+
+  // Source condition of the new node: cc0 for AND, the opposite of cc0 for OR.
+  SDValue SCC =
+      IsOR ? DAG.getTargetConstant(X86::GetOppositeBranchCondition(CC0),
+                                   SDLoc(SetCC0.getOperand(0)), MVT::i8)
+           : SetCC0.getOperand(0);
+
+  SDValue CC1N = SetCC1.getOperand(0);
+  X86::CondCode CC1 =
+      static_cast<X86::CondCode>(CC1N->getAsAPIntVal().getSExtValue());
+  X86::CondCode OppositeCC1 = X86::GetOppositeBranchCondition(CC1);
+  // Default flags are derived from cc1 for OR and from ~cc1 for AND.
+  X86::CondCode CFlagsCC = IsOR ? CC1 : OppositeCC1;
+  SDValue CFlags = DAG.getTargetConstant(
+      X86::getCondFlagsFromCondCode(CFlagsCC), SDLoc(BrCond), MVT::i8);
+  // CCMP compares the two operands of the inner sub; CTEST uses the first
+  // operand of the inner node for both of its inputs.
+  SDValue CCMP = (NewOpc == X86ISD::CCMP)
+                     ? DAG.getNode(X86ISD::CCMP, SDLoc(N), Flag.getValueType(),
+                                   {Sub.getOperand(0), Sub.getOperand(1),
+                                    CFlags, SCC, SetCC0.getOperand(1)})
+                     : DAG.getNode(X86ISD::CTEST, SDLoc(N), Flag.getValueType(),
+                                   {Sub.getOperand(0), Sub.getOperand(0),
+                                    CFlags, SCC, SetCC0.getOperand(1)});
+  DAG.ReplaceAllUsesOfValueWith(Flag, CCMP);
+
+  // The old branch condition is `ne`: comparing with 0 becomes a branch on
+  // cc1, comparing with 1 becomes a branch on ~cc1.
+  SmallVector<SDValue> Ops(BrCond->op_values());
+  if (CmpWithZero)
+    Ops[CondNo] = CC1N;
+  else
+    Ops[CondNo] = DAG.getTargetConstant(OppositeCC1, SDLoc(BrCond), MVT::i8);
+
+  SDValue NewBrCond =
+      DAG.getNode(X86ISD::BRCOND, SDLoc(BrCond), BrCond->getValueType(0), Ops);
+  if (BrCond != NewBrCond.getNode()) {
+    DAG.ReplaceAllUsesWith(BrCond, &NewBrCond);
+    DCI.recursivelyDeleteUnusedNodes(BrCond);
+  }
+  return CCMP;
+}
+
+static SDValue combineX86SubCmpToCcmp(SDNode *N, SDValue Flag,
+                                      SelectionDAG &DAG,
+                                      TargetLowering::DAGCombinerInfo &DCI,
+                                      const X86Subtarget &ST) {
+  // Fold a branch on the and/or of two setcc results into a conditional
+  // compare, provided the flag is the only thing that has users:
+  //
+  //   cmp(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 0)
+  //   brcond ne
+  // becomes
+  //   ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
+  //   brcond cc1
+  //
+  //   sub(and/or(setcc(cc0, flag0), setcc(cc1, sub (X, Y))), 1)
+  //   brcond ne
+  // becomes
+  //   ccmp(X, Y, cflags/~cflags, cc0/~cc0, flag0)
+  //   brcond ~cc1
+  //
+  // cflags is determined by cc1.
+
+  if (!ST.hasCCMP())
+    return SDValue();
+
+  SDValue Logic = N->getOperand(0);
+  const unsigned LogicOpc = Logic.getOpcode();
+  const bool IsAndOrOr = LogicOpc == ISD::AND || LogicOpc == ISD::OR;
+  if (!IsAndOrOr || !Flag.hasOneUse())
+    return SDValue();
+
+  SDValue First = Logic.getOperand(0);
+  SDValue Second = Logic.getOperand(1);
+  if (First.getOpcode() != X86ISD::SETCC)
+    return SDValue();
+  if (Second.getOpcode() != X86ISD::SETCC)
+    return SDValue();
+
+  // True when the setcc consumes the flags produced by an X86ISD::SUB.
+  auto ReadsSubFlags = [](SDValue SetCC) {
+    return SetCC.getOperand(1).getOpcode() == X86ISD::SUB;
+  };
+
+  // and/or is commutative: the setcc fed by the sub must end up second, so
+  // exchange the two when only the first one qualifies.
+  if (!ReadsSubFlags(Second)) {
+    if (!ReadsSubFlags(First))
+      return SDValue();
+    std::swap(First, Second);
+  }
+
+  return combineX86SubCmpToCcmpCtestHelper(N, Flag, First, Second, DAG, DCI,
+                                           X86ISD::CCMP);
+}
+
+static SDValue combineX86SubCmpToCtest(SDNode *N, SDValue Flag,
+ SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const X86Subtarget &ST) {
+ // cmp(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 0)
+ // brcond ne
+ //
+ // ->
+ //
+ // ctest(X, X, cflags/~cflags, cc0/~cc0, flag0)
+ // brcond cc1
+ //
+ //
+ // sub(and/or(setcc(cc0, flag0), setcc(cc1, cmp (X, 0))), 1)
----------------
KanRobert wrote:
No. It gets lowered to `sub`, which the peephole optimizer may later turn into `cmp` for the benefit of machine CSE.
https://github.com/llvm/llvm-project/pull/91747
More information about the llvm-commits
mailing list