[llvm] [AMDGPU] Promote uniform ops to I32 in DAGISel (PR #106383)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 15 05:29:07 PDT 2024
================
@@ -6733,6 +6735,107 @@ SDValue SITargetLowering::lowerFLDEXP(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FLDEXP, DL, VT, Op.getOperand(0), TruncExp);
}
+/// Pick the extension opcode (sign, zero or any) that matches the opcode of
+/// \p Op. The caller applies the returned opcode to the operands of \p Op
+/// when widening them. Only the opcodes listed below are accepted; any other
+/// opcode hits llvm_unreachable.
+static unsigned getExtOpcodeForPromotedOp(SDValue Op) {
+  const unsigned Opcode = Op->getOpcode();
+
+  // For a comparison, the condition code (operand 2) decides the signedness:
+  // signed predicates sign-extend, everything else zero-extends.
+  if (Opcode == ISD::SETCC) {
+    const ISD::CondCode Cond = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+    if (ISD::isSignedIntSetCC(Cond))
+      return ISD::SIGN_EXTEND;
+    return ISD::ZERO_EXTEND;
+  }
+
+  switch (Opcode) {
+  // Signed operations: sign-extend.
+  case ISD::SMAX:
+  case ISD::SMIN:
+  case ISD::SRA:
+    return ISD::SIGN_EXTEND;
+
+  // Unsigned operations: zero-extend.
+  case ISD::UMAX:
+  case ISD::UMIN:
+  case ISD::SRL:
+    return ISD::ZERO_EXTEND;
+
+  // Garbage in the high bits does not influence the result of these
+  // operations, so any extension is acceptable.
+  // TODO: are all of those cases correct, and are there more?
+  case ISD::ADD:
+  case ISD::SUB:
+  case ISD::MUL:
+  case ISD::SHL:
+  case ISD::AND:
+  case ISD::OR:
+  case ISD::XOR:
+  case ISD::SELECT:
+    return ISD::ANY_EXTEND;
+
+  default:
+    break;
+  }
+
+  llvm_unreachable("unexpected opcode!");
+}
+
+SDValue SITargetLowering::promoteUniformOpToI32(SDValue Op,
+ DAGCombinerInfo &DCI) const {
+ const unsigned Opc = Op.getOpcode();
+ assert(Opc == ISD::ADD || Opc == ISD::SUB || Opc == ISD::SHL ||
+ Opc == ISD::SRL || Opc == ISD::SRA || Opc == ISD::AND ||
+ Opc == ISD::OR || Opc == ISD::XOR || Opc == ISD::MUL ||
+ Opc == ISD::SETCC || Opc == ISD::SELECT || Opc == ISD::SMIN ||
+ Opc == ISD::SMAX || Opc == ISD::UMIN || Opc == ISD::UMAX);
+
+ EVT OpTy = (Opc != ISD::SETCC) ? Op.getValueType()
+ : Op->getOperand(0).getValueType();
+
+ if (DCI.isBeforeLegalizeOps())
+ return SDValue();
+
+ // Promote only if:
+ // - We have 16 bit insts (not true 16 bit insts).
+ // - We don't have packed instructions (for vector types only).
+ // TODO: For vector types, the set of packed operations is more limited, so
+ // may want to promote some anyway.
+ if (!Subtarget->has16BitInsts() ||
+ (OpTy.isVector() ? Subtarget->hasVOP3PInsts() : false))
+ return SDValue();
+
+ // Promote uniform scalar and vector integers between 2 and 16 bits.
+ if (Op->isDivergent() || !OpTy.isInteger() ||
+ OpTy.getScalarSizeInBits() == 1 || OpTy.getScalarSizeInBits() > 16)
+ return SDValue();
+
+ auto &DAG = DCI.DAG;
+
+ SDLoc DL(Op);
+ SDValue LHS;
+ SDValue RHS;
+ if (Opc == ISD::SELECT) {
+ LHS = Op->getOperand(1);
+ RHS = Op->getOperand(2);
+ } else {
+ LHS = Op->getOperand(0);
+ RHS = Op->getOperand(1);
+ }
+
+ auto ExtTy = OpTy.changeElementType(MVT::i32);
+
+ const unsigned ExtOp = getExtOpcodeForPromotedOp(Op);
+ LHS = DAG.getNode(ExtOp, DL, ExtTy, {LHS});
+
+ // Special case: for shifts, the RHS always needs a zext.
+ if (Op.getOpcode() == ISD::SRA || Op.getOpcode() == ISD::SRL ||
+ Op.getOpcode() == ISD::SRA)
+ RHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtTy, {RHS});
+ else
+ RHS = DAG.getNode(ExtOp, DL, ExtTy, {RHS});
+
+ // setcc always return i1/i1 vec so no need to truncate after.
+ if (Opc == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ return DAG.getSetCC(DL, Op.getValueType(), LHS, RHS, CC);
+ }
+
+ // For other ops, we extend the operation's return type as well so we need to
+ // truncate back to the original type.
+ SDValue NewVal;
+ if (Opc == ISD::SELECT)
+ NewVal = DAG.getSelect(DL, ExtTy, Op->getOperand(0), LHS, RHS);
----------------
arsenm wrote:
This should use getNode(ISD::SELECT, ...) instead; getSelect may swap you to a VSELECT depending on the type.
https://github.com/llvm/llvm-project/pull/106383
More information about the llvm-commits
mailing list