[llvm] r290309 - AMDGPU: Move combines into separate functions
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 21 19:44:42 PST 2016
Author: arsenm
Date: Wed Dec 21 21:44:42 2016
New Revision: 290309
URL: http://llvm.org/viewvc/llvm-project?rev=290309&view=rev
Log:
AMDGPU: Move combines into separate functions
Modified:
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=290309&r1=290308&r2=290309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Dec 21 21:44:42 2016
@@ -3452,6 +3452,27 @@ SDValue SITargetLowering::performSHLPtrC
return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
}
+SDValue SITargetLowering::performMemSDNodeCombine(MemSDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Ptr = N->getBasePtr();
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+
+ // TODO: We could also do this for multiplies.
+ unsigned AS = N->getAddressSpace();
+ if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
+ SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
+ if (NewPtr) {
+ SmallVector<SDValue, 8> NewOps(N->op_begin(), N->op_end());
+
+ NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
+ return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
+ }
+ }
+
+ return SDValue();
+}
+
static bool bitOpWithConstantIsReducible(unsigned Opc, uint32_t Val) {
return (Opc == ISD::AND && (Val == 0 || Val == 0xffffffff)) ||
(Opc == ISD::OR && (Val == 0xffffffff || Val == 0)) ||
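
For reference, the rewrite that performSHLPtrCombine performs on the base pointer in the hunk above is just distributing a left shift over an add, so the constant part of the address becomes a plain ADD that can later be folded into the instruction's immediate offset field. A minimal standalone sketch of that identity in plain C++, with made-up sample values rather than the DAG nodes used above:

#include <cassert>
#include <cstdint>

// Sketch only: the arithmetic identity behind performSHLPtrCombine.
// (x + c) << k  ==  (x << k) + (c << k)  in unsigned (wrap-around) arithmetic,
// so a constant offset hidden inside a shifted index can be split out and
// exposed for folding into an immediate offset.
int main() {
  uint64_t x = 0x12345678u; // made-up index value
  uint64_t c = 16;          // made-up constant offset inside the shift
  unsigned k = 2;           // made-up element-size shift amount

  assert(((x + c) << k) == ((x << k) + (c << k)));
  return 0;
}
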
@@ -3850,6 +3871,97 @@ SDValue SITargetLowering::performMinMaxC
return SDValue();
}
+SDValue SITargetLowering::performFAddCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT == MVT::f64)
+ return SDValue();
+
+ assert(!VT.isVector());
+
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if ((VT == MVT::f32 && Subtarget->hasFP32Denormals()) ||
+ (VT == MVT::f16 && Subtarget->hasFP16Denormals()))
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+
+ // These should really be instruction patterns, but writing patterns with
+ // source modifiers is a pain.
+
+ // fadd (fadd (a, a), b) -> mad 2.0, a, b
+ if (LHS.getOpcode() == ISD::FADD) {
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ return DAG.getNode(ISD::FMAD, SL, VT, Two, A, RHS);
+ }
+ }
+
+ // fadd (b, fadd (a, a)) -> mad 2.0, a, b
+ if (RHS.getOpcode() == ISD::FADD) {
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ return DAG.getNode(ISD::FMAD, SL, VT, Two, A, LHS);
+ }
+ }
+
+ return SDValue();
+}
+
+SDValue SITargetLowering::performFSubCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+ assert(!VT.isVector());
+
+ // Try to get the fneg to fold into the source modifier. This undoes generic
+ // DAG combines and folds them into the mad.
+ //
+ // Only do this if we are not trying to support denormals. v_mad_f32 does
+ // not support denormals ever.
+ if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
+ (VT == MVT::f16 && !Subtarget->hasFP16Denormals())) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ if (LHS.getOpcode() == ISD::FADD) {
+ // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
+
+ SDValue A = LHS.getOperand(0);
+ if (A == LHS.getOperand(1)) {
+ const SDValue Two = DAG.getConstantFP(2.0, SL, VT);
+ SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS);
+
+ return DAG.getNode(ISD::FMAD, SL, VT, Two, A, NegRHS);
+ }
+ }
+
+ if (RHS.getOpcode() == ISD::FADD) {
+ // (fsub c, (fadd a, a)) -> mad -2.0, a, c
+
+ SDValue A = RHS.getOperand(0);
+ if (A == RHS.getOperand(1)) {
+ const SDValue NegTwo = DAG.getConstantFP(-2.0, SL, VT);
+ return DAG.getNode(ISD::FMAD, SL, VT, NegTwo, A, LHS);
+ }
+ }
+ }
+
+ return SDValue();
+}
+
SDValue SITargetLowering::performSetCCCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
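
The two helpers added above encode the same strength reduction from opposite sides: a doubled operand in an fadd/fsub becomes a single mad with a 2.0 or -2.0 multiplier, guarded by the denormal checks because v_mad_f32 flushes denormals. A minimal standalone sketch of the scalar identity, using the host fma as a stand-in for the FMAD node (the sample values are arbitrary):

#include <cassert>
#include <cmath>

// Sketch only: the scalar identity behind performFAddCombine/performFSubCombine.
//   (a + a) + b  ->  fma( 2.0f, a, b)
//   (a + a) - b  ->  fma( 2.0f, a, -b)
//   b - (a + a)  ->  fma(-2.0f, a, b)
// a + a and 2*a are both exact, and each side then rounds once, so the results
// agree for ordinary finite inputs; the combine is gated on denormal support
// because v_mad_f32 flushes denormals.
int main() {
  float a = 1.2345f, b = -7.5f; // arbitrary sample values
  assert((a + a) + b == std::fma(2.0f, a, b));
  assert((a + a) - b == std::fma(2.0f, a, -b));
  assert(b - (a + a) == std::fma(-2.0f, a, b));
  return 0;
}
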
@@ -3882,14 +3994,59 @@ SDValue SITargetLowering::performSetCCCo
return SDValue();
}
-SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
- DAGCombinerInfo &DCI) const {
+SDValue SITargetLowering::performCvtF32UByteNCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
- SDLoc DL(N);
+ SDLoc SL(N);
+ unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
+
+ SDValue Src = N->getOperand(0);
+ SDValue Srl = N->getOperand(0);
+ if (Srl.getOpcode() == ISD::ZERO_EXTEND)
+ Srl = Srl.getOperand(0);
+
+ // TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.
+ if (Srl.getOpcode() == ISD::SRL) {
+ // cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
+ // cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
+ // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
+
+ if (const ConstantSDNode *C =
+ dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
+ Srl = DAG.getZExtOrTrunc(Srl.getOperand(0), SDLoc(Srl.getOperand(0)),
+ EVT(MVT::i32));
+
+ unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
+ if (SrcOffset < 32 && SrcOffset % 8 == 0) {
+ return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, SL,
+ MVT::f32, Srl);
+ }
+ }
+ }
+
+ APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
+
+ APInt KnownZero, KnownOne;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
+ TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
+ DCI.CommitTargetLoweringOpt(TLO);
+ }
+
+ return SDValue();
+}
+SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default:
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
+ case ISD::FADD:
+ return performFAddCombine(N, DCI);
+ case ISD::FSUB:
+ return performFSubCombine(N, DCI);
case ISD::SETCC:
return performSetCCCombine(N, DCI);
case ISD::FMAXNUM:
@@ -3906,139 +4063,6 @@ SDValue SITargetLowering::PerformDAGComb
return performMinMaxCombine(N, DCI);
break;
}
-
- case AMDGPUISD::CVT_F32_UBYTE0:
- case AMDGPUISD::CVT_F32_UBYTE1:
- case AMDGPUISD::CVT_F32_UBYTE2:
- case AMDGPUISD::CVT_F32_UBYTE3: {
- unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0;
-
- SDValue Src = N->getOperand(0);
- SDValue Srl = N->getOperand(0);
- if (Srl.getOpcode() == ISD::ZERO_EXTEND)
- Srl = Srl.getOperand(0);
-
- // TODO: Handle (or x, (srl y, 8)) pattern when known bits are zero.
- if (Srl.getOpcode() == ISD::SRL) {
- // cvt_f32_ubyte0 (srl x, 16) -> cvt_f32_ubyte2 x
- // cvt_f32_ubyte1 (srl x, 16) -> cvt_f32_ubyte3 x
- // cvt_f32_ubyte0 (srl x, 8) -> cvt_f32_ubyte1 x
-
- if (const ConstantSDNode *C =
- dyn_cast<ConstantSDNode>(Srl.getOperand(1))) {
- Srl = DAG.getZExtOrTrunc(Srl.getOperand(0), SDLoc(Srl.getOperand(0)),
- EVT(MVT::i32));
-
- unsigned SrcOffset = C->getZExtValue() + 8 * Offset;
- if (SrcOffset < 32 && SrcOffset % 8 == 0) {
- return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0 + SrcOffset / 8, DL,
- MVT::f32, Srl);
- }
- }
- }
-
- APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8);
-
- APInt KnownZero, KnownOne;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLO.ShrinkDemandedConstant(Src, Demanded) ||
- TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) {
- DCI.CommitTargetLoweringOpt(TLO);
- }
-
- break;
- }
- case ISD::SINT_TO_FP:
- case ISD::UINT_TO_FP: {
- return performUCharToFloatCombine(N, DCI);
- }
- case ISD::FADD: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
- break;
-
- EVT VT = N->getValueType(0);
- if (VT == MVT::f64)
- break;
-
- assert(!VT.isVector());
-
- // Only do this if we are not trying to support denormals. v_mad_f32 does
- // not support denormals ever.
- if ((VT == MVT::f32 && Subtarget->hasFP32Denormals()) ||
- (VT == MVT::f16 && Subtarget->hasFP16Denormals()))
- break;
-
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
-
- // These should really be instruction patterns, but writing patterns with
- // source modifiers is a pain.
-
- // fadd (fadd (a, a), b) -> mad 2.0, a, b
- if (LHS.getOpcode() == ISD::FADD) {
- SDValue A = LHS.getOperand(0);
- if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, RHS);
- }
- }
-
- // fadd (b, fadd (a, a)) -> mad 2.0, a, b
- if (RHS.getOpcode() == ISD::FADD) {
- SDValue A = RHS.getOperand(0);
- if (A == RHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, LHS);
- }
- }
-
- return SDValue();
- }
- case ISD::FSUB: {
- if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
- break;
-
- EVT VT = N->getValueType(0);
- assert(!VT.isVector());
-
- // Try to get the fneg to fold into the source modifier. This undoes generic
- // DAG combines and folds them into the mad.
- //
- // Only do this if we are not trying to support denormals. v_mad_f32 does
- // not support denormals ever.
- if ((VT == MVT::f32 && !Subtarget->hasFP32Denormals()) ||
- (VT == MVT::f16 && !Subtarget->hasFP16Denormals())) {
- SDValue LHS = N->getOperand(0);
- SDValue RHS = N->getOperand(1);
- if (LHS.getOpcode() == ISD::FADD) {
- // (fsub (fadd a, a), c) -> mad 2.0, a, (fneg c)
-
- SDValue A = LHS.getOperand(0);
- if (A == LHS.getOperand(1)) {
- const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
- SDValue NegRHS = DAG.getNode(ISD::FNEG, DL, VT, RHS);
-
- return DAG.getNode(ISD::FMAD, DL, VT, Two, A, NegRHS);
- }
- }
-
- if (RHS.getOpcode() == ISD::FADD) {
- // (fsub c, (fadd a, a)) -> mad -2.0, a, c
-
- SDValue A = RHS.getOperand(0);
- if (A == RHS.getOperand(1)) {
- const SDValue NegTwo = DAG.getConstantFP(-2.0, DL, VT);
- return DAG.getNode(ISD::FMAD, DL, VT, NegTwo, A, LHS);
- }
- }
-
- return SDValue();
- }
-
- break;
- }
case ISD::LOAD:
case ISD::STORE:
case ISD::ATOMIC_LOAD:
@@ -4060,22 +4084,7 @@ SDValue SITargetLowering::PerformDAGComb
case AMDGPUISD::ATOMIC_DEC: { // TODO: Target mem intrinsics.
if (DCI.isBeforeLegalize())
break;
-
- MemSDNode *MemNode = cast<MemSDNode>(N);
- SDValue Ptr = MemNode->getBasePtr();
-
- // TODO: We could also do this for multiplies.
- unsigned AS = MemNode->getAddressSpace();
- if (Ptr.getOpcode() == ISD::SHL && AS != AMDGPUAS::PRIVATE_ADDRESS) {
- SDValue NewPtr = performSHLPtrCombine(Ptr.getNode(), AS, DCI);
- if (NewPtr) {
- SmallVector<SDValue, 8> NewOps(MemNode->op_begin(), MemNode->op_end());
-
- NewOps[N->getOpcode() == ISD::STORE ? 2 : 1] = NewPtr;
- return SDValue(DAG.UpdateNodeOperands(MemNode, NewOps), 0);
- }
- }
- break;
+ return performMemSDNodeCombine(cast<MemSDNode>(N), DCI);
}
case ISD::AND:
return performAndCombine(N, DCI);
@@ -4099,6 +4108,14 @@ SDValue SITargetLowering::PerformDAGComb
return Src;
break;
}
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ return performUCharToFloatCombine(N, DCI);
+ case AMDGPUISD::CVT_F32_UBYTE0:
+ case AMDGPUISD::CVT_F32_UBYTE1:
+ case AMDGPUISD::CVT_F32_UBYTE2:
+ case AMDGPUISD::CVT_F32_UBYTE3:
+ return performCvtF32UByteNCombine(N, DCI);
}
return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h?rev=290309&r1=290308&r2=290309&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.h Wed Dec 21 21:44:42 2016
@@ -69,6 +69,8 @@ class SITargetLowering final : public AM
unsigned AS,
DAGCombinerInfo &DCI) const;
+ SDValue performMemSDNodeCombine(MemSDNode *N, DAGCombinerInfo &DCI) const;
+
SDValue splitBinaryBitConstantOp(DAGCombinerInfo &DCI, const SDLoc &SL,
unsigned Opc, SDValue LHS,
const ConstantSDNode *CRHS) const;
@@ -81,7 +83,10 @@ class SITargetLowering final : public AM
SDValue performMinMaxCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFAddCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performFSubCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSetCCCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performCvtF32UByteNCombine(SDNode *N, DAGCombinerInfo &DCI) const;
bool isLegalFlatAddressingMode(const AddrMode &AM) const;
bool isLegalMUBUFAddressingMode(const AddrMode &AM) const;