[llvm] r260490 - AMDGPU: Split R600 and SI store lowering
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 10 21:32:47 PST 2016
Author: arsenm
Date: Wed Feb 10 23:32:46 2016
New Revision: 260490
URL: http://llvm.org/viewvc/llvm-project?rev=260490&view=rev
Log:
AMDGPU: Split R600 and SI store lowering
These were only sharing some somewhat incorrect
logic for when to scalarize or split vectors.
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp?rev=260490&r1=260489&r2=260490&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.cpp Wed Feb 10 23:32:46 2016
@@ -671,11 +671,6 @@ void AMDGPUTargetLowering::ReplaceNodeRe
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
// nothing here and let the illegal result integer be handled normally.
return;
- case ISD::STORE: {
- if (SDValue Lowered = LowerSTORE(SDValue(N, 0), DAG))
- Results.push_back(Lowered);
- return;
- }
default:
return;
}
@@ -1146,6 +1141,8 @@ SDValue AMDGPUTargetLowering::SplitVecto
return DAG.getMergeValues(Ops, SL);
}
+// FIXME: This isn't doing anything for SI. This should be used in a target
+// combine during type legalization.
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
StoreSDNode *Store = cast<StoreSDNode>(Op);
@@ -1290,64 +1287,6 @@ SDValue AMDGPUTargetLowering::SplitVecto
return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore);
}
-SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
- if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
- return Result;
-
- StoreSDNode *Store = cast<StoreSDNode>(Op);
- SDValue Chain = Store->getChain();
- if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
- Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
- Store->getValue().getValueType().isVector()) {
- return SplitVectorStore(Op, DAG);
- }
-
- EVT MemVT = Store->getMemoryVT();
- if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
- MemVT.bitsLT(MVT::i32)) {
- unsigned Mask = 0;
- if (Store->getMemoryVT() == MVT::i8) {
- Mask = 0xff;
- } else if (Store->getMemoryVT() == MVT::i16) {
- Mask = 0xffff;
- }
- SDValue BasePtr = Store->getBasePtr();
- SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
- DAG.getConstant(2, DL, MVT::i32));
- SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
- Chain, Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32));
-
- SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
- DAG.getConstant(0x3, DL, MVT::i32));
-
- SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
- DAG.getConstant(3, DL, MVT::i32));
-
- SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
- Store->getValue());
-
- SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
-
- SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
- MaskedValue, ShiftAmt);
-
- SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
- DAG.getConstant(Mask, DL, MVT::i32),
- ShiftAmt);
- DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
- DAG.getConstant(0xffffffff, DL, MVT::i32));
- Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
-
- SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
- return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
- Chain, Value, Ptr,
- DAG.getTargetConstant(0, DL, MVT::i32));
- }
- return SDValue();
-}
-
// This is a shortcut for integer division because we have fast i32<->f32
// conversions, and fast f32 reciprocal instructions. The fractional part of a
// float is enough to accurately represent up to a 24-bit integer.
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h?rev=260490&r1=260489&r2=260490&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUISelLowering.h Wed Feb 10 23:32:46 2016
@@ -28,7 +28,6 @@ class AMDGPUTargetLowering : public Targ
protected:
const AMDGPUSubtarget *Subtarget;
-private:
SDValue LowerConstantInitializer(const Constant* Init, const GlobalValue *GV,
const SDValue &InitPtr,
SDValue Chain,
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp?rev=260490&r1=260489&r2=260490&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.cpp Wed Feb 10 23:32:46 2016
@@ -1264,17 +1264,73 @@ void R600TargetLowering::getStackAddress
}
}
+SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Store);
+
+ unsigned Mask = 0;
+ if (Store->getMemoryVT() == MVT::i8) {
+ Mask = 0xff;
+ } else if (Store->getMemoryVT() == MVT::i16) {
+ Mask = 0xffff;
+ }
+
+ SDValue Chain = Store->getChain();
+ SDValue BasePtr = Store->getBasePtr();
+ EVT MemVT = Store->getMemoryVT();
+
+ SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, BasePtr,
+ DAG.getConstant(2, DL, MVT::i32));
+ SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+ Chain, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
+
+ SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, BasePtr,
+ DAG.getConstant(0x3, DL, MVT::i32));
+
+ SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+ DAG.getConstant(3, DL, MVT::i32));
+
+ SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
+ Store->getValue());
+
+ SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
+
+ SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ MaskedValue, ShiftAmt);
+
+ SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32,
+ DAG.getConstant(Mask, DL, MVT::i32),
+ ShiftAmt);
+ DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
+ DAG.getConstant(0xffffffff, DL, MVT::i32));
+ Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+
+ SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+ return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+ Chain, Value, Ptr,
+ DAG.getTargetConstant(0, DL, MVT::i32));
+}
+
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
- SDLoc DL(Op);
+ if (SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG))
+ return Result;
+
StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
- SDValue Chain = Op.getOperand(0);
- SDValue Value = Op.getOperand(1);
- SDValue Ptr = Op.getOperand(2);
+ unsigned AS = StoreNode->getAddressSpace();
+ SDValue Value = StoreNode->getValue();
+ EVT ValueVT = Value.getValueType();
- if (SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
- return Result;
+ if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
+ ValueVT.isVector()) {
+ return SplitVectorStore(Op, DAG);
+ }
- if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
+ SDLoc DL(Op);
+ SDValue Chain = StoreNode->getChain();
+ SDValue Ptr = StoreNode->getBasePtr();
+
+ if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
if (StoreNode->isTruncatingStore()) {
EVT VT = Value.getValueType();
assert(VT.bitsLE(MVT::i32));
@@ -1309,7 +1365,7 @@ SDValue R600TargetLowering::LowerSTORE(S
Op->getVTList(), Args, MemVT,
StoreNode->getMemOperand());
} else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
- Value.getValueType().bitsGE(MVT::i32)) {
+ ValueVT.bitsGE(MVT::i32)) {
// Convert pointer from byte address to dword address.
Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
@@ -1324,13 +1380,12 @@ SDValue R600TargetLowering::LowerSTORE(S
}
}
- EVT ValueVT = Value.getValueType();
-
- if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+ if (AS != AMDGPUAS::PRIVATE_ADDRESS)
return SDValue();
- if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
- return Ret;
+ EVT MemVT = StoreNode->getMemoryVT();
+ if (MemVT.bitsLT(MVT::i32))
+ return lowerPrivateTruncStore(StoreNode, DAG);
// Lowering for indirect addressing
const MachineFunction &MF = DAG.getMachineFunction();
Modified: llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h?rev=260490&r1=260489&r2=260490&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/R600ISelLowering.h Wed Feb 10 23:32:46 2016
@@ -59,6 +59,8 @@ private:
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue lowerPrivateTruncStore(StoreSDNode *Store, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const;
Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=260490&r1=260489&r2=260490&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed Feb 10 23:32:46 2016
@@ -1846,23 +1846,27 @@ SDValue SITargetLowering::LowerSTORE(SDV
StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT VT = Store->getMemoryVT();
- // These stores are legal.
- if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) {
- if (VT.isVector() && VT.getVectorNumElements() > 4)
- return ScalarizeVectorStore(Op, DAG);
- return SDValue();
+ if (VT == MVT::i1) {
+ return DAG.getTruncStore(Store->getChain(), DL,
+ DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
+ Store->getBasePtr(), MVT::i1, Store->getMemOperand());
}
- if (SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG))
- return Ret;
+ assert(Store->getValue().getValueType().getScalarType() == MVT::i32);
+
+ unsigned NElts = VT.getVectorNumElements();
+ unsigned AS = Store->getAddressSpace();
+ if (AS == AMDGPUAS::LOCAL_ADDRESS) {
+ // If properly aligned, if we split we might be able to use ds_write_b64.
+ return SplitVectorStore(Op, DAG);
+ }
- if (VT.isVector() && VT.getVectorNumElements() >= 8)
- return SplitVectorStore(Op, DAG);
+ if (AS == AMDGPUAS::PRIVATE_ADDRESS && NElts > 4)
+ return ScalarizeVectorStore(Op, DAG);
- if (VT == MVT::i1)
- return DAG.getTruncStore(Store->getChain(), DL,
- DAG.getSExtOrTrunc(Store->getValue(), DL, MVT::i32),
- Store->getBasePtr(), MVT::i1, Store->getMemOperand());
+ // These stores are legal. private, global and flat.
+ if (NElts >= 8)
+ return SplitVectorStore(Op, DAG);
return SDValue();
}
More information about the llvm-commits
mailing list