[llvm] r189222 - R600: Add support for v4i32 and v2i32 local stores
Tom Stellard
thomas.stellard at amd.com
Mon Aug 26 08:05:44 PDT 2013
Author: tstellar
Date: Mon Aug 26 10:05:44 2013
New Revision: 189222
URL: http://llvm.org/viewvc/llvm-project?rev=189222&view=rev
Log:
R600: Add support for v4i32 and v2i32 local stores
Modified:
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
llvm/trunk/test/CodeGen/R600/store.ll
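In short: the packing logic formerly in AMDGPUTargetLowering::LowerVectorStore is renamed MergeVectorStore, a new SplitVectorStore helper breaks a vector store to the local address space into one scalar store per element, and both are dispatched from a new AMDGPUTargetLowering::LowerSTORE. Splitting is needed because multi-dword local writes are not currently selected (the in-tree comment notes that native v2i32 local stores are possible but unimplemented). Conceptually, shown here as IR for illustration only (the lowering actually runs on SelectionDAG nodes, and the value names are hypothetical), a v2i32 local store such as

  store <2 x i32> %in, <2 x i32> addrspace(3)* %out

is split into

  %elt0 = extractelement <2 x i32> %in, i32 0
  %elt1 = extractelement <2 x i32> %in, i32 1
  %ptr0 = bitcast <2 x i32> addrspace(3)* %out to i32 addrspace(3)*
  %ptr1 = getelementptr i32 addrspace(3)* %ptr0, i32 1
  store i32 %elt0, i32 addrspace(3)* %ptr0
  store i32 %elt1, i32 addrspace(3)* %ptr1

which then selects to two LDS_WRITE instructions on Evergreen/Cayman or two DS_WRITE_B32 instructions on SI, matching the new tests below.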
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp?rev=189222&r1=189221&r2=189222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.cpp Mon Aug 26 10:05:44 2013
@@ -67,6 +67,13 @@ AMDGPUTargetLowering::AMDGPUTargetLoweri
setOperationAction(ISD::STORE, MVT::f64, Promote);
AddPromotedToType(ISD::STORE, MVT::f64, MVT::i64);
+ // Custom lowering of vector stores is required for local address space
+ // stores.
+ setOperationAction(ISD::STORE, MVT::v4i32, Custom);
+ // XXX: Native v2i32 local address space stores are possible, but not
+ // currently implemented.
+ setOperationAction(ISD::STORE, MVT::v2i32, Custom);
+
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
@@ -221,7 +228,7 @@ SDValue AMDGPUTargetLowering::LowerOpera
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
- case ISD::STORE: return LowerVectorStore(Op, DAG);
+ case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
}
return Op;
@@ -417,7 +424,98 @@ SDValue AMDGPUTargetLowering::LowerMinMa
return Op;
}
+SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ EVT MemVT = Store->getMemoryVT();
+ unsigned MemBits = MemVT.getSizeInBits();
+ // Byte stores are really expensive, so if possible, try to pack a
+ // 32-bit vector truncating store into an i32 store.
+ // XXX: We could also optimize other vector bitwidths
+ if (!MemVT.isVector() || MemBits > 32) {
+ return SDValue();
+ }
+
+ SDLoc DL(Op);
+ const SDValue &Value = Store->getValue();
+ EVT VT = Value.getValueType();
+ const SDValue &Ptr = Store->getBasePtr();
+ EVT MemEltVT = MemVT.getVectorElementType();
+ unsigned MemEltBits = MemEltVT.getSizeInBits();
+ unsigned MemNumElements = MemVT.getVectorNumElements();
+ EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
+ SDValue Mask;
+ switch(MemEltBits) {
+ case 8:
+ Mask = DAG.getConstant(0xFF, PackedVT);
+ break;
+ case 16:
+ Mask = DAG.getConstant(0xFFFF, PackedVT);
+ break;
+ default:
+ llvm_unreachable("Cannot lower this vector store");
+ }
+ SDValue PackedValue;
+ for (unsigned i = 0; i < MemNumElements; ++i) {
+ EVT ElemVT = VT.getVectorElementType();
+ SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
+ DAG.getConstant(i, MVT::i32));
+ Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
+ Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
+ SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
+ Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
+ if (i == 0) {
+ PackedValue = Elt;
+ } else {
+ PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
+ }
+ }
+ return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment());
+}
+
+SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
+ SelectionDAG &DAG) const {
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ EVT MemEltVT = Store->getMemoryVT().getVectorElementType();
+ EVT EltVT = Store->getValue().getValueType().getVectorElementType();
+ EVT PtrVT = Store->getBasePtr().getValueType();
+ unsigned NumElts = Store->getMemoryVT().getVectorNumElements();
+ SDLoc SL(Op);
+
+ SmallVector<SDValue, 8> Chains;
+
+ for (unsigned i = 0, e = NumElts; i != e; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, EltVT,
+ Store->getValue(), DAG.getConstant(i, MVT::i32));
+ SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT,
+ Store->getBasePtr(),
+ DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8),
+ PtrVT));
+ Chains.push_back(DAG.getStore(Store->getChain(), SL, Val, Ptr,
+ MachinePointerInfo(Store->getMemOperand()->getValue()),
+ Store->isVolatile(), Store->isNonTemporal(),
+ Store->getAlignment()));
+ }
+ return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
+}
+
+SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+ SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
+ if (Result.getNode()) {
+ return Result;
+ }
+
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
+ if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS &&
+ Store->getValue().getValueType().isVector()) {
+ return SplitVectorStore(Op, DAG);
+ }
+ return SDValue();
+}
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
@@ -524,58 +622,6 @@ SDValue AMDGPUTargetLowering::LowerUDIVR
return DAG.getMergeValues(Ops, 2, DL);
}
-SDValue AMDGPUTargetLowering::LowerVectorStore(const SDValue &Op,
- SelectionDAG &DAG) const {
- StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
- EVT MemVT = Store->getMemoryVT();
- unsigned MemBits = MemVT.getSizeInBits();
-
- // Byte stores are really expensive, so if possible, try to pack a
- // 32-bit vector truncating store into an i32 store.
- // XXX: We could also optimize other vector bitwidths
- if (!MemVT.isVector() || MemBits > 32) {
- return SDValue();
- }
-
- SDLoc DL(Op);
- const SDValue &Value = Store->getValue();
- EVT VT = Value.getValueType();
- const SDValue &Ptr = Store->getBasePtr();
- EVT MemEltVT = MemVT.getVectorElementType();
- unsigned MemEltBits = MemEltVT.getSizeInBits();
- unsigned MemNumElements = MemVT.getVectorNumElements();
- EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
- SDValue Mask;
- switch(MemEltBits) {
- case 8:
- Mask = DAG.getConstant(0xFF, PackedVT);
- break;
- case 16:
- Mask = DAG.getConstant(0xFFFF, PackedVT);
- break;
- default:
- llvm_unreachable("Cannot lower this vector store");
- }
- SDValue PackedValue;
- for (unsigned i = 0; i < MemNumElements; ++i) {
- EVT ElemVT = VT.getVectorElementType();
- SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
- DAG.getConstant(i, MVT::i32));
- Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
- Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
- SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
- Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
- if (i == 0) {
- PackedValue = Elt;
- } else {
- PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
- }
- }
- return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
- MachinePointerInfo(Store->getMemOperand()->getValue()),
- Store->isVolatile(), Store->isNonTemporal(),
- Store->getAlignment());
-}
//===----------------------------------------------------------------------===//
// Helper functions
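MergeVectorStore, carried over from the old LowerVectorStore, handles the complementary case: a vector store whose memory type is at most 32 bits in total (v2i16, or truncating v2i8/v4i8 stores) is packed into a single i32 store by masking each element, shifting it into its byte or halfword position, and OR'ing the results together. As a rough IR-level sketch (hypothetical value names again; the real code builds DAG nodes), a store like

  store <2 x i16> %in, <2 x i16> addrspace(3)* %out

becomes approximately

  %e0 = extractelement <2 x i16> %in, i32 0
  %e1 = extractelement <2 x i16> %in, i32 1
  %z0 = zext i16 %e0 to i32
  %z1 = zext i16 %e1 to i32
  %m0 = and i32 %z0, 65535
  %m1 = and i32 %z1, 65535
  %s1 = shl i32 %m1, 16
  %packed = or i32 %m0, %s1
  %ptr = bitcast <2 x i16> addrspace(3)* %out to i32 addrspace(3)*
  store i32 %packed, i32 addrspace(3)* %ptr

which is why store_local_v2i16 in the updated test expects a single LDS_WRITE / DS_WRITE_B32 rather than two.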
Modified: llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h?rev=189222&r1=189221&r2=189222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h (original)
+++ llvm/trunk/lib/Target/R600/AMDGPUISelLowering.h Mon Aug 26 10:05:44 2013
@@ -31,6 +31,12 @@ private:
SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
+ /// \brief Lower vector stores by merging the vector elements into an integer
+ /// of the same bitwidth.
+ SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
+ /// \brief Split a vector store into multiple scalar stores.
+ /// \returns The resulting chain.
+ SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
protected:
@@ -44,17 +50,13 @@ protected:
unsigned Reg, EVT VT) const;
SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op,
SelectionDAG &DAG) const;
-
+ SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
bool isHWTrueValue(SDValue Op) const;
bool isHWFalseValue(SDValue Op) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
- /// \brief Lower vector stores by merging the vector elements into an integer
- /// of the same bitwidth.
- SDValue LowerVectorStore(const SDValue &Op, SelectionDAG &DAG) const;
-
public:
AMDGPUTargetLowering(TargetMachine &TM);
Modified: llvm/trunk/lib/Target/R600/R600ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/R600/R600ISelLowering.cpp?rev=189222&r1=189221&r2=189222&view=diff
==============================================================================
--- llvm/trunk/lib/Target/R600/R600ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/R600/R600ISelLowering.cpp Mon Aug 26 10:05:44 2013
@@ -1002,7 +1002,7 @@ SDValue R600TargetLowering::LowerSTORE(S
SDValue Value = Op.getOperand(1);
SDValue Ptr = Op.getOperand(2);
- SDValue Result = AMDGPUTargetLowering::LowerVectorStore(Op, DAG);
+ SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
if (Result.getNode()) {
return Result;
}
Modified: llvm/trunk/test/CodeGen/R600/store.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/R600/store.ll?rev=189222&r1=189221&r2=189222&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/R600/store.ll (original)
+++ llvm/trunk/test/CodeGen/R600/store.ll Mon Aug 26 10:05:44 2013
@@ -168,6 +168,58 @@ entry:
ret void
}
+;===------------------------------------------------------------------------===;
+; Local Address Space
+;===------------------------------------------------------------------------===;
+
+; EG-CHECK: @store_local_v2i16
+; EG-CHECK: LDS_WRITE
+; CM-CHECK: @store_local_v2i16
+; CM-CHECK: LDS_WRITE
+; SI-CHECK: @store_local_v2i16
+; SI-CHECK: DS_WRITE_B32
+define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
+entry:
+ store <2 x i16> %in, <2 x i16> addrspace(3)* %out
+ ret void
+}
+
+; EG-CHECK: @store_local_v2i32
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; CM-CHECK: @store_local_v2i32
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; SI-CHECK: @store_local_v2i32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
+entry:
+ store <2 x i32> %in, <2 x i32> addrspace(3)* %out
+ ret void
+}
+
+; EG-CHECK: @store_local_v4i32
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; EG-CHECK: LDS_WRITE
+; CM-CHECK: @store_local_v4i32
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; CM-CHECK: LDS_WRITE
+; SI-CHECK: @store_local_v4i32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+; SI-CHECK: DS_WRITE_B32
+define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
+entry:
+ store <4 x i32> %in, <4 x i32> addrspace(3)* %out
+ ret void
+}
+
; The stores in this function are combined by the optimizer to create a
; 64-bit store with 32-bit alignment. This is legal for SI and the legalizer
; should not try to split the 64-bit store back into 2 32-bit stores.
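For reference, the new tests are driven by the RUN lines already at the top of store.ll (not shown in this diff). Assuming the usual R600 test setup, an invocation for the Evergreen checks would look roughly like

  llc -march=r600 -mcpu=redwood < test/CodeGen/R600/store.ll \
      | FileCheck --check-prefix=EG-CHECK test/CodeGen/R600/store.ll

with -mcpu=cayman for the CM-CHECK prefixes, and the SI cpu from the existing RUN lines for SI-CHECK.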