[PATCH] R600: Change how vector truncating stores are packed.
Tom Stellard
tom at stellard.net
Mon Apr 7 12:14:01 PDT 2014
On Sat, Apr 05, 2014 at 06:30:42PM -0700, Matt Arsenault wrote:
> Don't introduce new operations on an illegal sub 32-bit type.
> Do the operations on a 32-bit value, and then use a truncating store.
>
> http://reviews.llvm.org/D3301
LGTM.
>
> Files:
> lib/Target/R600/AMDGPUISelLowering.cpp
> test/CodeGen/R600/store.ll
>
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -719,32 +719,46 @@
> }
>
> SDLoc DL(Op);
> - const SDValue &Value = Store->getValue();
> + SDValue Value = Store->getValue();
> EVT VT = Value.getValueType();
> - const SDValue &Ptr = Store->getBasePtr();
> + EVT ElemVT = VT.getVectorElementType();
> + SDValue Ptr = Store->getBasePtr();
> EVT MemEltVT = MemVT.getVectorElementType();
> unsigned MemEltBits = MemEltVT.getSizeInBits();
> unsigned MemNumElements = MemVT.getVectorNumElements();
> - EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
> - SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, PackedVT);
> + unsigned PackedSize = MemVT.getStoreSizeInBits();
> + SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32);
> +
> + assert(Value.getValueType().getScalarSizeInBits() >= 32);
>
> SDValue PackedValue;
> for (unsigned i = 0; i < MemNumElements; ++i) {
> - EVT ElemVT = VT.getVectorElementType();
> SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
> DAG.getConstant(i, MVT::i32));
> - Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
> - Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
> - SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
> - Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
> + Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
> + Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
> +
> + SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32);
> + Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
> +
> if (i == 0) {
> PackedValue = Elt;
> } else {
> - PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
> + PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
> }
> }
> +
> + if (PackedSize < 32) {
> + EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
> + return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
> + Store->getMemOperand()->getPointerInfo(),
> + PackedVT,
> + Store->isNonTemporal(), Store->isVolatile(),
> + Store->getAlignment());
> + }
> +
> return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
> - MachinePointerInfo(Store->getMemOperand()->getValue()),
> + Store->getMemOperand()->getPointerInfo(),
> Store->isVolatile(), Store->isNonTemporal(),
> Store->getAlignment());
> }
> Index: test/CodeGen/R600/store.ll
> ===================================================================
> --- test/CodeGen/R600/store.ll
> +++ test/CodeGen/R600/store.ll
> @@ -75,7 +75,11 @@
> ; EG-CHECK: MEM_RAT MSKOR
> ; EG-CHECK-NOT: MEM_RAT MSKOR
> ; SI-CHECK-LABEL: @store_v2i8
> -; SI-CHECK: BUFFER_STORE_SHORT
> +; SI-CHECK: S_LSHL_B32 [[LSHL:s[0-9]+]], s{{[0-9]+}}, 8
> +; SI-CHECK: S_AND_B32 [[AND:s[0-9]+]], s{{[0-9]+}}, 255
> +; SI-CHECK: S_OR_B32 [[RESULT:s[0-9]+]], [[AND]], [[LSHL]]
> +; SI-CHECK: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
> +; SI-CHECK: BUFFER_STORE_SHORT [[VRESULT]]
> define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
> entry:
> %0 = trunc <2 x i32> %in to <2 x i8>
> Index: lib/Target/R600/AMDGPUISelLowering.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUISelLowering.cpp
> +++ lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -719,32 +719,46 @@
> }
>
> SDLoc DL(Op);
> - const SDValue &Value = Store->getValue();
> + SDValue Value = Store->getValue();
> EVT VT = Value.getValueType();
> - const SDValue &Ptr = Store->getBasePtr();
> + EVT ElemVT = VT.getVectorElementType();
> + SDValue Ptr = Store->getBasePtr();
> EVT MemEltVT = MemVT.getVectorElementType();
> unsigned MemEltBits = MemEltVT.getSizeInBits();
> unsigned MemNumElements = MemVT.getVectorNumElements();
> - EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits());
> - SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, PackedVT);
> + unsigned PackedSize = MemVT.getStoreSizeInBits();
> + SDValue Mask = DAG.getConstant((1 << MemEltBits) - 1, MVT::i32);
> +
> + assert(Value.getValueType().getScalarSizeInBits() >= 32);
>
> SDValue PackedValue;
> for (unsigned i = 0; i < MemNumElements; ++i) {
> - EVT ElemVT = VT.getVectorElementType();
> SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT, Value,
> DAG.getConstant(i, MVT::i32));
> - Elt = DAG.getZExtOrTrunc(Elt, DL, PackedVT);
> - Elt = DAG.getNode(ISD::AND, DL, PackedVT, Elt, Mask);
> - SDValue Shift = DAG.getConstant(MemEltBits * i, PackedVT);
> - Elt = DAG.getNode(ISD::SHL, DL, PackedVT, Elt, Shift);
> + Elt = DAG.getZExtOrTrunc(Elt, DL, MVT::i32);
> + Elt = DAG.getNode(ISD::AND, DL, MVT::i32, Elt, Mask); // getZeroExtendInReg
> +
> + SDValue Shift = DAG.getConstant(MemEltBits * i, MVT::i32);
> + Elt = DAG.getNode(ISD::SHL, DL, MVT::i32, Elt, Shift);
> +
> if (i == 0) {
> PackedValue = Elt;
> } else {
> - PackedValue = DAG.getNode(ISD::OR, DL, PackedVT, PackedValue, Elt);
> + PackedValue = DAG.getNode(ISD::OR, DL, MVT::i32, PackedValue, Elt);
> }
> }
> +
> + if (PackedSize < 32) {
> + EVT PackedVT = EVT::getIntegerVT(*DAG.getContext(), PackedSize);
> + return DAG.getTruncStore(Store->getChain(), DL, PackedValue, Ptr,
> + Store->getMemOperand()->getPointerInfo(),
> + PackedVT,
> + Store->isNonTemporal(), Store->isVolatile(),
> + Store->getAlignment());
> + }
> +
> return DAG.getStore(Store->getChain(), DL, PackedValue, Ptr,
> - MachinePointerInfo(Store->getMemOperand()->getValue()),
> + Store->getMemOperand()->getPointerInfo(),
> Store->isVolatile(), Store->isNonTemporal(),
> Store->getAlignment());
> }
> Index: test/CodeGen/R600/store.ll
> ===================================================================
> --- test/CodeGen/R600/store.ll
> +++ test/CodeGen/R600/store.ll
> @@ -75,7 +75,11 @@
> ; EG-CHECK: MEM_RAT MSKOR
> ; EG-CHECK-NOT: MEM_RAT MSKOR
> ; SI-CHECK-LABEL: @store_v2i8
> -; SI-CHECK: BUFFER_STORE_SHORT
> +; SI-CHECK: S_LSHL_B32 [[LSHL:s[0-9]+]], s{{[0-9]+}}, 8
> +; SI-CHECK: S_AND_B32 [[AND:s[0-9]+]], s{{[0-9]+}}, 255
> +; SI-CHECK: S_OR_B32 [[RESULT:s[0-9]+]], [[AND]], [[LSHL]]
> +; SI-CHECK: V_MOV_B32_e32 [[VRESULT:v[0-9]+]], [[RESULT]]
> +; SI-CHECK: BUFFER_STORE_SHORT [[VRESULT]]
> define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
> entry:
> %0 = trunc <2 x i32> %in to <2 x i8>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list