[PATCH] SelectionDAG: Use correct pointer size when lowering function arguments
Matt Arsenault
Matthew.Arsenault at amd.com
Tue Aug 13 16:16:12 PDT 2013
On 08/13/2013 03:48 PM, Tom Stellard wrote:
> From: Tom Stellard <thomas.stellard at amd.com>
>
> This adds minimal support to the SelectionDAG for handling address spaces
> with different pointer sizes. The SelectionDAG should now correctly
> lower pointer function arguments to the correct size as well as generate
> the correct code when lowering getelementptr.
>
> This patch also updates the R600 DataLayout to use 32-bit pointers for
> the local address space.
> ---
> include/llvm/Target/TargetLowering.h | 4 +--
> lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 +++++---
> lib/CodeGen/TargetLoweringBase.cpp | 4 +++
> lib/Target/R600/AMDGPUISelLowering.cpp | 4 ++-
> lib/Target/R600/AMDGPUSubtarget.cpp | 4 +++
> lib/Target/R600/SIISelLowering.cpp | 2 +-
> lib/Target/R600/SIInstructions.td | 9 +++--
> test/CodeGen/R600/32-bit-local-address-space.ll | 42 ++++++++++++++++++++++++
> 8 files changed, 68 insertions(+), 14 deletions(-)
> create mode 100644 test/CodeGen/R600/32-bit-local-address-space.ll
>
> diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
> index c3fa3cc..914043d 100644
> --- a/include/llvm/Target/TargetLowering.h
> +++ b/include/llvm/Target/TargetLowering.h
> @@ -151,7 +151,7 @@ public:
> // Return the pointer type for the given address space, defaults to
> // the pointer type from the data layout.
> // FIXME: The default needs to be removed once all the code is updated.
> - virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const { return PointerTy; }
> + virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const;
> virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
>
> EVT getShiftAmountTy(EVT LHSTy) const;
> @@ -568,7 +568,7 @@ public:
> /// otherwise it will assert.
> EVT getValueType(Type *Ty, bool AllowUnknown = false) const {
> // Lower scalar pointers to native pointer types.
> - if (Ty->isPointerTy()) return PointerTy;
> + if (Ty->isPointerTy()) return getPointerTy(Ty->getPointerAddressSpace());
>
> if (Ty->isVectorTy()) {
> VectorType *VTy = cast<VectorType>(Ty);
> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> index 1101ee1..334bc2c 100644
> --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> @@ -3171,6 +3171,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>
> Ty = StTy->getElementType(Field);
> } else {
> + uint32_t AS = 0;
> + if (PointerType *PtrType = dyn_cast<PointerType>(Ty)) {
> + AS = PtrType->getAddressSpace();
> + }
> Ty = cast<SequentialType>(Ty)->getElementType();
>
> // If this is a constant subscript, handle it quickly.
> @@ -3180,14 +3184,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
> uint64_t Offs =
> TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
> SDValue OffsVal;
> - EVT PTy = TLI->getPointerTy();
> + EVT PTy = TLI->getPointerTy(AS);
> unsigned PtrBits = PTy.getSizeInBits();
> if (PtrBits < 64)
> - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
> - TLI->getPointerTy(),
> + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
> DAG.getConstant(Offs, MVT::i64));
> else
> - OffsVal = DAG.getIntPtrConstant(Offs);
> + OffsVal = DAG.getConstant(Offs, PTy);
>
> N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
> OffsVal);
> @@ -3195,7 +3198,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
> }
>
> // N = N + Idx * ElementSize;
> - APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
> + APInt ElementSize = APInt(TLI->getPointerTy(AS).getSizeInBits(),
I think wrappers similar to the one DataLayout has would be helpful
TLI->getPointerSizeInBits(AS), TLI->getPointerTypeSizeInBits(Ty)
> TD->getTypeAllocSize(Ty));
> SDValue IdxN = getValue(Idx);
>
> diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
> index b3711ad..ef931ec 100644
> --- a/lib/CodeGen/TargetLoweringBase.cpp
> +++ b/lib/CodeGen/TargetLoweringBase.cpp
> @@ -755,6 +755,10 @@ void TargetLoweringBase::initActions() {
> setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
> }
>
> +MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
> + return MVT::getIntegerVT(TD->getPointerSizeInBits(AS));
> +}
> +
Another one:
MVT TargetLoweringBase::getPointerTy(Type *Ty) const {
return MVT::getIntegerVT(TD->getPointerTypeSizeInBits(Ty));
}
> MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
> return MVT::getIntegerVT(8*TD->getPointerSize(0));
> }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index dd8b73f..bde6b48 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -235,6 +235,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>
> const DataLayout *TD = getTargetMachine().getDataLayout();
> GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
> +
> + assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
> // XXX: What does the value of G->getOffset() mean?
> assert(G->getOffset() == 0 &&
> "Do not know what to do with an non-zero offset");
> @@ -246,7 +248,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
> // XXX: Account for alignment?
> MFI->LDSSize += Size;
>
> - return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
> + return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
> }
>
> void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 8ed5a74..5623f9e 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -98,6 +98,10 @@ AMDGPUSubtarget::getDataLayout() const {
> DataLayout.append("-p:32:32:32");
> }
>
> + if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
> + DataLayout.append("-p3:32:32:32");
> + }
> +
> return DataLayout;
> }
>
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 0bd8bce..ee0c397 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -87,7 +87,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
> setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
> setTruncStoreAction(MVT::f64, MVT::f32, Expand);
>
> - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
>
> setTargetDAGCombine(ISD::SELECT_CC);
>
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 622b36a..e4ad62e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1714,14 +1714,13 @@ def : Pat <
> /********** ======================= **********/
>
> def : Pat <
> - (local_load i64:$src0),
> - (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
> - (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
> + (local_load i32:$src0),
> + (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
> >;
>
> def : Pat <
> - (local_store i32:$src1, i64:$src0),
> - (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
> + (local_store i32:$src1, i32:$src0),
> + (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0)
> >;
>
> /********** ================== **********/
> diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
> new file mode 100644
> index 0000000..5a7ddca
> --- /dev/null
> +++ b/test/CodeGen/R600/32-bit-local-address-space.ll
> @@ -0,0 +1,42 @@
> +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
> +
> +; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
> +; the global address space(1) uses 64-bit pointers. These tests check to make sure
> +; the correct pointer size is used for the local address space.
> +
> +; The e{{32|64}} suffix on the instructions refers to the encoding size and not
> +; the size of the operands. The operand size is denoted in the instruction name.
> +; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
> +; instructions with B64, U64, and I64 take 64-bit operands.
> +
> +; CHECK: @local_address_load
> +; CHECK: V_MOV_B32_e{{32|64}} [[PTR:VGPR[0-9]]]
> +; CHECK: DS_READ_B32 [[PTR]]
> +
CHECK-LABEL?
More information about the llvm-commits
mailing list