[PATCH] SelectionDAG: Use correct pointer size when lowering function arguments

Tue Aug 13 16:16:12 PDT 2013

On 08/13/2013 03:48 PM, Tom Stellard wrote:
> From: Tom Stellard <thomas.stellard at amd.com>
>
> This adds minimal support to the SelectionDAG for handling address spaces
> with different pointer sizes.  The SelectionDAG should now correctly
> lower pointer function arguments to the correct size as well as generate
> the correct code when lowering getelementptr.
>
> This patch also updates the R600 DataLayout to use 32-bit pointers for
> the local address space.
> ---
>   include/llvm/Target/TargetLowering.h             |  4 +--
>   lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 +++++---
>   lib/CodeGen/TargetLoweringBase.cpp               |  4 +++
>   lib/Target/R600/AMDGPUISelLowering.cpp           |  4 ++-
>   lib/Target/R600/AMDGPUSubtarget.cpp              |  4 +++
>   lib/Target/R600/SIISelLowering.cpp               |  2 +-
>   lib/Target/R600/SIInstructions.td                |  9 +++--
>   test/CodeGen/R600/32-bit-local-address-space.ll  | 42 ++++++++++++++++++++++++
>   8 files changed, 68 insertions(+), 14 deletions(-)
>   create mode 100644 test/CodeGen/R600/32-bit-local-address-space.ll
>
> diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
> index c3fa3cc..914043d 100644
> --- a/include/llvm/Target/TargetLowering.h
> +++ b/include/llvm/Target/TargetLowering.h
> @@ -151,7 +151,7 @@ public:
>     // Return the pointer type for the given address space, defaults to
>     // the pointer type from the data layout.
>     // FIXME: The default needs to be removed once all the code is updated.
> -  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const { return PointerTy; }
> +  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const;
>     virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
>   
>     EVT getShiftAmountTy(EVT LHSTy) const;
> @@ -568,7 +568,7 @@ public:
>     /// otherwise it will assert.
>     EVT getValueType(Type *Ty, bool AllowUnknown = false) const {
>       // Lower scalar pointers to native pointer types.
> -    if (Ty->isPointerTy()) return PointerTy;
> +    if (Ty->isPointerTy()) return getPointerTy(Ty->getPointerAddressSpace());
>   
>       if (Ty->isVectorTy()) {
>         VectorType *VTy = cast<VectorType>(Ty);
> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> index 1101ee1..334bc2c 100644
> --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> @@ -3171,6 +3171,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>   
>         Ty = StTy->getElementType(Field);
>       } else {
> +      uint32_t AS = 0;
> +      if (PointerType *PtrType = dyn_cast<PointerType>(Ty)) {
> +        AS = PtrType->getAddressSpace();
> +      }
>         Ty = cast<SequentialType>(Ty)->getElementType();
>   
>         // If this is a constant subscript, handle it quickly.
> @@ -3180,14 +3184,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>           uint64_t Offs =
>               TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
>           SDValue OffsVal;
> -        EVT PTy = TLI->getPointerTy();
> +        EVT PTy = TLI->getPointerTy(AS);
>           unsigned PtrBits = PTy.getSizeInBits();
>           if (PtrBits < 64)
> -          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
> -                                TLI->getPointerTy(),
> +          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
>                                   DAG.getConstant(Offs, MVT::i64));
>           else
> -          OffsVal = DAG.getIntPtrConstant(Offs);
> +          OffsVal = DAG.getConstant(Offs, PTy);
>   
>           N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
>                           OffsVal);
> @@ -3195,7 +3198,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>         }
>   
>         // N = N + Idx * ElementSize;
> -      APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
> +      APInt ElementSize = APInt(TLI->getPointerTy(AS).getSizeInBits(),
I think wrappers similar to the one DataLayout has would be helpful
TLI->getPointerSizeInBits(AS), TLI->getPointerTypeSizeInBits(Ty)
>                                   TD->getTypeAllocSize(Ty));
>         SDValue IdxN = getValue(Idx);
>   
> diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
> index b3711ad..ef931ec 100644
> --- a/lib/CodeGen/TargetLoweringBase.cpp
> +++ b/lib/CodeGen/TargetLoweringBase.cpp
> @@ -755,6 +755,10 @@ void TargetLoweringBase::initActions() {
>     setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
>   }
>   
> +MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
> +  return MVT::getIntegerVT(TD->getPointerSizeInBits(AS));
> +}
> +
Another one:

MVT TargetLoweringBase::getPointerTy(Type *Ty) const {
   return MVT::getIntegerVT(TD->getPointerTypeSizeInBits(Ty));
}


>   MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
>     return MVT::getIntegerVT(8*TD->getPointerSize(0));
>   }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index dd8b73f..bde6b48 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -235,6 +235,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>   
>     const DataLayout *TD = getTargetMachine().getDataLayout();
>     GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
> +
> +  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
>     // XXX: What does the value of G->getOffset() mean?
>     assert(G->getOffset() == 0 &&
>            "Do not know what to do with an non-zero offset");
> @@ -246,7 +248,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>     // XXX: Account for alignment?
>     MFI->LDSSize += Size;
>   
> -  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
> +  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
>   }
>   
>   void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 8ed5a74..5623f9e 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -98,6 +98,10 @@ AMDGPUSubtarget::getDataLayout() const {
>       DataLayout.append("-p:32:32:32");
>     }
>   
> +  if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
> +    DataLayout.append("-p3:32:32:32");
> +  }
> +
>     return DataLayout;
>   }
>   
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 0bd8bce..ee0c397 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -87,7 +87,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
>     setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
>     setTruncStoreAction(MVT::f64, MVT::f32, Expand);
>   
> -  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> +  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
>   
>     setTargetDAGCombine(ISD::SELECT_CC);
>   
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 622b36a..e4ad62e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1714,14 +1714,13 @@ def : Pat <
>   /********** ======================= **********/
>   
>   def : Pat <
> -    (local_load i64:$src0),
> -    (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
> -                      (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
> +    (local_load i32:$src0),
> +    (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
>   >;
>   
>   def : Pat <
> -    (local_store i32:$src1, i64:$src0),
> -    (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
> +    (local_store i32:$src1, i32:$src0),
> +    (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0)
>   >;
>   
>   /********** ================== **********/
> diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
> new file mode 100644
> index 0000000..5a7ddca
> --- /dev/null
> +++ b/test/CodeGen/R600/32-bit-local-address-space.ll
> @@ -0,0 +1,42 @@
> +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
> +
> +; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
> +; the global address space(1) uses 64-bit pointers.  These tests check to make sure
> +; the correct pointer size is used for the local address space.
> +
> +; The e{{32|64}} suffix on the instructions refers to the encoding size and not
> +; the size of the operands.  The operand size is denoted in the instruction name.
> +; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
> +; instructions with B64, U64, and I64 take 64-bit operands.
> +
> +; CHECK: @local_address_load
> +; CHECK: V_MOV_B32_e{{32|64}} [[PTR:VGPR[0-9]]]
> +; CHECK: DS_READ_B32 [[PTR]]
> +
CHECK-LABEL?