[PATCH] SelectionDAG: Use correct pointer size when lowering function arguments

Mon Aug 19 07:40:37 PDT 2013

On Tue, Aug 13, 2013 at 08:17:08PM -0700, Tom Stellard wrote:
> On Tue, Aug 13, 2013 at 04:16:12PM -0700, Matt Arsenault wrote:
> > On 08/13/2013 03:48 PM, Tom Stellard wrote:
> > >+      APInt ElementSize = APInt(TLI->getPointerTy(AS).getSizeInBits(),
> > I think wrappers similar to the one DataLayout has would be helpful
> > TLI->getPointerSizeInBits(AS), TLI->getPointerTypeSizeInBits(Ty)
> > >                                  TD->getTypeAllocSize(Ty));
> > >        SDValue IdxN = getValue(Idx);
> > >diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
> > >index b3711ad..ef931ec 100644
> > >--- a/lib/CodeGen/TargetLoweringBase.cpp
> > >+++ b/lib/CodeGen/TargetLoweringBase.cpp
> > >@@ -755,6 +755,10 @@ void TargetLoweringBase::initActions() {
> > >    setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
> > >  }
> > >+MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
> > >+  return MVT::getIntegerVT(TD->getPointerSizeInBits(AS));
> > >+}
> > >+
> > Another one:
> > 
> > MVT TargetLoweringBase::getPointerTy(Type *Ty) const {
> >   return MVT::getIntegerVT(TD->getPointerTypeSizeInBits(Ty));
> > }
> > 
> 
> I didn't add this function, because it makes calling getPointerTy(0)
> ambiguous, but I made the other changes you suggested in the attached
> patch.
> 

Ping.

> From 5c815e63c0d41e3e22b1056e3a1a1dbc3d0675d8 Mon Sep 17 00:00:00 2001
> From: Tom Stellard <thomas.stellard at amd.com>
> Date: Tue, 13 Aug 2013 10:28:31 -0700
> Subject: [PATCH] SelectionDAG: Use correct pointer size when lowering
>  function arguments v2
> 
> This adds minimal support to the SelectionDAG for handling address spaces
> with different pointer sizes.  The SelectionDAG should now correctly
> lower pointer function arguments to the correct size as well as generate
> the correct code when lowering getelementptr.
> 
> This patch also updates the R600 DataLayout to use 32-bit pointers for
> the local address space.
> 
> v2:
>   - Add more helper functions to TargetLoweringBase
>   - Use CHECK-LABEL for tests
> ---
>  include/llvm/Target/TargetLowering.h             |  6 ++--
>  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 +++++---
>  lib/CodeGen/TargetLoweringBase.cpp               | 13 ++++++++
>  lib/Target/R600/AMDGPUISelLowering.cpp           |  4 ++-
>  lib/Target/R600/AMDGPUSubtarget.cpp              |  4 +++
>  lib/Target/R600/SIISelLowering.cpp               |  2 +-
>  lib/Target/R600/SIInstructions.td                |  9 +++--
>  test/CodeGen/R600/32-bit-local-address-space.ll  | 42 ++++++++++++++++++++++++
>  8 files changed, 79 insertions(+), 14 deletions(-)
>  create mode 100644 test/CodeGen/R600/32-bit-local-address-space.ll
> 
> diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
> index c3fa3cc..d3c2695 100644
> --- a/include/llvm/Target/TargetLowering.h
> +++ b/include/llvm/Target/TargetLowering.h
> @@ -151,7 +151,9 @@ public:
>    // Return the pointer type for the given address space, defaults to
>    // the pointer type from the data layout.
>    // FIXME: The default needs to be removed once all the code is updated.
> -  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const { return PointerTy; }
> +  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const;
> +  unsigned getPointerSizeInBits(uint32_t AS = 0) const;
> +  unsigned getPointerTypeSizeInBits(Type *Ty) const;
>    virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
>  
>    EVT getShiftAmountTy(EVT LHSTy) const;
> @@ -568,7 +570,7 @@ public:
>    /// otherwise it will assert.
>    EVT getValueType(Type *Ty, bool AllowUnknown = false) const {
>      // Lower scalar pointers to native pointer types.
> -    if (Ty->isPointerTy()) return PointerTy;
> +    if (Ty->isPointerTy()) return getPointerTy(Ty->getPointerAddressSpace());
>  
>      if (Ty->isVectorTy()) {
>        VectorType *VTy = cast<VectorType>(Ty);
> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> index 1101ee1..1bcdd94 100644
> --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
> @@ -3171,6 +3171,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>  
>        Ty = StTy->getElementType(Field);
>      } else {
> +      uint32_t AS = 0;
> +      if (PointerType *PtrType = dyn_cast<PointerType>(Ty)) {
> +        AS = PtrType->getAddressSpace();
> +      }
>        Ty = cast<SequentialType>(Ty)->getElementType();
>  
>        // If this is a constant subscript, handle it quickly.
> @@ -3180,14 +3184,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>          uint64_t Offs =
>              TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
>          SDValue OffsVal;
> -        EVT PTy = TLI->getPointerTy();
> +        EVT PTy = TLI->getPointerTy(AS);
>          unsigned PtrBits = PTy.getSizeInBits();
>          if (PtrBits < 64)
> -          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
> -                                TLI->getPointerTy(),
> +          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
>                                  DAG.getConstant(Offs, MVT::i64));
>          else
> -          OffsVal = DAG.getIntPtrConstant(Offs);
> +          OffsVal = DAG.getConstant(Offs, PTy);
>  
>          N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
>                          OffsVal);
> @@ -3195,7 +3198,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>        }
>  
>        // N = N + Idx * ElementSize;
> -      APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
> +      APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
>                                  TD->getTypeAllocSize(Ty));
>        SDValue IdxN = getValue(Idx);
>  
> diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
> index b3711ad..9032ff8 100644
> --- a/lib/CodeGen/TargetLoweringBase.cpp
> +++ b/lib/CodeGen/TargetLoweringBase.cpp
> @@ -755,6 +755,19 @@ void TargetLoweringBase::initActions() {
>    setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
>  }
>  
> +MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
> +  return MVT::getIntegerVT(getPointerSizeInBits(AS));
> +}
> +
> +unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
> +  return TD->getPointerSizeInBits(AS);
> +}
> +
> +unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
> +  assert(Ty->isPointerTy());
> +  return getPointerSizeInBits(Ty->getPointerAddressSpace());
> +}
> +
>  MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
>    return MVT::getIntegerVT(8*TD->getPointerSize(0));
>  }
> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
> index dd8b73f..bde6b48 100644
> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
> @@ -235,6 +235,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>  
>    const DataLayout *TD = getTargetMachine().getDataLayout();
>    GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
> +
> +  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
>    // XXX: What does the value of G->getOffset() mean?
>    assert(G->getOffset() == 0 &&
>           "Do not know what to do with an non-zero offset");
> @@ -246,7 +248,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>    // XXX: Account for alignment?
>    MFI->LDSSize += Size;
>  
> -  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
> +  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
>  }
>  
>  void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
> index 8ed5a74..5623f9e 100644
> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
> @@ -98,6 +98,10 @@ AMDGPUSubtarget::getDataLayout() const {
>      DataLayout.append("-p:32:32:32");
>    }
>  
> +  if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
> +    DataLayout.append("-p3:32:32:32");
> +  }
> +
>    return DataLayout;
>  }
>  
> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
> index 0bd8bce..ee0c397 100644
> --- a/lib/Target/R600/SIISelLowering.cpp
> +++ b/lib/Target/R600/SIISelLowering.cpp
> @@ -87,7 +87,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
>    setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
>    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
>  
> -  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
> +  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
>  
>    setTargetDAGCombine(ISD::SELECT_CC);
>  
> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
> index 622b36a..e4ad62e 100644
> --- a/lib/Target/R600/SIInstructions.td
> +++ b/lib/Target/R600/SIInstructions.td
> @@ -1714,14 +1714,13 @@ def : Pat <
>  /********** ======================= **********/
>  
>  def : Pat <
> -    (local_load i64:$src0),
> -    (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
> -                      (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
> +    (local_load i32:$src0),
> +    (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
>  >;
>  
>  def : Pat <
> -    (local_store i32:$src1, i64:$src0),
> -    (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
> +    (local_store i32:$src1, i32:$src0),
> +    (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0)
>  >;
>  
>  /********** ================== **********/
> diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
> new file mode 100644
> index 0000000..d19a82e
> --- /dev/null
> +++ b/test/CodeGen/R600/32-bit-local-address-space.ll
> @@ -0,0 +1,42 @@
> +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
> +
> +; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
> +; the global address space(1) uses 64-bit pointers.  These tests check to make sure
> +; the correct pointer size is used for the local address space.
> +
> +; The e{{32|64}} suffix on the instructions refers to the encoding size and not
> +; the size of the operands.  The operand size is denoted in the instruction name.
> +; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
> +; instructions with B64, U64, and I64 take 64-bit operands.
> +
> +; CHECK-LABEL: @local_address_load
> +; CHECK: V_MOV_B32_e{{32|64}} [[PTR:VGPR[0-9]]]
> +; CHECK: DS_READ_B32 [[PTR]]
> +define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
> +entry:
> +  %0 = load i32 addrspace(3)* %in
> +  store i32 %0, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: @local_address_gep
> +; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]]
> +; CHECK: DS_READ_B32 [[PTR]]
> +define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
> +entry:
> +  %0 = getelementptr i32 addrspace(3)* %in, i32 %offset
> +  %1 = load i32 addrspace(3)* %0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> +
> +; CHECK-LABEL: @local_address_gep_const_offset
> +; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]]
> +; CHECK: DS_READ_B32 [[PTR]]
> +define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
> +entry:
> +  %0 = getelementptr i32 addrspace(3)* %in, i32 1
> +  %1 = load i32 addrspace(3)* %0
> +  store i32 %1, i32 addrspace(1)* %out
> +  ret void
> +}
> -- 
> 1.7.11.4
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits