[PATCH] SelectionDAG: Use correct pointer size when lowering function arguments

Fri Aug 23 10:50:16 PDT 2013

LGTM.

--Owen

On Aug 19, 2013, at 7:40 AM, Tom Stellard <tom at stellard.net> wrote:
> Ping.
> 
> 
>> From 5c815e63c0d41e3e22b1056e3a1a1dbc3d0675d8 Mon Sep 17 00:00:00 2001
>> From: Tom Stellard <thomas.stellard at amd.com>
>> Date: Tue, 13 Aug 2013 10:28:31 -0700
>> Subject: [PATCH] SelectionDAG: Use correct pointer size when lowering
>> function arguments v2
>> 
>> This adds minimal support to the SelectionDAG for handling address spaces
>> with different pointer sizes.  The SelectionDAG should now correctly
>> lower pointer function arguments to the correct size as well as generate
>> the correct code when lowering getelementptr.
>> 
>> This patch also updates the R600 DataLayout to use 32-bit pointers for
>> the local address space.
>> 
>> v2:
>>  - Add more helper functions to TargetLoweringBase
>>  - Use CHECK-LABEL for tests
>> ---
>> include/llvm/Target/TargetLowering.h             |  6 ++--
>> lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 +++++---
>> lib/CodeGen/TargetLoweringBase.cpp               | 13 ++++++++
>> lib/Target/R600/AMDGPUISelLowering.cpp           |  4 ++-
>> lib/Target/R600/AMDGPUSubtarget.cpp              |  4 +++
>> lib/Target/R600/SIISelLowering.cpp               |  2 +-
>> lib/Target/R600/SIInstructions.td                |  9 +++--
>> test/CodeGen/R600/32-bit-local-address-space.ll  | 42 ++++++++++++++++++++++++
>> 8 files changed, 79 insertions(+), 14 deletions(-)
>> create mode 100644 test/CodeGen/R600/32-bit-local-address-space.ll
>> 
>> diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
>> index c3fa3cc..d3c2695 100644
>> --- a/include/llvm/Target/TargetLowering.h
>> +++ b/include/llvm/Target/TargetLowering.h
>> @@ -151,7 +151,9 @@ public:
>>   // Return the pointer type for the given address space, defaults to
>>   // the pointer type from the data layout.
>>   // FIXME: The default needs to be removed once all the code is updated.
>> -  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const { return PointerTy; }
>> +  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const;
>> +  unsigned getPointerSizeInBits(uint32_t AS = 0) const;
>> +  unsigned getPointerTypeSizeInBits(Type *Ty) const;
>>   virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
>> 
>>   EVT getShiftAmountTy(EVT LHSTy) const;
>> @@ -568,7 +570,7 @@ public:
>>   /// otherwise it will assert.
>>   EVT getValueType(Type *Ty, bool AllowUnknown = false) const {
>>     // Lower scalar pointers to native pointer types.
>> -    if (Ty->isPointerTy()) return PointerTy;
>> +    if (Ty->isPointerTy()) return getPointerTy(Ty->getPointerAddressSpace());
>> 
>>     if (Ty->isVectorTy()) {
>>       VectorType *VTy = cast<VectorType>(Ty);
>> diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>> index 1101ee1..1bcdd94 100644
>> --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>> +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
>> @@ -3171,6 +3171,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>> 
>>       Ty = StTy->getElementType(Field);
>>     } else {
>> +      uint32_t AS = 0;
>> +      if (PointerType *PtrType = dyn_cast<PointerType>(Ty)) {
>> +        AS = PtrType->getAddressSpace();
>> +      }
>>       Ty = cast<SequentialType>(Ty)->getElementType();
>> 
>>       // If this is a constant subscript, handle it quickly.
>> @@ -3180,14 +3184,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>>         uint64_t Offs =
>>             TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
>>         SDValue OffsVal;
>> -        EVT PTy = TLI->getPointerTy();
>> +        EVT PTy = TLI->getPointerTy(AS);
>>         unsigned PtrBits = PTy.getSizeInBits();
>>         if (PtrBits < 64)
>> -          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
>> -                                TLI->getPointerTy(),
>> +          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
>>                                 DAG.getConstant(Offs, MVT::i64));
>>         else
>> -          OffsVal = DAG.getIntPtrConstant(Offs);
>> +          OffsVal = DAG.getConstant(Offs, PTy);
>> 
>>         N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
>>                         OffsVal);
>> @@ -3195,7 +3198,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
>>       }
>> 
>>       // N = N + Idx * ElementSize;
>> -      APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
>> +      APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
>>                                 TD->getTypeAllocSize(Ty));
>>       SDValue IdxN = getValue(Idx);
>> 
>> diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
>> index b3711ad..9032ff8 100644
>> --- a/lib/CodeGen/TargetLoweringBase.cpp
>> +++ b/lib/CodeGen/TargetLoweringBase.cpp
>> @@ -755,6 +755,19 @@ void TargetLoweringBase::initActions() {
>>   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
>> }
>> 
>> +MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
>> +  return MVT::getIntegerVT(getPointerSizeInBits(AS));
>> +}
>> +
>> +unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
>> +  return TD->getPointerSizeInBits(AS);
>> +}
>> +
>> +unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
>> +  assert(Ty->isPointerTy());
>> +  return getPointerSizeInBits(Ty->getPointerAddressSpace());
>> +}
>> +
>> MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
>>   return MVT::getIntegerVT(8*TD->getPointerSize(0));
>> }
>> diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
>> index dd8b73f..bde6b48 100644
>> --- a/lib/Target/R600/AMDGPUISelLowering.cpp
>> +++ b/lib/Target/R600/AMDGPUISelLowering.cpp
>> @@ -235,6 +235,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>> 
>>   const DataLayout *TD = getTargetMachine().getDataLayout();
>>   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
>> +
>> +  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
>>   // XXX: What does the value of G->getOffset() mean?
>>   assert(G->getOffset() == 0 &&
>>          "Do not know what to do with an non-zero offset");
>> @@ -246,7 +248,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
>>   // XXX: Account for alignment?
>>   MFI->LDSSize += Size;
>> 
>> -  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
>> +  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
>> }
>> 
>> void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
>> diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
>> index 8ed5a74..5623f9e 100644
>> --- a/lib/Target/R600/AMDGPUSubtarget.cpp
>> +++ b/lib/Target/R600/AMDGPUSubtarget.cpp
>> @@ -98,6 +98,10 @@ AMDGPUSubtarget::getDataLayout() const {
>>     DataLayout.append("-p:32:32:32");
>>   }
>> 
>> +  if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
>> +    DataLayout.append("-p3:32:32:32");
>> +  }
>> +
>>   return DataLayout;
>> }
>> 
>> diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
>> index 0bd8bce..ee0c397 100644
>> --- a/lib/Target/R600/SIISelLowering.cpp
>> +++ b/lib/Target/R600/SIISelLowering.cpp
>> @@ -87,7 +87,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
>>   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
>>   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
>> 
>> -  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
>> +  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
>> 
>>   setTargetDAGCombine(ISD::SELECT_CC);
>> 
>> diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
>> index 622b36a..e4ad62e 100644
>> --- a/lib/Target/R600/SIInstructions.td
>> +++ b/lib/Target/R600/SIInstructions.td
>> @@ -1714,14 +1714,13 @@ def : Pat <
>> /********** ======================= **********/
>> 
>> def : Pat <
>> -    (local_load i64:$src0),
>> -    (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
>> -                      (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
>> +    (local_load i32:$src0),
>> +    (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
>>> ;
>> 
>> def : Pat <
>> -    (local_store i32:$src1, i64:$src0),
>> -    (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
>> +    (local_store i32:$src1, i32:$src0),
>> +    (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0)
>>> ;
>> 
>> /********** ================== **********/
>> diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
>> new file mode 100644
>> index 0000000..d19a82e
>> --- /dev/null
>> +++ b/test/CodeGen/R600/32-bit-local-address-space.ll
>> @@ -0,0 +1,42 @@
>> +; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
>> +
>> +; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
>> +; the global address space(1) uses 64-bit pointers.  These tests check to make sure
>> +; the correct pointer size is used for the local address space.
>> +
>> +; The e{{32|64}} suffix on the instructions refers to the encoding size and not
>> +; the size of the operands.  The operand size is denoted in the instruction name.
>> +; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
>> +; instructions with B64, U64, and I64 take 64-bit operands.
>> +
>> +; CHECK-LABEL: @local_address_load
>> +; CHECK: V_MOV_B32_e{{32|64}} [[PTR:VGPR[0-9]]]
>> +; CHECK: DS_READ_B32 [[PTR]]
>> +define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
>> +entry:
>> +  %0 = load i32 addrspace(3)* %in
>> +  store i32 %0, i32 addrspace(1)* %out
>> +  ret void
>> +}
>> +
>> +; CHECK-LABEL: @local_address_gep
>> +; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]]
>> +; CHECK: DS_READ_B32 [[PTR]]
>> +define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
>> +entry:
>> +  %0 = getelementptr i32 addrspace(3)* %in, i32 %offset
>> +  %1 = load i32 addrspace(3)* %0
>> +  store i32 %1, i32 addrspace(1)* %out
>> +  ret void
>> +}
>> +
>> +; CHECK-LABEL: @local_address_gep_const_offset
>> +; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]]
>> +; CHECK: DS_READ_B32 [[PTR]]
>> +define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
>> +entry:
>> +  %0 = getelementptr i32 addrspace(3)* %in, i32 1
>> +  %1 = load i32 addrspace(3)* %0
>> +  store i32 %1, i32 addrspace(1)* %out
>> +  ret void
>> +}
>> -- 
>> 1.7.11.4
>> 
> 
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
> 
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits