[PATCH] SelectionDAG: Use correct pointer size when lowering function arguments

Tue Aug 13 20:17:08 PDT 2013

On Tue, Aug 13, 2013 at 04:16:12PM -0700, Matt Arsenault wrote:
> On 08/13/2013 03:48 PM, Tom Stellard wrote:
> >+      APInt ElementSize = APInt(TLI->getPointerTy(AS).getSizeInBits(),
> I think wrappers similar to the one DataLayout has would be helpful
> TLI->getPointerSizeInBits(AS), TLI->getPointerTypeSizeInBits(Ty)
> >                                  TD->getTypeAllocSize(Ty));
> >        SDValue IdxN = getValue(Idx);
> >diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
> >index b3711ad..ef931ec 100644
> >--- a/lib/CodeGen/TargetLoweringBase.cpp
> >+++ b/lib/CodeGen/TargetLoweringBase.cpp
> >@@ -755,6 +755,10 @@ void TargetLoweringBase::initActions() {
> >    setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
> >  }
> >+MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
> >+  return MVT::getIntegerVT(TD->getPointerSizeInBits(AS));
> >+}
> >+
> Another one:
> 
> MVT TargetLoweringBase::getPointerTy(Type *Ty) const {
>   return MVT::getIntegerVT(TD->getPointerTypeSizeInBits(Ty));
> }
> 

I didn't add this function, because it makes calling getPointerTy(0)
ambiguous, but I made the other changes you suggested in the attached
patch.

-Tom
-------------- next part --------------
>From 5c815e63c0d41e3e22b1056e3a1a1dbc3d0675d8 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Tue, 13 Aug 2013 10:28:31 -0700
Subject: [PATCH] SelectionDAG: Use correct pointer size when lowering
 function arguments v2

This adds minimal support to the SelectionDAG for handling address spaces
with different pointer sizes.  The SelectionDAG should now correctly
lower pointer function arguments to the correct size as well as generate
the correct code when lowering getelementptr.

This patch also updates the R600 DataLayout to use 32-bit pointers for
the local address space.

v2:
  - Add more helper functions to TargetLoweringBase
  - Use CHECK-LABEL for tests
---
 include/llvm/Target/TargetLowering.h             |  6 ++--
 lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 13 +++++---
 lib/CodeGen/TargetLoweringBase.cpp               | 13 ++++++++
 lib/Target/R600/AMDGPUISelLowering.cpp           |  4 ++-
 lib/Target/R600/AMDGPUSubtarget.cpp              |  4 +++
 lib/Target/R600/SIISelLowering.cpp               |  2 +-
 lib/Target/R600/SIInstructions.td                |  9 +++--
 test/CodeGen/R600/32-bit-local-address-space.ll  | 42 ++++++++++++++++++++++++
 8 files changed, 79 insertions(+), 14 deletions(-)
 create mode 100644 test/CodeGen/R600/32-bit-local-address-space.ll

diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index c3fa3cc..d3c2695 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -151,7 +151,9 @@ public:
   // Return the pointer type for the given address space, defaults to
   // the pointer type from the data layout.
   // FIXME: The default needs to be removed once all the code is updated.
-  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const { return PointerTy; }
+  virtual MVT getPointerTy(uint32_t /*AS*/ = 0) const;
+  unsigned getPointerSizeInBits(uint32_t AS = 0) const;
+  unsigned getPointerTypeSizeInBits(Type *Ty) const;
   virtual MVT getScalarShiftAmountTy(EVT LHSTy) const;
 
   EVT getShiftAmountTy(EVT LHSTy) const;
@@ -568,7 +570,7 @@ public:
   /// otherwise it will assert.
   EVT getValueType(Type *Ty, bool AllowUnknown = false) const {
     // Lower scalar pointers to native pointer types.
-    if (Ty->isPointerTy()) return PointerTy;
+    if (Ty->isPointerTy()) return getPointerTy(Ty->getPointerAddressSpace());
 
     if (Ty->isVectorTy()) {
       VectorType *VTy = cast<VectorType>(Ty);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1101ee1..1bcdd94 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -3171,6 +3171,10 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
 
       Ty = StTy->getElementType(Field);
     } else {
+      uint32_t AS = 0;
+      if (PointerType *PtrType = dyn_cast<PointerType>(Ty)) {
+        AS = PtrType->getAddressSpace();
+      }
       Ty = cast<SequentialType>(Ty)->getElementType();
 
       // If this is a constant subscript, handle it quickly.
@@ -3180,14 +3184,13 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
         uint64_t Offs =
             TD->getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
         SDValue OffsVal;
-        EVT PTy = TLI->getPointerTy();
+        EVT PTy = TLI->getPointerTy(AS);
         unsigned PtrBits = PTy.getSizeInBits();
         if (PtrBits < 64)
-          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(),
-                                TLI->getPointerTy(),
+          OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy,
                                 DAG.getConstant(Offs, MVT::i64));
         else
-          OffsVal = DAG.getIntPtrConstant(Offs);
+          OffsVal = DAG.getConstant(Offs, PTy);
 
         N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N,
                         OffsVal);
@@ -3195,7 +3198,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
       }
 
       // N = N + Idx * ElementSize;
-      APInt ElementSize = APInt(TLI->getPointerTy().getSizeInBits(),
+      APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS),
                                 TD->getTypeAllocSize(Ty));
       SDValue IdxN = getValue(Idx);
 
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index b3711ad..9032ff8 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -755,6 +755,19 @@ void TargetLoweringBase::initActions() {
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
 }
 
+MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
+  return MVT::getIntegerVT(getPointerSizeInBits(AS));
+}
+
+unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
+  return TD->getPointerSizeInBits(AS);
+}
+
+unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
+  assert(Ty->isPointerTy());
+  return getPointerSizeInBits(Ty->getPointerAddressSpace());
+}
+
 MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
   return MVT::getIntegerVT(8*TD->getPointerSize(0));
 }
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index dd8b73f..bde6b48 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -235,6 +235,8 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
 
   const DataLayout *TD = getTargetMachine().getDataLayout();
   GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op);
+
+  assert(G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS);
   // XXX: What does the value of G->getOffset() mean?
   assert(G->getOffset() == 0 &&
          "Do not know what to do with an non-zero offset");
@@ -246,7 +248,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
   // XXX: Account for alignment?
   MFI->LDSSize += Size;
 
-  return DAG.getConstant(Offset, TD->getPointerSize() == 8 ? MVT::i64 : MVT::i32);
+  return DAG.getConstant(Offset, getPointerTy(G->getAddressSpace()));
 }
 
 void AMDGPUTargetLowering::ExtractVectorElements(SDValue Op, SelectionDAG &DAG,
diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp
index 8ed5a74..5623f9e 100644
--- a/lib/Target/R600/AMDGPUSubtarget.cpp
+++ b/lib/Target/R600/AMDGPUSubtarget.cpp
@@ -98,6 +98,10 @@ AMDGPUSubtarget::getDataLayout() const {
     DataLayout.append("-p:32:32:32");
   }
 
+  if (Gen >= AMDGPUSubtarget::SOUTHERN_ISLANDS) {
+    DataLayout.append("-p3:32:32:32");
+  }
+
   return DataLayout;
 }
 
diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp
index 0bd8bce..ee0c397 100644
--- a/lib/Target/R600/SIISelLowering.cpp
+++ b/lib/Target/R600/SIISelLowering.cpp
@@ -87,7 +87,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
 
-  setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
+  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
 
   setTargetDAGCombine(ISD::SELECT_CC);
 
diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td
index 622b36a..e4ad62e 100644
--- a/lib/Target/R600/SIInstructions.td
+++ b/lib/Target/R600/SIInstructions.td
@@ -1714,14 +1714,13 @@ def : Pat <
 /********** ======================= **********/
 
 def : Pat <
-    (local_load i64:$src0),
-    (i32 (DS_READ_B32 0, (EXTRACT_SUBREG $src0, sub0),
-                      (EXTRACT_SUBREG $src0, sub0), (EXTRACT_SUBREG $src0, sub0), 0, 0))
+    (local_load i32:$src0),
+    (i32 (DS_READ_B32 0, $src0, $src0, $src0, 0, 0))
 >;
 
 def : Pat <
-    (local_store i32:$src1, i64:$src0),
-    (DS_WRITE_B32 0, (EXTRACT_SUBREG $src0, sub0), $src1, $src1, 0, 0)
+    (local_store i32:$src1, i32:$src0),
+    (DS_WRITE_B32 0, $src0, $src1, $src1, 0, 0)
 >;
 
 /********** ================== **********/
diff --git a/test/CodeGen/R600/32-bit-local-address-space.ll b/test/CodeGen/R600/32-bit-local-address-space.ll
new file mode 100644
index 0000000..d19a82e
--- /dev/null
+++ b/test/CodeGen/R600/32-bit-local-address-space.ll
@@ -0,0 +1,42 @@
+; RUN: llc < %s -march=r600 -mcpu=verde | FileCheck %s
+
+; On Southern Islands GPUs the local address space(3) uses 32-bit pointers and
+; the global address space(1) uses 64-bit pointers.  These tests check to make sure
+; the correct pointer size is used for the local address space.
+
+; The e{{32|64}} suffix on the instructions refers to the encoding size and not
+; the size of the operands.  The operand size is denoted in the instruction name.
+; Instructions with B32, U32, and I32 in their name take 32-bit operands, while
+; instructions with B64, U64, and I64 take 64-bit operands.
+
+; CHECK-LABEL: @local_address_load
+; CHECK: V_MOV_B32_e{{32|64}} [[PTR:VGPR[0-9]]]
+; CHECK: DS_READ_B32 [[PTR]]
+define void @local_address_load(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %0 = load i32 addrspace(3)* %in
+  store i32 %0, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @local_address_gep
+; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]]
+; CHECK: DS_READ_B32 [[PTR]]
+define void @local_address_gep(i32 addrspace(1)* %out, i32 addrspace(3)* %in, i32 %offset) {
+entry:
+  %0 = getelementptr i32 addrspace(3)* %in, i32 %offset
+  %1 = load i32 addrspace(3)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
+
+; CHECK-LABEL: @local_address_gep_const_offset
+; CHECK: V_ADD_I32_e{{32|64}} [[PTR:VGPR[0-9]]]
+; CHECK: DS_READ_B32 [[PTR]]
+define void @local_address_gep_const_offset(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
+entry:
+  %0 = getelementptr i32 addrspace(3)* %in, i32 1
+  %1 = load i32 addrspace(3)* %0
+  store i32 %1, i32 addrspace(1)* %out
+  ret void
+}
-- 
1.7.11.4