[PATCH] D11225: AMDGPU/SI: Use AssertAnd node to mask high bit for scratch offsets

Tom Stellard thomas.stellard at amd.com
Wed Jul 15 10:20:53 PDT 2015


tstellarAMD created this revision.
tstellarAMD added a reviewer: arsenm.
tstellarAMD added a subscriber: llvm-commits.

We can safely assume that the high bit of a scratch offset will never
be set, because setting it would require at least 128 GB of GPU-accessible memory.

http://reviews.llvm.org/D11225

Files:
  lib/Target/AMDGPU/SIISelLowering.cpp

Index: lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- lib/Target/AMDGPU/SIISelLowering.cpp
+++ lib/Target/AMDGPU/SIISelLowering.cpp
@@ -812,10 +812,28 @@
 
 SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
 
+  SDLoc SL(Op);
   FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
   unsigned FrameIndex = FINode->getIndex();
 
-  return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
+  // A FrameIndex node represents a 32-bit offset into scratch memory.  If
+  // the high bit of a frame index offset were to be set, this would mean
+  // that it represented an offset of ~2GB * 64 = ~128GB from the start of the
+  // scratch buffer, with 64 being the number of threads per wave.
+  //
+  // It may be theoretically possible to construct a system that makes
+  // 128 GB of memory accessible to the GPU, but for now we assume such
+  // machines do not exist.
+  //
+  // This allows us to mark the high bit of the FrameIndex node as known zero,
+  // which is important, because it means in most situations we can
+  // prove that values derived from FrameIndex nodes are non-negative.
+  // This enables us to take advantage of more addressing modes when
+  // accessing scratch buffers, since for scratch reads/writes, the register
+  // offset must always be positive.
+  return DAG.getNode(ISD::AssertAnd, SL, MVT::i32,
+                     DAG.getTargetFrameIndex(FrameIndex, MVT::i32),
+                     DAG.getConstant(APInt::getLowBitsSet(32,31), SL, MVT::i32));
 }
 
 /// This transforms the control flow intrinsics to get the branch destination as
@@ -2033,15 +2051,22 @@
   }
 }
 
+static bool isFrameIndexOp(SDValue Op) {
+  if (Op.getOpcode() == ISD::AssertAnd)
+    Op = Op.getOperand(0);
+
+  return isa<FrameIndexSDNode>(Op);
+}
+
 /// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
 /// with frame index operands.
 /// LLVM assumes that inputs are to these instructions are registers.
 void SITargetLowering::legalizeTargetIndependentNode(SDNode *Node,
                                                      SelectionDAG &DAG) const {
 
   SmallVector<SDValue, 8> Ops;
   for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
-    if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
+    if (!isFrameIndexOp(Node->getOperand(i))) {
       Ops.push_back(Node->getOperand(i));
       continue;
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D11225.29791.patch
Type: text/x-patch
Size: 2444 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150715/6f4a25af/attachment.bin>


More information about the llvm-commits mailing list