[llvm-branch-commits] [llvm-branch] r242683 - Merging r242433:
Hans Wennborg
hans at hanshq.net
Mon Jul 20 09:41:33 PDT 2015
Author: hans
Date: Mon Jul 20 11:41:33 2015
New Revision: 242683
URL: http://llvm.org/viewvc/llvm-project?rev=242683&view=rev
Log:
Merging r242433:
------------------------------------------------------------------------
r242433 | tstellar | 2015-07-16 12:40:07 -0700 (Thu, 16 Jul 2015) | 11 lines
AMDGPU/SI: Use AssertZext node to mask high bit for scratch offsets
Summary:
We can safely assume that the high bit of scratch offsets will never
be set, because this would require at least 128 GB of GPU memory.
Reviewers: arsenm
Subscribers: llvm-commits
Differential Revision: http://reviews.llvm.org/D11225
------------------------------------------------------------------------
Modified:
llvm/branches/release_37/ (props changed)
llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td
llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp
Propchange: llvm/branches/release_37/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Mon Jul 20 11:41:33 2015
@@ -1,3 +1,3 @@
/llvm/branches/Apple/Pertwee:110850,110961
/llvm/branches/type-system-rewrite:133420-134817
-/llvm/trunk:155241,242236,242239,242281,242288,242296,242331,242341,242410,242412,242442,242543
+/llvm/trunk:155241,242236,242239,242281,242288,242296,242331,242341,242410,242412,242433,242442,242543
Modified: llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td?rev=242683&r1=242682&r2=242683&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/AMDGPU.td Mon Jul 20 11:41:33 2015
@@ -123,6 +123,11 @@ def FeatureSGPRInitBug : SubtargetFeatur
"true",
"VI SGPR initilization bug requiring a fixed SGPR allocation size">;
+def FeatureEnableHugeScratchBuffer : SubtargetFeature<"huge-scratch-buffer",
+ "EnableHugeScratchBuffer",
+ "true",
+ "Enable scratch buffer sizes greater than 128 GB">;
+
class SubtargetFeatureFetchLimit <string Value> :
SubtargetFeature <"fetch"#Value,
"TexVTXClauseSize",
Modified: llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=242683&r1=242682&r2=242683&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Mon Jul 20 11:41:33 2015
@@ -73,7 +73,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
- IsaVersion(ISAVersion0_0_0),
+ IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
FrameLowering(TargetFrameLowering::StackGrowsUp,
64 * 16, // Maximum stack alignment (long16)
0),
Modified: llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=242683&r1=242682&r2=242683&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/AMDGPUSubtarget.h Mon Jul 20 11:41:33 2015
@@ -89,6 +89,7 @@ private:
bool FeatureDisable;
int LDSBankCount;
unsigned IsaVersion;
+ bool EnableHugeScratchBuffer;
AMDGPUFrameLowering FrameLowering;
std::unique_ptr<AMDGPUTargetLowering> TLInfo;
@@ -271,6 +272,10 @@ public:
return DevName;
}
+ bool enableHugeScratchBuffer() const {
+ return EnableHugeScratchBuffer;
+ }
+
bool dumpCode() const {
return DumpCode;
}
Modified: llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp?rev=242683&r1=242682&r2=242683&view=diff
==============================================================================
--- llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/branches/release_37/lib/Target/AMDGPU/SIISelLowering.cpp Mon Jul 20 11:41:33 2015
@@ -812,10 +812,29 @@ static SDNode *findUser(SDValue Value, u
SDValue SITargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FINode->getIndex();
- return DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
+ // A FrameIndex node represents a 32-bit offset into scratch memory. If
+ // the high bit of a frame index offset were to be set, this would mean
+ // that it represented an offset of ~2GB * 64 = ~128GB from the start of the
+ // scratch buffer, with 64 being the number of threads per wave.
+ //
+ // If we know the machine uses less than 128GB of scratch, then we can
+ // mark the high bit of the FrameIndex node as known zero,
+ // which is important, because it means in most situations we can
+ // prove that values derived from FrameIndex nodes are non-negative.
+ // This enables us to take advantage of more addressing modes when
+ // accessing scratch buffers, since for scratch reads/writes, the register
+ // offset must always be positive.
+
+ SDValue TFI = DAG.getTargetFrameIndex(FrameIndex, MVT::i32);
+ if (Subtarget->enableHugeScratchBuffer())
+ return TFI;
+
+ return DAG.getNode(ISD::AssertZext, SL, MVT::i32, TFI,
+ DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), 31)));
}
/// This transforms the control flow intrinsics to get the branch destination as
@@ -2034,6 +2053,13 @@ void SITargetLowering::adjustWritemask(M
}
}
+static bool isFrameIndexOp(SDValue Op) {
+ if (Op.getOpcode() == ISD::AssertZext)
+ Op = Op.getOperand(0);
+
+ return isa<FrameIndexSDNode>(Op);
+}
+
/// \brief Legalize target independent instructions (e.g. INSERT_SUBREG)
/// with frame index operands.
/// LLVM assumes that inputs are to these instructions are registers.
@@ -2042,7 +2068,7 @@ void SITargetLowering::legalizeTargetInd
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0; i < Node->getNumOperands(); ++i) {
- if (!isa<FrameIndexSDNode>(Node->getOperand(i))) {
+ if (!isFrameIndexOp(Node->getOperand(i))) {
Ops.push_back(Node->getOperand(i));
continue;
}
More information about the llvm-branch-commits
mailing list