[llvm-branch-commits] [llvm-branch] r271679 - Merging r260651:
Tom Stellard via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jun 3 08:58:29 PDT 2016
Author: tstellar
Date: Fri Jun 3 10:58:20 2016
New Revision: 271679
URL: http://llvm.org/viewvc/llvm-project?rev=271679&view=rev
Log:
Merging r260651:
------------------------------------------------------------------------
r260651 | Matthew.Arsenault | 2016-02-11 18:40:47 -0800 (Thu, 11 Feb 2016) | 7 lines
AMDGPU: Set element_size in private resource descriptor
Introduce a subtarget feature for this, and leave the default with
the current behavior which assumes up to 16-byte loads/stores can
be used. The field also seems to have the ability to be set to 2 bytes,
but I'm not sure what that would be used for.
------------------------------------------------------------------------
Modified:
llvm/branches/release_38/lib/Target/AMDGPU/AMDGPU.td
llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/branches/release_38/lib/Target/AMDGPU/AMDKernelCodeT.h
llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.h
llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-compute.ll
llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-graphics.ll
llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
Modified: llvm/branches/release_38/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AMDGPU/AMDGPU.td?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/branches/release_38/lib/Target/AMDGPU/AMDGPU.td Fri Jun 3 10:58:20 2016
@@ -206,6 +206,17 @@ def FeatureCIInsts : SubtargetFeature<"c
"true",
"Additional intstructions for CI+">;
+class FeatureMaxPrivateElementSize<int size> : SubtargetFeature<
+ "max-private-element-size-"#size,
+ "MaxPrivateElementSize",
+ !cast<string>(size),
+ "Maximum private access size may be "#size
+>;
+
+def FeatureMaxPrivateElementSize4 : FeatureMaxPrivateElementSize<4>;
+def FeatureMaxPrivateElementSize8 : FeatureMaxPrivateElementSize<8>;
+def FeatureMaxPrivateElementSize16 : FeatureMaxPrivateElementSize<16>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
Modified: llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Fri Jun 3 10:58:20 2016
@@ -593,6 +593,20 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI
}
}
+// This is supposed to be log2(Size) - 1 (the same encoding as the resource descriptor's element size)
+static amd_element_byte_size_t getElementByteSizeValue(unsigned Size) {
+ switch (Size) {
+ case 4:
+ return AMD_ELEMENT_4_BYTES;
+ case 8:
+ return AMD_ELEMENT_8_BYTES;
+ case 16:
+ return AMD_ELEMENT_16_BYTES;
+ default:
+ llvm_unreachable("invalid private_element_size");
+ }
+}
+
void AMDGPUAsmPrinter::EmitAmdKernelCodeT(const MachineFunction &MF,
const SIProgramInfo &KernelInfo) const {
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
@@ -606,6 +620,11 @@ void AMDGPUAsmPrinter::EmitAmdKernelCode
(KernelInfo.ComputePGMRSrc2 << 32);
header.code_properties = AMD_CODE_PROPERTY_IS_PTR64;
+
+ AMD_HSA_BITS_SET(header.code_properties,
+ AMD_CODE_PROPERTY_PRIVATE_ELEMENT_SIZE,
+ getElementByteSizeValue(STM.getMaxPrivateElementSize()));
+
if (MFI->hasPrivateSegmentBuffer()) {
header.code_properties |=
AMD_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER;
Modified: llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Fri Jun 3 10:58:20 2016
@@ -58,6 +58,11 @@ AMDGPUSubtarget::initializeSubtargetDepe
FP32Denormals = false;
FP64Denormals = false;
}
+
+ // Set defaults if needed.
+ if (MaxPrivateElementSize == 0)
+ MaxPrivateElementSize = 16;
+
return *this;
}
@@ -72,6 +77,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const T
EnableLoadStoreOpt(false), EnableUnsafeDSOffsetFolding(false),
EnableXNACK(false),
WavefrontSize(0), CFALUBug(false), LocalMemorySize(0),
+ MaxPrivateElementSize(0),
EnableVGPRSpilling(false), SGPRInitBug(false), IsGCN(false),
GCN1Encoding(false), GCN3Encoding(false), CIInsts(false), LDSBankCount(0),
IsaVersion(ISAVersion0_0_0), EnableHugeScratchBuffer(false),
Modified: llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/branches/release_38/lib/Target/AMDGPU/AMDGPUSubtarget.h Fri Jun 3 10:58:20 2016
@@ -79,6 +79,7 @@ private:
unsigned WavefrontSize;
bool CFALUBug;
int LocalMemorySize;
+ unsigned MaxPrivateElementSize;
bool EnableVGPRSpilling;
bool SGPRInitBug;
bool IsGCN;
@@ -243,6 +244,10 @@ public:
return LocalMemorySize;
}
+ unsigned getMaxPrivateElementSize() const {
+ return MaxPrivateElementSize;
+ }
+
bool hasSGPRInitBug() const {
return SGPRInitBug;
}
Modified: llvm/branches/release_38/lib/Target/AMDGPU/AMDKernelCodeT.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AMDGPU/AMDKernelCodeT.h?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AMDGPU/AMDKernelCodeT.h (original)
+++ llvm/branches/release_38/lib/Target/AMDGPU/AMDKernelCodeT.h Fri Jun 3 10:58:20 2016
@@ -44,6 +44,15 @@ enum amd_code_version_t {
AMD_CODE_VERSION_MINOR = 1
};
+// Sets val bits for specified mask in specified dst packed instance.
+#define AMD_HSA_BITS_SET(dst, mask, val) \
+ dst &= (~(1 << mask ## _SHIFT) & ~mask); \
+ dst |= (((val) << mask ## _SHIFT) & mask)
+
+// Gets bits for specified mask from specified src packed instance.
+#define AMD_HSA_BITS_GET(src, mask) \
+ ((src & mask) >> mask ## _SHIFT) \
+
/// The values used to define the number of bytes to use for the
/// swizzle element size.
enum amd_element_byte_size_t {
Modified: llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.cpp Fri Jun 3 10:58:20 2016
@@ -3120,6 +3120,10 @@ uint64_t SIInstrInfo::getScratchRsrcWord
AMDGPU::RSRC_TID_ENABLE |
0xffffffff; // Size;
+ uint64_t EltSizeValue = Log2_32(ST.getMaxPrivateElementSize()) - 1;
+
+ Rsrc23 |= (EltSizeValue << AMDGPU::RSRC_ELEMENT_SIZE_SHIFT);
+
// If TID_ENABLE is set, DATA_FORMAT specifies stride bits [14:17].
// Clear them unless we want a huge stride.
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
Modified: llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.h?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.h (original)
+++ llvm/branches/release_38/lib/Target/AMDGPU/SIInstrInfo.h Fri Jun 3 10:58:20 2016
@@ -498,7 +498,7 @@ namespace AMDGPU {
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_TID_ENABLE = 1LL << 55;
-
+ const uint64_t RSRC_ELEMENT_SIZE_SHIFT = 51;
} // End namespace AMDGPU
namespace SI {
Modified: llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-compute.ll?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-compute.ll (original)
+++ llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-compute.ll Fri Jun 3 10:58:20 2016
@@ -10,8 +10,8 @@
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x80f000
-; VI: s_mov_b32 s11, 0x800000
+; CI: s_mov_b32 s11, 0x98f000
+; VI: s_mov_b32 s11, 0x980000
; GCNHSA: .amd_kernel_code_t
Modified: llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-graphics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-graphics.ll?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-graphics.ll (original)
+++ llvm/branches/release_38/test/CodeGen/AMDGPU/large-alloca-graphics.ll Fri Jun 3 10:58:20 2016
@@ -5,8 +5,8 @@
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x80f000
-; VI: s_mov_b32 s11, 0x800000
+; CI: s_mov_b32 s11, 0x98f000
+; VI: s_mov_b32 s11, 0x980000
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
@@ -26,8 +26,8 @@ define void @large_alloca_pixel_shader(i
; GCN: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GCN: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GCN: s_mov_b32 s10, -1
-; CI: s_mov_b32 s11, 0x80f000
-; VI: s_mov_b32 s11, 0x800000
+; CI: s_mov_b32 s11, 0x98f000
+; VI: s_mov_b32 s11, 0x980000
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, s[8:11], s1 offen
Modified: llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll (original)
+++ llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot-compute.ll Fri Jun 3 10:58:20 2016
@@ -21,8 +21,8 @@ declare i32 @llvm.r600.read.tgid.z() #1
; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s14, -1
-; SI-NEXT: s_mov_b32 s15, 0x80f000
-; VI-NEXT: s_mov_b32 s15, 0x800000
+; SI-NEXT: s_mov_b32 s15, 0x98f000
+; VI-NEXT: s_mov_b32 s15, 0x980000
; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s16 offset:{{[0-9]+}} ; 4-byte Folded Spill
Modified: llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll?rev=271679&r1=271678&r2=271679&view=diff
==============================================================================
--- llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll (original)
+++ llvm/branches/release_38/test/CodeGen/AMDGPU/vgpr-spill-emergency-stack-slot.ll Fri Jun 3 10:58:20 2016
@@ -14,8 +14,8 @@
; GCN: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s14, -1
-; SI-NEXT: s_mov_b32 s15, 0x80f000
-; VI-NEXT: s_mov_b32 s15, 0x800000
+; SI-NEXT: s_mov_b32 s15, 0x98f000
+; VI-NEXT: s_mov_b32 s15, 0x980000
; s12 is offset user SGPR
; GCN: buffer_store_dword {{v[0-9]+}}, s[12:15], s11 offset:{{[0-9]+}} ; 4-byte Folded Spill
More information about the llvm-branch-commits
mailing list