[llvm] r359113 - [AMDGPU] Add gfx1010 target definitions
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 24 10:03:16 PDT 2019
Author: rampitec
Date: Wed Apr 24 10:03:15 2019
New Revision: 359113
URL: http://llvm.org/viewvc/llvm-project?rev=359113&view=rev
Log:
[AMDGPU] Add gfx1010 target definitions
Differential Revision: https://reviews.llvm.org/D61041
Modified:
llvm/trunk/include/llvm/BinaryFormat/ELF.h
llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h
llvm/trunk/include/llvm/Support/TargetParser.h
llvm/trunk/lib/ObjectYAML/ELFYAML.cpp
llvm/trunk/lib/Support/TargetParser.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/GCNProcessors.td
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/trunk/lib/Target/AMDGPU/SIDefines.h
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/trunk/lib/Target/AMDGPU/SISchedule.td
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
Modified: llvm/trunk/include/llvm/BinaryFormat/ELF.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/BinaryFormat/ELF.h?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/include/llvm/BinaryFormat/ELF.h (original)
+++ llvm/trunk/include/llvm/BinaryFormat/ELF.h Wed Apr 24 10:03:15 2019
@@ -703,14 +703,17 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX904 = 0x02e,
EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f,
EF_AMDGPU_MACH_AMDGCN_GFX909 = 0x031,
+ // AMDGCN GFX10.
+ EF_AMDGPU_MACH_AMDGCN_GFX1010 = 0x033,
// Reserved for AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_RESERVED0 = 0x027,
EF_AMDGPU_MACH_AMDGCN_RESERVED1 = 0x030,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED2 = 0x032,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX909,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1010,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
Modified: llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h (original)
+++ llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h Wed Apr 24 10:03:15 2019
@@ -88,8 +88,11 @@ enum : int32_t {
COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
COMPUTE_PGM_RSRC1(BULKY, 24, 1),
COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
- COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
- COMPUTE_PGM_RSRC1(RESERVED0, 27, 5),
+ COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1), // GFX9+
+ COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
+ COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1), // GFX10+
+ COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1), // GFX10+
+ COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
};
#undef COMPUTE_PGM_RSRC1
@@ -119,6 +122,15 @@ enum : int32_t {
};
#undef COMPUTE_PGM_RSRC2
+// Compute program resource register 3. Must match hardware definition.
+#define COMPUTE_PGM_RSRC3(NAME, SHIFT, WIDTH) \
+ AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_ ## NAME, SHIFT, WIDTH)
+enum : int32_t {
+ COMPUTE_PGM_RSRC3(SHARED_VGPR_COUNT, 0, 4), // GFX10+
+ COMPUTE_PGM_RSRC3(RESERVED0, 4, 28),
+};
+#undef COMPUTE_PGM_RSRC3
+
// Kernel code properties. Must be kept backwards compatible.
#define KERNEL_CODE_PROPERTY(NAME, SHIFT, WIDTH) \
AMDHSA_BITS_ENUM_ENTRY(KERNEL_CODE_PROPERTY_ ## NAME, SHIFT, WIDTH)
@@ -130,7 +142,8 @@ enum : int32_t {
KERNEL_CODE_PROPERTY(ENABLE_SGPR_DISPATCH_ID, 4, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
- KERNEL_CODE_PROPERTY(RESERVED0, 7, 9),
+ KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
+ KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
};
#undef KERNEL_CODE_PROPERTY
@@ -140,7 +153,8 @@ struct kernel_descriptor_t {
uint32_t private_segment_fixed_size;
uint8_t reserved0[8];
int64_t kernel_code_entry_byte_offset;
- uint8_t reserved1[24];
+ uint8_t reserved1[20];
+ uint32_t compute_pgm_rsrc3; // GFX10+
uint32_t compute_pgm_rsrc1;
uint32_t compute_pgm_rsrc2;
uint16_t kernel_code_properties;
@@ -166,6 +180,9 @@ static_assert(
offsetof(kernel_descriptor_t, reserved1) == 24,
"invalid offset for reserved1");
static_assert(
+ offsetof(kernel_descriptor_t, compute_pgm_rsrc3) == 44,
+ "invalid offset for compute_pgm_rsrc3");
+static_assert(
offsetof(kernel_descriptor_t, compute_pgm_rsrc1) == 48,
"invalid offset for compute_pgm_rsrc1");
static_assert(
Modified: llvm/trunk/include/llvm/Support/TargetParser.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetParser.h?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/TargetParser.h (original)
+++ llvm/trunk/include/llvm/Support/TargetParser.h Wed Apr 24 10:03:15 2019
@@ -123,8 +123,10 @@ enum GPUKind : uint32_t {
GK_GFX906 = 63,
GK_GFX909 = 65,
+ GK_GFX1010 = 71,
+
GK_AMDGCN_FIRST = GK_GFX600,
- GK_AMDGCN_LAST = GK_GFX909,
+ GK_AMDGCN_LAST = GK_GFX1010,
};
/// Instruction set architecture version.
Modified: llvm/trunk/lib/ObjectYAML/ELFYAML.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ObjectYAML/ELFYAML.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/ObjectYAML/ELFYAML.cpp (original)
+++ llvm/trunk/lib/ObjectYAML/ELFYAML.cpp Wed Apr 24 10:03:15 2019
@@ -411,6 +411,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX904, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX906, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCase(EF_AMDGPU_XNACK);
BCase(EF_AMDGPU_SRAM_ECC);
break;
Modified: llvm/trunk/lib/Support/TargetParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Support/TargetParser.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Support/TargetParser.cpp (original)
+++ llvm/trunk/lib/Support/TargetParser.cpp Wed Apr 24 10:03:15 2019
@@ -62,7 +62,7 @@ constexpr GPUInfo R600GPUs[26] = {
// This table should be sorted by the value of GPUKind
// Don't bother listing the implicitly true features
-constexpr GPUInfo AMDGCNGPUs[33] = {
+constexpr GPUInfo AMDGCNGPUs[34] = {
// Name Canonical Kind Features
// Name
{{"gfx600"}, {"gfx600"}, GK_GFX600, FEATURE_FAST_FMA_F32},
@@ -98,6 +98,7 @@ constexpr GPUInfo AMDGCNGPUs[33] = {
{{"gfx904"}, {"gfx904"}, GK_GFX904, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx906"}, {"gfx906"}, GK_GFX906, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
+ {{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32},
};
const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
@@ -179,22 +180,23 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion
}
switch (AK) {
- case GK_GFX600: return {6, 0, 0};
- case GK_GFX601: return {6, 0, 1};
- case GK_GFX700: return {7, 0, 0};
- case GK_GFX701: return {7, 0, 1};
- case GK_GFX702: return {7, 0, 2};
- case GK_GFX703: return {7, 0, 3};
- case GK_GFX704: return {7, 0, 4};
- case GK_GFX801: return {8, 0, 1};
- case GK_GFX802: return {8, 0, 2};
- case GK_GFX803: return {8, 0, 3};
- case GK_GFX810: return {8, 1, 0};
- case GK_GFX900: return {9, 0, 0};
- case GK_GFX902: return {9, 0, 2};
- case GK_GFX904: return {9, 0, 4};
- case GK_GFX906: return {9, 0, 6};
- case GK_GFX909: return {9, 0, 9};
- default: return {0, 0, 0};
+ case GK_GFX600: return {6, 0, 0};
+ case GK_GFX601: return {6, 0, 1};
+ case GK_GFX700: return {7, 0, 0};
+ case GK_GFX701: return {7, 0, 1};
+ case GK_GFX702: return {7, 0, 2};
+ case GK_GFX703: return {7, 0, 3};
+ case GK_GFX704: return {7, 0, 4};
+ case GK_GFX801: return {8, 0, 1};
+ case GK_GFX802: return {8, 0, 2};
+ case GK_GFX803: return {8, 0, 3};
+ case GK_GFX810: return {8, 1, 0};
+ case GK_GFX900: return {9, 0, 0};
+ case GK_GFX902: return {9, 0, 2};
+ case GK_GFX904: return {9, 0, 4};
+ case GK_GFX906: return {9, 0, 6};
+ case GK_GFX909: return {9, 0, 9};
+ case GK_GFX1010: return {10, 1, 0};
+ default: return {0, 0, 0};
}
}
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPU.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPU.td?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPU.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPU.td Wed Apr 24 10:03:15 2019
@@ -60,6 +60,12 @@ def FeatureFlatScratchInsts : SubtargetF
"Have scratch_* flat memory instructions"
>;
+def FeatureScalarFlatScratchInsts : SubtargetFeature<"scalar-flat-scratch-insts",
+ "ScalarFlatScratchInsts",
+ "true",
+ "Have s_scratch_* flat memory instructions"
+>;
+
def FeatureAddNoCarryInsts : SubtargetFeature<"add-no-carry-insts",
"AddNoCarryInsts",
"true",
@@ -115,12 +121,72 @@ def FeatureXNACK : SubtargetFeature<"xna
"Enable XNACK support"
>;
+def FeatureCuMode : SubtargetFeature<"cumode",
+ "EnableCuMode",
+ "true",
+ "Enable CU wavefront execution mode"
+>;
+
def FeatureSGPRInitBug : SubtargetFeature<"sgpr-init-bug",
"SGPRInitBug",
"true",
"VI SGPR initialization bug requiring a fixed SGPR allocation size"
>;
+def FeatureLdsMisalignedBug : SubtargetFeature<"lds-misaligned-bug",
+ "LDSMisalignedBug",
+ "true",
+ "Some GFX10 bug with misaligned multi-dword LDS access in WGP mode"
+>;
+
+def FeatureVcmpxPermlaneHazard : SubtargetFeature<"vcmpx-permlane-hazard",
+ "HasVcmpxPermlaneHazard",
+ "true",
+ "TODO: describe me"
+>;
+
+def FeatureVMEMtoScalarWriteHazard : SubtargetFeature<"vmem-to-scalar-write-hazard",
+ "HasVMEMtoScalarWriteHazard",
+ "true",
+ "VMEM instruction followed by scalar writing to EXEC mask, M0 or SGPR leads to incorrect execution."
+>;
+
+def FeatureSMEMtoVectorWriteHazard : SubtargetFeature<"smem-to-vector-write-hazard",
+ "HasSMEMtoVectorWriteHazard",
+ "true",
+ "s_load_dword followed by v_cmp page faults"
+>;
+
+def FeatureInstFwdPrefetchBug : SubtargetFeature<"inst-fwd-prefetch-bug",
+ "HasInstFwdPrefetchBug",
+ "true",
+ "S_INST_PREFETCH instruction causes shader to hang"
+>;
+
+def FeatureVcmpxExecWARHazard : SubtargetFeature<"vcmpx-exec-war-hazard",
+ "HasVcmpxExecWARHazard",
+ "true",
+ "V_CMPX WAR hazard on EXEC (V_CMPX issue ONLY)"
+>;
+
+def FeatureLdsBranchVmemWARHazard : SubtargetFeature<"lds-branch-vmem-war-hazard",
+ "HasLdsBranchVmemWARHazard",
+ "true",
+ "Switching between LDS and VMEM-tex not waiting VM_VSRC=0"
+>;
+
+def FeatureNSAtoVMEMBug : SubtargetFeature<"nsa-to-vmem-bug",
+ "HasNSAtoVMEMBug",
+ "true",
+ "MIMG-NSA followed by VMEM fail if EXEC_LO or EXEC_HI equals zero"
+>;
+
+def FeatureFlatSegmentOffsetBug : SubtargetFeature<"flat-segment-offset-bug",
+ "HasFlatSegmentOffsetBug",
+ "true",
+ "GFX10 bug, inst_offset ignored in flat segment"
+>;
+
class SubtargetFeatureLDSBankCount <int Value> : SubtargetFeature <
"ldsbankcount"#Value,
"LDSBankCount",
@@ -155,6 +221,12 @@ def FeatureGFX9Insts : SubtargetFeature<
"Additional instructions for GFX9+"
>;
+def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
+ "GFX10Insts",
+ "true",
+ "Additional instructions for GFX10+"
+>;
+
def FeatureGFX7GFX8GFX9Insts : SubtargetFeature<"gfx7-gfx8-gfx9-insts",
"GFX7GFX8GFX9Insts",
"true",
@@ -257,6 +329,12 @@ def FeatureR128A16 : SubtargetFeature<"r
"Support 16 bit coordindates/gradients/lod/clamp/mip types on gfx9"
>;
+def FeatureNSAEncoding : SubtargetFeature<"nsa-encoding",
+ "HasNSAEncoding",
+ "true",
+ "Support NSA encoding for image instructions"
+>;
+
def FeatureIntClamp : SubtargetFeature<"int-clamp-insts",
"HasIntClamp",
"true",
@@ -299,6 +377,36 @@ def FeatureSRAMECC : SubtargetFeature<"s
"Enable SRAM ECC"
>;
+def FeatureNoSdstCMPX : SubtargetFeature<"no-sdst-cmpx",
+ "HasNoSdstCMPX",
+ "true",
+ "V_CMPX does not write VCC/SGPR in addition to EXEC"
+>;
+
+def FeatureVscnt : SubtargetFeature<"vscnt",
+ "HasVscnt",
+ "true",
+ "Has separate store vscnt counter"
+>;
+
+def FeatureRegisterBanking : SubtargetFeature<"register-banking",
+ "HasRegisterBanking",
+ "true",
+ "Has register banking"
+>;
+
+def FeatureVOP3Literal : SubtargetFeature<"vop3-literal",
+ "HasVOP3Literal",
+ "true",
+ "Can use one literal in VOP3"
+>;
+
+def FeatureNoDataDepHazard : SubtargetFeature<"no-data-dep-hazard",
+ "HasNoDataDepHazard",
+ "true",
+ "Does not need SW waitstates"
+>;
+
//===------------------------------------------------------------===//
// Subtarget Features (options and debugging)
//===------------------------------------------------------------===//
@@ -487,7 +595,24 @@ def FeatureGFX9 : GCNSubtargetFeatureGen
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
- FeatureScalarAtomics, FeatureR128A16
+ FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
+ ]
+>;
+
+def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
+ "gfx10",
+ [FeatureFP64, FeatureLocalMemorySize65536, FeatureMIMG_R128,
+ FeatureFlatAddressSpace,
+ FeatureCIInsts, Feature16BitInsts,
+ FeatureSMemRealTime, FeatureInv2PiInlineImm,
+ FeatureApertureRegs, FeatureGFX9Insts, FeatureGFX10Insts, FeatureVOP3P,
+ FeatureMovrel, FeatureFastFMAF32, FeatureDPP, FeatureIntClamp,
+ FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
+ FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
+ FeatureAddNoCarryInsts, FeatureFmaMixInsts, FeatureGFX8Insts,
+ FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
+ FeatureVOP3Literal, FeatureNoDataDepHazard,
+ FeatureDoesNotSupportSRAMECC
]
>;
@@ -601,6 +726,34 @@ def FeatureISAVersion9_0_9 : FeatureSet<
FeatureXNACK,
FeatureCodeObjectV3]>;
+// TODO: Organize more features into groups.
+def FeatureGroup {
+ // Bugs present on gfx10.1.
+ list<SubtargetFeature> GFX10_1_Bugs = [
+ FeatureVcmpxPermlaneHazard,
+ FeatureVMEMtoScalarWriteHazard,
+ FeatureSMEMtoVectorWriteHazard,
+ FeatureInstFwdPrefetchBug,
+ FeatureVcmpxExecWARHazard,
+ FeatureLdsBranchVmemWARHazard,
+ FeatureNSAtoVMEMBug,
+ FeatureFlatSegmentOffsetBug
+ ];
+}
+
+def FeatureISAVersion10_1_0 : FeatureSet<
+ !listconcat(FeatureGroup.GFX10_1_Bugs,
+ [FeatureGFX10,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureNSAEncoding,
+ FeatureWavefrontSize64,
+ FeatureScalarStores,
+ FeatureScalarAtomics,
+ FeatureScalarFlatScratchInsts,
+ FeatureLdsMisalignedBug,
+ FeatureCodeObjectV3])>;
+
//===----------------------------------------------------------------------===//
def AMDGPUInstrInfo : InstrInfo {
@@ -687,10 +840,21 @@ def isGFX6 :
def isGFX6GFX7 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,!FeatureGFX10Insts">;
+
+def isGFX6GFX7GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
AssemblerPredicate<"!FeatureGCN3Encoding">;
def isGFX7Only :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts,!FeatureGFX10Insts">;
+
+def isGFX7GFX10 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX10">,
AssemblerPredicate<"!FeatureGCN3Encoding,FeatureCIInsts">;
def isGFX7GFX8GFX9 :
@@ -699,6 +863,13 @@ def isGFX7GFX8GFX9 :
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX7GFX8GFX9Insts">;
+def isGFX6GFX7GFX8GFX9 :
+ Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::SOUTHERN_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ "Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
+ AssemblerPredicate<"!FeatureGFX10Insts">;
+
def isGFX7Plus :
Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::SEA_ISLANDS">,
AssemblerPredicate<"FeatureCIInsts">;
@@ -724,6 +895,10 @@ def isGFX8GFX9 :
"Subtarget->getGeneration() == AMDGPUSubtarget::GFX9">,
AssemblerPredicate<"FeatureGFX8Insts,FeatureGCN3Encoding">;
+def isGFX10Plus :
+ Predicate<"Subtarget->getGeneration() >= AMDGPUSubtarget::GFX10">,
+ AssemblerPredicate<"FeatureGFX10Insts">;
+
def HasFlatAddressSpace : Predicate<"Subtarget->hasFlatAddressSpace()">,
AssemblerPredicate<"FeatureFlatAddressSpace">;
@@ -731,6 +906,8 @@ def HasFlatGlobalInsts : Predicate<"Subt
AssemblerPredicate<"FeatureFlatGlobalInsts">;
def HasFlatScratchInsts : Predicate<"Subtarget->hasFlatScratchInsts()">,
AssemblerPredicate<"FeatureFlatScratchInsts">;
+def HasScalarFlatScratchInsts : Predicate<"Subtarget->hasScalarFlatScratchInsts()">,
+ AssemblerPredicate<"FeatureScalarFlatScratchInsts">;
def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<"FeatureGFX9Insts">;
@@ -766,6 +943,10 @@ def HasSDWA9 :
Predicate<"Subtarget->hasSDWA()">,
AssemblerPredicate<"FeatureGCN3Encoding,FeatureGFX9Insts,FeatureSDWA">;
+def HasSDWA10 :
+ Predicate<"Subtarget->hasSDWA()">,
+ AssemblerPredicate<"!FeatureGCN3Encoding,FeatureGFX10Insts,FeatureSDWA">;
+
def HasDPP : Predicate<"Subtarget->hasDPP()">,
AssemblerPredicate<"FeatureGCN3Encoding,FeatureDPP">;
@@ -778,9 +959,18 @@ def HasIntClamp : Predicate<"Subtarget->
def HasMadMixInsts : Predicate<"Subtarget->hasMadMixInsts()">,
AssemblerPredicate<"FeatureMadMixInsts">;
+def HasScalarStores : Predicate<"Subtarget->hasScalarStores()">,
+ AssemblerPredicate<"FeatureScalarStores">;
+
def HasScalarAtomics : Predicate<"Subtarget->hasScalarAtomics()">,
AssemblerPredicate<"FeatureScalarAtomics">;
+def HasNoSdstCMPX : Predicate<"Subtarget->hasNoSdstCMPX()">,
+ AssemblerPredicate<"FeatureNoSdstCMPX">;
+
+def HasSdstCMPX : Predicate<"!Subtarget->hasNoSdstCMPX()">,
+ AssemblerPredicate<"!FeatureNoSdstCMPX">;
+
def has16BankLDS : Predicate<"Subtarget->getLDSBankCount() == 16">;
def has32BankLDS : Predicate<"Subtarget->getLDSBankCount() == 32">;
def HasVGPRIndexMode : Predicate<"Subtarget->hasVGPRIndexMode()">,
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed Apr 24 10:03:15 2019
@@ -181,6 +181,7 @@ GCNSubtarget::GCNSubtarget(const Triple
HasApertureRegs(false),
EnableXNACK(false),
+ EnableCuMode(false),
TrapHandler(false),
EnableHugePrivateBuffer(false),
@@ -196,6 +197,7 @@ GCNSubtarget::GCNSubtarget(const Triple
CIInsts(false),
GFX8Insts(false),
GFX9Insts(false),
+ GFX10Insts(false),
GFX7GFX8GFX9Insts(false),
SGPRInitBug(false),
HasSMemRealTime(false),
@@ -212,20 +214,37 @@ GCNSubtarget::GCNSubtarget(const Triple
HasSDWAOutModsVOPC(false),
HasDPP(false),
HasR128A16(false),
+ HasNSAEncoding(false),
HasDLInsts(false),
HasDot1Insts(false),
HasDot2Insts(false),
EnableSRAMECC(false),
DoesNotSupportSRAMECC(false),
+ HasNoSdstCMPX(false),
+ HasVscnt(false),
+ HasRegisterBanking(false),
+ HasVOP3Literal(false),
+ HasNoDataDepHazard(false),
FlatAddressSpace(false),
FlatInstOffsets(false),
FlatGlobalInsts(false),
FlatScratchInsts(false),
+ ScalarFlatScratchInsts(false),
AddNoCarryInsts(false),
HasUnpackedD16VMem(false),
+ LDSMisalignedBug(false),
ScalarizeGlobal(false),
+ HasVcmpxPermlaneHazard(false),
+ HasVMEMtoScalarWriteHazard(false),
+ HasSMEMtoVectorWriteHazard(false),
+ HasInstFwdPrefetchBug(false),
+ HasVcmpxExecWARHazard(false),
+ HasLdsBranchVmemWARHazard(false),
+ HasNSAtoVMEMBug(false),
+ HasFlatSegmentOffsetBug(false),
+
FeatureDisable(false),
InstrInfo(initializeSubtargetDependencies(TT, GPU, FS)),
TLInfo(TM, *this),
@@ -243,6 +262,8 @@ unsigned AMDGPUSubtarget::getMaxLocalMem
return getLocalMemorySize();
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ if (!WorkGroupsPerCu)
+ return 0;
unsigned MaxWaves = getMaxWavesPerEU();
return getLocalMemorySize() * MaxWaves / WorkGroupsPerCu / NWaves;
}
@@ -251,6 +272,8 @@ unsigned AMDGPUSubtarget::getOccupancyWi
const Function &F) const {
unsigned WorkGroupSize = getFlatWorkGroupSizes(F).second;
unsigned WorkGroupsPerCu = getMaxWorkGroupsPerCU(WorkGroupSize);
+ if (!WorkGroupsPerCu)
+ return 0;
unsigned MaxWaves = getMaxWavesPerEU();
unsigned Limit = getLocalMemorySize() * MaxWaves / WorkGroupsPerCu;
unsigned NumWaves = Limit / (Bytes ? Bytes : 1u);
@@ -271,7 +294,8 @@ AMDGPUSubtarget::getDefaultFlatWorkGroup
case CallingConv::AMDGPU_CS:
case CallingConv::AMDGPU_KERNEL:
case CallingConv::SPIR_KERNEL:
- return std::make_pair(getWavefrontSize() * 2, getWavefrontSize() * 4);
+ return std::make_pair(getWavefrontSize() * 2,
+ std::max(getWavefrontSize() * 4, 256u));
case CallingConv::AMDGPU_VS:
case CallingConv::AMDGPU_LS:
case CallingConv::AMDGPU_HS:
@@ -496,7 +520,14 @@ void GCNSubtarget::overrideSchedPolicy(M
Policy.ShouldTrackLaneMasks = true;
}
+bool GCNSubtarget::hasMadF16() const {
+ return InstrInfo.pseudoToMCOpcode(AMDGPU::V_MAD_F16) != -1;
+}
+
unsigned GCNSubtarget::getOccupancyWithNumSGPRs(unsigned SGPRs) const {
+ if (getGeneration() >= AMDGPUSubtarget::GFX10)
+ return 10;
+
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
if (SGPRs <= 80)
return 10;
@@ -543,6 +574,9 @@ unsigned GCNSubtarget::getOccupancyWithN
unsigned GCNSubtarget::getReservedNumSGPRs(const MachineFunction &MF) const {
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
+ if (getGeneration() >= AMDGPUSubtarget::GFX10)
+ return 2; // VCC. FLAT_SCRATCH and XNACK are no longer in SGPRs.
+
if (MFI.hasFlatScratchInit()) {
if (getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS)
return 6; // FLAT_SCRATCH, XNACK, VCC (in that order).
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed Apr 24 10:03:15 2019
@@ -55,7 +55,8 @@ public:
SOUTHERN_ISLANDS = 4,
SEA_ISLANDS = 5,
VOLCANIC_ISLANDS = 6,
- GFX9 = 7
+ GFX9 = 7,
+ GFX10 = 8
};
private:
@@ -293,6 +294,7 @@ protected:
bool UnalignedBufferAccess;
bool HasApertureRegs;
bool EnableXNACK;
+ bool EnableCuMode;
bool TrapHandler;
// Used as options.
@@ -313,6 +315,7 @@ protected:
bool CIInsts;
bool GFX8Insts;
bool GFX9Insts;
+ bool GFX10Insts;
bool GFX7GFX8GFX9Insts;
bool SGPRInitBug;
bool HasSMemRealTime;
@@ -329,24 +332,41 @@ protected:
bool HasSDWAOutModsVOPC;
bool HasDPP;
bool HasR128A16;
+ bool HasNSAEncoding;
bool HasDLInsts;
bool HasDot1Insts;
bool HasDot2Insts;
bool EnableSRAMECC;
bool DoesNotSupportSRAMECC;
+ bool HasNoSdstCMPX;
+ bool HasVscnt;
+ bool HasRegisterBanking;
+ bool HasVOP3Literal;
+ bool HasNoDataDepHazard;
bool FlatAddressSpace;
bool FlatInstOffsets;
bool FlatGlobalInsts;
bool FlatScratchInsts;
+ bool ScalarFlatScratchInsts;
bool AddNoCarryInsts;
bool HasUnpackedD16VMem;
bool R600ALUInst;
bool CaymanISA;
bool CFALUBug;
+ bool LDSMisalignedBug;
bool HasVertexCache;
short TexVTXClauseSize;
bool ScalarizeGlobal;
+ bool HasVcmpxPermlaneHazard;
+ bool HasVMEMtoScalarWriteHazard;
+ bool HasSMEMtoVectorWriteHazard;
+ bool HasInstFwdPrefetchBug;
+ bool HasVcmpxExecWARHazard;
+ bool HasLdsBranchVmemWARHazard;
+ bool HasNSAtoVMEMBug;
+ bool HasFlatSegmentOffsetBug;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable;
@@ -583,6 +603,10 @@ public:
return EnableXNACK;
}
+ bool isCuModeEnabled() const {
+ return EnableCuMode;
+ }
+
bool hasFlatAddressSpace() const {
return FlatAddressSpace;
}
@@ -599,6 +623,14 @@ public:
return FlatScratchInsts;
}
+ bool hasScalarFlatScratchInsts() const {
+ return ScalarFlatScratchInsts;
+ }
+
+ bool hasFlatSegmentOffsetBug() const {
+ return HasFlatSegmentOffsetBug;
+ }
+
bool hasFlatLgkmVMemCountInOrder() const {
return getGeneration() > GFX9;
}
@@ -654,10 +686,6 @@ public:
return HasSDWAOutModsVOPC;
}
- bool vmemWriteNeedsExpWaitcnt() const {
- return getGeneration() < SEA_ISLANDS;
- }
-
bool hasDLInsts() const {
return HasDLInsts;
}
@@ -674,6 +702,30 @@ public:
return EnableSRAMECC;
}
+ bool hasNoSdstCMPX() const {
+ return HasNoSdstCMPX;
+ }
+
+ bool hasVscnt() const {
+ return HasVscnt;
+ }
+
+ bool hasRegisterBanking() const {
+ return HasRegisterBanking;
+ }
+
+ bool hasVOP3Literal() const {
+ return HasVOP3Literal;
+ }
+
+ bool hasNoDataDepHazard() const {
+ return HasNoDataDepHazard;
+ }
+
+ bool vmemWriteNeedsExpWaitcnt() const {
+ return getGeneration() < SEA_ISLANDS;
+ }
+
// Scratch is allocated in 256 dword per wave blocks for the entire
// wavefront. When viewed from the perspecive of an arbitrary workitem, this
// is 4-byte aligned.
@@ -782,6 +834,12 @@ public:
return HasR128A16;
}
+ bool hasNSAEncoding() const {
+ return HasNSAEncoding;
+ }
+
+ bool hasMadF16() const;
+
bool enableSIScheduler() const {
return EnableSIScheduler;
}
@@ -816,6 +874,38 @@ public:
getGeneration() <= AMDGPUSubtarget::GFX9;
}
+ bool hasVcmpxPermlaneHazard() const {
+ return HasVcmpxPermlaneHazard;
+ }
+
+ bool hasVMEMtoScalarWriteHazard() const {
+ return HasVMEMtoScalarWriteHazard;
+ }
+
+ bool hasSMEMtoVectorWriteHazard() const {
+ return HasSMEMtoVectorWriteHazard;
+ }
+
+ bool hasLDSMisalignedBug() const {
+ return LDSMisalignedBug && !EnableCuMode;
+ }
+
+ bool hasInstFwdPrefetchBug() const {
+ return HasInstFwdPrefetchBug;
+ }
+
+ bool hasVcmpxExecWARHazard() const {
+ return HasVcmpxExecWARHazard;
+ }
+
+ bool hasLdsBranchVmemWARHazard() const {
+ return HasLdsBranchVmemWARHazard;
+ }
+
+ bool hasNSAtoVMEMBug() const {
+ return HasNSAtoVMEMBug;
+ }
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Apr 24 10:03:15 2019
@@ -999,6 +999,10 @@ public:
return AMDGPU::isGFX9(getSTI());
}
+ bool isGFX10() const {
+ return AMDGPU::isGFX10(getSTI());
+ }
+
bool hasInv2PiInlineImm() const {
return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
}
@@ -1407,7 +1411,7 @@ bool AMDGPUOperand::isRegClass(unsigned
bool AMDGPUOperand::isSDWAOperand(MVT type) const {
if (AsmParser->isVI())
return isVReg32();
- else if (AsmParser->isGFX9())
+ else if (AsmParser->isGFX9() || AsmParser->isGFX10())
return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
else
return false;
@@ -2953,7 +2957,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
if (getParser().parseIdentifier(KernelName))
return true;
- kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor();
+ kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
StringSet<> Seen;
Modified: llvm/trunk/lib/Target/AMDGPU/GCNProcessors.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNProcessors.td?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNProcessors.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNProcessors.td Wed Apr 24 10:03:15 2019
@@ -164,3 +164,10 @@ def : ProcessorModel<"gfx909", SIQuarter
FeatureISAVersion9_0_9.Features
>;
+//===----------------------------------------------------------------------===//
+// GCN GFX10.
+//===----------------------------------------------------------------------===//
+
+def : ProcessorModel<"gfx1010", GFX10SpeedModel,
+ FeatureISAVersion10_1_0.Features
+>;
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Wed Apr 24 10:03:15 2019
@@ -60,39 +60,40 @@ StringRef AMDGPUTargetStreamer::getArchN
AMDGPU::GPUKind AK;
switch (ElfMach) {
- case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
- case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
- case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
- case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
- case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
- case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
- case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
- case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
- case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
- case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
- case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
- case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
- case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
- case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
- case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
- case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
- case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
+ case ELF::EF_AMDGPU_MACH_R600_R600: AK = GK_R600; break;
+ case ELF::EF_AMDGPU_MACH_R600_R630: AK = GK_R630; break;
+ case ELF::EF_AMDGPU_MACH_R600_RS880: AK = GK_RS880; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV670: AK = GK_RV670; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV710: AK = GK_RV710; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV730: AK = GK_RV730; break;
+ case ELF::EF_AMDGPU_MACH_R600_RV770: AK = GK_RV770; break;
+ case ELF::EF_AMDGPU_MACH_R600_CEDAR: AK = GK_CEDAR; break;
+ case ELF::EF_AMDGPU_MACH_R600_CYPRESS: AK = GK_CYPRESS; break;
+ case ELF::EF_AMDGPU_MACH_R600_JUNIPER: AK = GK_JUNIPER; break;
+ case ELF::EF_AMDGPU_MACH_R600_REDWOOD: AK = GK_REDWOOD; break;
+ case ELF::EF_AMDGPU_MACH_R600_SUMO: AK = GK_SUMO; break;
+ case ELF::EF_AMDGPU_MACH_R600_BARTS: AK = GK_BARTS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAICOS: AK = GK_CAICOS; break;
+ case ELF::EF_AMDGPU_MACH_R600_CAYMAN: AK = GK_CAYMAN; break;
+ case ELF::EF_AMDGPU_MACH_R600_TURKS: AK = GK_TURKS; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX600: AK = GK_GFX600; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX601: AK = GK_GFX601; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX700: AK = GK_GFX700; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX701: AK = GK_GFX701; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX702: AK = GK_GFX702; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX703: AK = GK_GFX703; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX704: AK = GK_GFX704; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX801: AK = GK_GFX801; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX802: AK = GK_GFX802; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX803: AK = GK_GFX803; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX810: AK = GK_GFX810; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX900: AK = GK_GFX900; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX902: AK = GK_GFX902; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX904: AK = GK_GFX904; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX906: AK = GK_GFX906; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
+ case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
}
StringRef GPUName = getArchNameAMDGCN(AK);
@@ -139,6 +140,7 @@ unsigned AMDGPUTargetStreamer::getElfMac
case GK_GFX904: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX904;
case GK_GFX906: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX906;
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
+ case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
@@ -324,6 +326,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsa
PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+ if (IVersion.Major >= 10) {
+ PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+ PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+ PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
+ compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+ }
PRINT_FIELD(
OS, ".amdhsa_exception_fp_ieee_invalid_op", KD,
compute_pgm_rsrc2,
Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Wed Apr 24 10:03:15 2019
@@ -523,6 +523,15 @@ enum DppCtrl : unsigned {
#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
#define C_00B848_IEEE_MODE 0xFF7FFFFF
+#define S_00B848_WGP_MODE(x) (((x) & 0x1) << 29)
+#define G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1)
+#define C_00B848_WGP_MODE 0xDFFFFFFF
+#define S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30)
+#define G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1)
+#define C_00B848_MEM_ORDERED 0xBFFFFFFF
+#define S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31)
+#define G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
+#define C_00B848_FWD_PROGRESS 0x7FFFFFFF
// Helpers for setting FLOAT_MODE
@@ -553,6 +562,15 @@ enum DppCtrl : unsigned {
#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
#define S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)
+#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
+#define S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)
+#define S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22)
+#define S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23)
+#define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8
+#define S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15)
+#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
+#define S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15)
+
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
} // End namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed Apr 24 10:03:15 2019
@@ -5591,7 +5591,9 @@ enum SIEncodingFamily {
SDWA = 2,
SDWA9 = 3,
GFX80 = 4,
- GFX9 = 5
+ GFX9 = 5,
+ GFX10 = 6,
+ SDWA10 = 7
};
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
@@ -5604,6 +5606,8 @@ static SIEncodingFamily subtargetEncodin
case AMDGPUSubtarget::VOLCANIC_ISLANDS:
case AMDGPUSubtarget::GFX9:
return SIEncodingFamily::VI;
+ case AMDGPUSubtarget::GFX10:
+ return SIEncodingFamily::GFX10;
}
llvm_unreachable("Unknown subtarget generation!");
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.td Wed Apr 24 10:03:15 2019
@@ -23,6 +23,8 @@ def SIEncodingFamily {
int SDWA9 = 3;
int GFX80 = 4;
int GFX9 = 5;
+ int GFX10 = 6;
+ int SDWA10 = 7;
}
//===----------------------------------------------------------------------===//
Modified: llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIRegisterInfo.td Wed Apr 24 10:03:15 2019
@@ -112,9 +112,9 @@ def TMA : RegisterWithSubRegs<"tma", [TM
}
foreach Index = 0-15 in {
- def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
- def TTMP#Index#_gfx9 : SIReg<"ttmp"#Index, !add(108, Index)>;
- def TTMP#Index : SIReg<"", 0>;
+ def TTMP#Index#_vi : SIReg<"ttmp"#Index, !add(112, Index)>;
+ def TTMP#Index#_gfx9_gfx10 : SIReg<"ttmp"#Index, !add(108, Index)>;
+ def TTMP#Index : SIReg<"", 0>;
}
multiclass FLAT_SCR_LOHI_m <string n, bits<16> ci_e, bits<16> vi_e> {
@@ -311,8 +311,8 @@ class TmpRegTuples<string tgt,
getSubRegs<size>.ret>;
foreach Index = {0, 2, 4, 6, 8, 10, 12, 14} in {
- def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
- def TTMP#Index#_TTMP#!add(Index,1)#_gfx9 : TmpRegTuples<"_gfx9", 2, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_vi : TmpRegTuples<"_vi", 2, Index>;
+ def TTMP#Index#_TTMP#!add(Index,1)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 2, Index>;
}
foreach Index = {0, 4, 8, 12} in {
@@ -321,7 +321,7 @@ foreach Index = {0, 4, 8, 12} in {
_TTMP#!add(Index,3)#_vi : TmpRegTuples<"_vi", 4, Index>;
def TTMP#Index#_TTMP#!add(Index,1)#
_TTMP#!add(Index,2)#
- _TTMP#!add(Index,3)#_gfx9 : TmpRegTuples<"_gfx9", 4, Index>;
+ _TTMP#!add(Index,3)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 4, Index>;
}
foreach Index = {0, 4, 8} in {
@@ -338,7 +338,7 @@ foreach Index = {0, 4, 8} in {
_TTMP#!add(Index,4)#
_TTMP#!add(Index,5)#
_TTMP#!add(Index,6)#
- _TTMP#!add(Index,7)#_gfx9 : TmpRegTuples<"_gfx9", 8, Index>;
+ _TTMP#!add(Index,7)#_gfx9_gfx10 : TmpRegTuples<"_gfx9_gfx10", 8, Index>;
}
def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_vi :
@@ -348,12 +348,12 @@ def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_
TTMP8_vi, TTMP9_vi, TTMP10_vi, TTMP11_vi,
TTMP12_vi, TTMP13_vi, TTMP14_vi, TTMP15_vi]>;
-def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9 :
+def TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15_gfx9_gfx10 :
TmpRegTuplesBase<0, 16,
- [TTMP0_gfx9, TTMP1_gfx9, TTMP2_gfx9, TTMP3_gfx9,
- TTMP4_gfx9, TTMP5_gfx9, TTMP6_gfx9, TTMP7_gfx9,
- TTMP8_gfx9, TTMP9_gfx9, TTMP10_gfx9, TTMP11_gfx9,
- TTMP12_gfx9, TTMP13_gfx9, TTMP14_gfx9, TTMP15_gfx9]>;
+ [TTMP0_gfx9_gfx10, TTMP1_gfx9_gfx10, TTMP2_gfx9_gfx10, TTMP3_gfx9_gfx10,
+ TTMP4_gfx9_gfx10, TTMP5_gfx9_gfx10, TTMP6_gfx9_gfx10, TTMP7_gfx9_gfx10,
+ TTMP8_gfx9_gfx10, TTMP9_gfx9_gfx10, TTMP10_gfx9_gfx10, TTMP11_gfx9_gfx10,
+ TTMP12_gfx9_gfx10, TTMP13_gfx9_gfx10, TTMP14_gfx9_gfx10, TTMP15_gfx9_gfx10]>;
// VGPR 32-bit registers
Modified: llvm/trunk/lib/Target/AMDGPU/SISchedule.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SISchedule.td?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SISchedule.td (original)
+++ llvm/trunk/lib/Target/AMDGPU/SISchedule.td Wed Apr 24 10:03:15 2019
@@ -37,6 +37,9 @@ def WriteDouble : SchedWrite;
// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;
+// Conversion to or from f64 instruction
+def WriteDoubleCvt : SchedWrite;
+
// Half rate 64-bit instructions.
def Write64Bit : SchedWrite;
@@ -61,6 +64,7 @@ class SISchedMachineModel : SchedMachine
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
+def GFX10SpeedModel : SISchedMachineModel;
// XXX: Are the resource counts correct?
def HWBranch : ProcResource<1> {
@@ -81,6 +85,9 @@ def HWVMEM : ProcResource<1> {
def HWVALU : ProcResource<1> {
let BufferSize = 1;
}
+def HWRC : ProcResource<1> { // Register destination cache
+ let BufferSize = 1;
+}
class HWWriteRes<SchedWrite write, list<ProcResourceKind> resources,
int latency> : WriteRes<write, resources> {
@@ -124,6 +131,7 @@ defm : SICommonWriteRes;
def : HWVALUWriteRes<WriteFloatFMA, 1>;
def : HWVALUWriteRes<WriteDouble, 4>;
def : HWVALUWriteRes<WriteDoubleAdd, 2>;
+def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
@@ -136,7 +144,32 @@ defm : SICommonWriteRes;
def : HWVALUWriteRes<WriteFloatFMA, 16>;
def : HWVALUWriteRes<WriteDouble, 16>;
def : HWVALUWriteRes<WriteDoubleAdd, 8>;
+def : HWVALUWriteRes<WriteDoubleCvt, 4>;
def : InstRW<[WriteCopy], (instrs COPY)>;
} // End SchedModel = SIQuarterSpeedModel
+
+let SchedModel = GFX10SpeedModel in {
+
+// The latency values are 1 / (operations / cycle).
+// Add 1 stall cycle for VGPR read.
+def : HWWriteRes<Write32Bit, [HWVALU, HWRC], 5>;
+def : HWWriteRes<Write64Bit, [HWVALU, HWRC], 9>;
+def : HWWriteRes<WriteQuarterRate32, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteFloatFMA, [HWVALU, HWRC], 5>;
+def : HWWriteRes<WriteDouble, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteDoubleAdd, [HWVALU, HWRC], 17>;
+def : HWWriteRes<WriteDoubleCvt, [HWVALU, HWRC], 17>;
+
+def : HWWriteRes<WriteBranch, [HWBranch], 32>;
+def : HWWriteRes<WriteExport, [HWExport, HWRC], 16>;
+def : HWWriteRes<WriteLDS, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteSALU, [HWSALU, HWRC], 5>;
+def : HWWriteRes<WriteSMEM, [HWLGKM, HWRC], 20>;
+def : HWWriteRes<WriteVMEM, [HWVMEM, HWRC], 320>;
+def : HWWriteRes<WriteBarrier, [HWBranch], 2000>;
+
+def : InstRW<[WriteCopy], (instrs COPY)>;
+
+} // End SchedModel = GFX10SpeedModel
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Wed Apr 24 10:03:15 2019
@@ -435,11 +435,21 @@ void initDefaultAMDKernelCodeT(amd_kerne
Header.kernarg_segment_alignment = 4;
Header.group_segment_alignment = 4;
Header.private_segment_alignment = 4;
+
+ if (Version.Major >= 10) {
+ Header.compute_pgm_resource_registers |=
+ S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
+ S_00B848_MEM_ORDERED(1);
+ }
}
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+ const MCSubtargetInfo *STI) {
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+
amdhsa::kernel_descriptor_t KD;
memset(&KD, 0, sizeof(KD));
+
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
@@ -449,6 +459,13 @@ amdhsa::kernel_descriptor_t getDefaultAm
amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
+ if (Version.Major >= 10) {
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+ STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
+ AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
+ amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+ }
return KD;
}
@@ -679,6 +696,10 @@ bool isGFX9(const MCSubtargetInfo &STI)
return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}
+bool isGFX10(const MCSubtargetInfo &STI) {
+ return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
+}
+
bool isGCN3Encoding(const MCSubtargetInfo &STI) {
return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}
@@ -704,46 +725,46 @@ bool isRegIntersect(unsigned Reg0, unsig
CASE_CI_VI(FLAT_SCR) \
CASE_CI_VI(FLAT_SCR_LO) \
CASE_CI_VI(FLAT_SCR_HI) \
- CASE_VI_GFX9(TTMP0) \
- CASE_VI_GFX9(TTMP1) \
- CASE_VI_GFX9(TTMP2) \
- CASE_VI_GFX9(TTMP3) \
- CASE_VI_GFX9(TTMP4) \
- CASE_VI_GFX9(TTMP5) \
- CASE_VI_GFX9(TTMP6) \
- CASE_VI_GFX9(TTMP7) \
- CASE_VI_GFX9(TTMP8) \
- CASE_VI_GFX9(TTMP9) \
- CASE_VI_GFX9(TTMP10) \
- CASE_VI_GFX9(TTMP11) \
- CASE_VI_GFX9(TTMP12) \
- CASE_VI_GFX9(TTMP13) \
- CASE_VI_GFX9(TTMP14) \
- CASE_VI_GFX9(TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1) \
- CASE_VI_GFX9(TTMP2_TTMP3) \
- CASE_VI_GFX9(TTMP4_TTMP5) \
- CASE_VI_GFX9(TTMP6_TTMP7) \
- CASE_VI_GFX9(TTMP8_TTMP9) \
- CASE_VI_GFX9(TTMP10_TTMP11) \
- CASE_VI_GFX9(TTMP12_TTMP13) \
- CASE_VI_GFX9(TTMP14_TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
- CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
- CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
- CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
- CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0) \
+ CASE_VI_GFX9_GFX10(TTMP1) \
+ CASE_VI_GFX9_GFX10(TTMP2) \
+ CASE_VI_GFX9_GFX10(TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4) \
+ CASE_VI_GFX9_GFX10(TTMP5) \
+ CASE_VI_GFX9_GFX10(TTMP6) \
+ CASE_VI_GFX9_GFX10(TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8) \
+ CASE_VI_GFX9_GFX10(TTMP9) \
+ CASE_VI_GFX9_GFX10(TTMP10) \
+ CASE_VI_GFX9_GFX10(TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12) \
+ CASE_VI_GFX9_GFX10(TTMP13) \
+ CASE_VI_GFX9_GFX10(TTMP14) \
+ CASE_VI_GFX9_GFX10(TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1) \
+ CASE_VI_GFX9_GFX10(TTMP2_TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5) \
+ CASE_VI_GFX9_GFX10(TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9) \
+ CASE_VI_GFX9_GFX10(TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12_TTMP13) \
+ CASE_VI_GFX9_GFX10(TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
+ CASE_VI_GFX9_GFX10(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
+ CASE_VI_GFX9_GFX10(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
+ CASE_VI_GFX9_GFX10(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
}
#define CASE_CI_VI(node) \
assert(!isSI(STI)); \
case node: return isCI(STI) ? node##_ci : node##_vi;
-#define CASE_VI_GFX9(node) \
- case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;
+#define CASE_VI_GFX9_GFX10(node) \
+ case node: return (isGFX9(STI) || isGFX10(STI)) ? node##_gfx9_gfx10 : node##_vi;
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
if (STI.getTargetTriple().getArch() == Triple::r600)
@@ -752,17 +773,17 @@ unsigned getMCReg(unsigned Reg, const MC
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
-#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;
+#define CASE_VI_GFX9_GFX10(node) case node##_vi: case node##_gfx9_gfx10: return node;
unsigned mc2PseudoReg(unsigned Reg) {
MAP_REG2REG
}
#undef CASE_CI_VI
-#undef CASE_VI_GFX9
+#undef CASE_VI_GFX9_GFX10
#undef MAP_REG2REG
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
@@ -1030,5 +1051,6 @@ const SourceOfDivergence *lookupSourceOf
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
return lookupSourceOfDivergence(IntrID);
}
+
} // namespace AMDGPU
} // namespace llvm
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Wed Apr 24 10:03:15 2019
@@ -244,7 +244,8 @@ int getMCOpcode(uint16_t Opcode, unsigne
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
const MCSubtargetInfo *STI);
-amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor();
+amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
+ const MCSubtargetInfo *STI);
bool isGroupSegment(const GlobalValue *GV);
bool isGlobalSegment(const GlobalValue *GV);
@@ -398,6 +399,7 @@ bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
bool isVI(const MCSubtargetInfo &STI);
bool isGFX9(const MCSubtargetInfo &STI);
+bool isGFX10(const MCSubtargetInfo &STI);
/// Is Reg - scalar register
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI);
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h Wed Apr 24 10:03:15 2019
@@ -82,6 +82,9 @@ COMPPGM1(priv,
COMPPGM1(enable_dx10_clamp, compute_pgm_rsrc1_dx10_clamp, DX10_CLAMP),
COMPPGM1(debug_mode, compute_pgm_rsrc1_debug_mode, DEBUG_MODE),
COMPPGM1(enable_ieee_mode, compute_pgm_rsrc1_ieee_mode, IEEE_MODE),
+COMPPGM1(enable_wgp_mode, compute_pgm_rsrc1_wgp_mode, WGP_MODE),
+COMPPGM1(enable_mem_ordered, compute_pgm_rsrc1_mem_ordered, MEM_ORDERED),
+COMPPGM1(enable_fwd_progress, compute_pgm_rsrc1_fwd_progress, FWD_PROGRESS),
// TODO: bulky
// TODO: cdbg_user
COMPPGM2(enable_sgpr_private_segment_wave_byte_offset, compute_pgm_rsrc2_scratch_en, SCRATCH_EN),
Modified: llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/elf-header-flags-mach.ll Wed Apr 24 10:03:15 2019
@@ -47,6 +47,7 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx904 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX904 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx906 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX906 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj -file-headers - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; ARCH-R600: Arch: r600
; ARCH-GCN: Arch: amdgcn
@@ -87,6 +88,7 @@
; GFX904: EF_AMDGPU_MACH_AMDGCN_GFX904 (0x2E)
; GFX906: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
+; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; ALL: ]
define amdgpu_kernel void @elf_header() {
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-note-no-func.ll Wed Apr 24 10:03:15 2019
@@ -24,6 +24,7 @@
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx904 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX904 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx906 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX906 %s
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx909 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX909 %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-code-object-v3 | FileCheck --check-prefix=HSA --check-prefix=HSA-GFX1010 %s
; HSA: .hsa_code_object_version 2,1
; HSA-SI600: .hsa_code_object_isa 6,0,0,"AMD","AMDGPU"
@@ -42,3 +43,4 @@
; HSA-GFX904: .hsa_code_object_isa 9,0,4,"AMD","AMDGPU"
; HSA-GFX906: .hsa_code_object_isa 9,0,6,"AMD","AMDGPU"
; HSA-GFX909: .hsa_code_object_isa 9,0,9,"AMD","AMDGPU"
+; HSA-GFX1010: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
Modified: llvm/trunk/tools/llvm-readobj/ELFDumper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-readobj/ELFDumper.cpp?rev=359113&r1=359112&r2=359113&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-readobj/ELFDumper.cpp (original)
+++ llvm/trunk/tools/llvm-readobj/ELFDumper.cpp Wed Apr 24 10:03:15 2019
@@ -1275,6 +1275,7 @@ static const EnumEntry<unsigned> ElfHead
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX904),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX906),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_XNACK),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_SRAM_ECC)
};
More information about the llvm-commits
mailing list