[llvm] r363577 - [AMDGPU] gfx1010 wave32 metadata
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 09:48:56 PDT 2019
Author: rampitec
Date: Mon Jun 17 09:48:56 2019
New Revision: 363577
URL: http://llvm.org/viewvc/llvm-project?rev=363577&view=rev
Log:
[AMDGPU] gfx1010 wave32 metadata
Differential Revision: https://reviews.llvm.org/D63207
Added:
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s
llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s
- copied, changed from r363576, llvm/trunk/test/MC/AMDGPU/hsa.s
llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s
Modified:
llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h
llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s
llvm/trunk/test/MC/AMDGPU/hsa.s
llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
Modified: llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h (original)
+++ llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h Mon Jun 17 09:48:56 2019
@@ -143,6 +143,7 @@ enum : int32_t {
KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
+ KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
};
#undef KERNEL_CODE_PROPERTY
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Mon Jun 17 09:48:56 2019
@@ -361,6 +361,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKern
KernelCodeProperties |=
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
}
+ if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
+ KernelCodeProperties |=
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
return KernelCodeProperties;
}
@@ -1081,6 +1085,10 @@ void AMDGPUAsmPrinter::EmitPALMetadata(c
MD->setSpiPsInputEna(MFI->getPSInputEnable());
MD->setSpiPsInputAddr(MFI->getPSInputAddr());
}
+
+ const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+ if (STM.isWave32())
+ MD->setWave32(MF.getFunction().getCallingConv());
}
// This is supposed to be log2(Size)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h Mon Jun 17 09:48:56 2019
@@ -126,8 +126,12 @@ enum amd_code_property_mask_t {
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
- AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
- AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = 10,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = 1,
+ AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = ((1 << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+
+ AMD_CODE_PROPERTY_RESERVED1_SHIFT = 11,
+ AMD_CODE_PROPERTY_RESERVED1_WIDTH = 5,
AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
/// Control wave ID base counter for GDS ordered-append. Used to set
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Mon Jun 17 09:48:56 2019
@@ -3433,6 +3433,14 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
UserSGPRCount += 1;
+ } else if (ID == ".amdhsa_wavefront_size32") {
+ if (IVersion.Major < 10)
+ return getParser().Error(IDRange.Start, "directive requires gfx10+",
+ IDRange);
+ EnableWavefrontSize32 = Val;
+ PARSE_BITS_ENTRY(KD.kernel_code_properties,
+ KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ Val, ValRange);
} else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
@@ -3680,6 +3688,30 @@ bool AMDGPUAsmParser::ParseAMDKernelCode
}
Lex();
+ if (ID == "enable_wavefront_size32") {
+ if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+ if (!isGFX10())
+ return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
+ } else {
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
+ }
+ }
+
+ if (ID == "wavefront_size") {
+ if (Header.wavefront_size == 5) {
+ if (!isGFX10())
+ return TokError("wavefront_size=5 is only allowed on GFX10+");
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+ return TokError("wavefront_size=5 requires +WavefrontSize32");
+ } else if (Header.wavefront_size == 6) {
+ if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+ return TokError("wavefront_size=6 requires +WavefrontSize64");
+ }
+ }
+
if (ID == "enable_wgp_mode") {
if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Mon Jun 17 09:48:56 2019
@@ -284,6 +284,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsa
PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
kernel_code_properties,
amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+ if (IVersion.Major >= 10)
+ PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
+ kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
PRINT_FIELD(
OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
compute_pgm_rsrc2,
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Mon Jun 17 09:48:56 2019
@@ -457,6 +457,10 @@ void initDefaultAMDKernelCodeT(amd_kerne
Header.private_segment_alignment = 4;
if (Version.Major >= 10) {
+ if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
+ Header.wavefront_size = 5;
+ Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+ }
Header.compute_pgm_resource_registers |=
S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
S_00B848_MEM_ORDERED(1);
@@ -480,6 +484,9 @@ amdhsa::kernel_descriptor_t getDefaultAm
AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
if (Version.Major >= 10) {
+ AMDHSA_BITS_SET(KD.kernel_code_properties,
+ amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+ STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp Mon Jun 17 09:48:56 2019
@@ -233,6 +233,29 @@ void AMDGPUPALMetadata::setScratchSize(C
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
+// Set the hardware register bit in PAL metadata to enable wave32 on the
+// shader of the given calling convention.
+void AMDGPUPALMetadata::setWave32(unsigned CC) {
+ switch (CC) {
+ case CallingConv::AMDGPU_HS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_GS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_GS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_VS:
+ setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_VS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_PS:
+ setRegister(PALMD::R_A1B6_SPI_PS_IN_CONTROL, S_0286D8_PS_W32_EN(1));
+ break;
+ case CallingConv::AMDGPU_CS:
+ setRegister(PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR,
+ S_00B800_CS_W32_EN(1));
+ break;
+ }
+}
+
// Convert a register number to name, for display by toString().
// Returns nullptr if none.
static const char *getRegisterName(unsigned RegNum) {
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h Mon Jun 17 09:48:56 2019
@@ -80,6 +80,10 @@ public:
// Set the scratch size in the metadata.
void setScratchSize(unsigned CC, unsigned Val);
+ // Set the hardware register bit in PAL metadata to enable wave32 on the
+ // shader of the given calling convention.
+ void setWave32(unsigned CC);
+
// Emit the accumulated PAL metadata as asm directives.
// This is called from AMDGPUTargetAsmStreamer::Finish().
void toString(std::string &S);
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h Mon Jun 17 09:48:56 2019
@@ -109,6 +109,7 @@ CODEPROP(enable_sgpr_private_segment_siz
CODEPROP(enable_sgpr_grid_workgroup_count_x, ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
CODEPROP(enable_sgpr_grid_workgroup_count_y, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
CODEPROP(enable_sgpr_grid_workgroup_count_z, ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
+CODEPROP(enable_wavefront_size32, ENABLE_WAVEFRONT_SIZE32),
CODEPROP(enable_ordered_append_gds, ENABLE_ORDERED_APPEND_GDS),
CODEPROP(private_element_size, PRIVATE_ELEMENT_SIZE),
CODEPROP(is_ptr64, IS_PTR64),
Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll Mon Jun 17 09:48:56 2019
@@ -1,6 +1,7 @@
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; run: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s
@var = addrspace(1) global float 0.0
@@ -14,10 +15,12 @@
; CHECK: .max_flat_workgroup_size: 256
; CHECK: .name: test
; CHECK: .private_segment_fixed_size: 0
-; CHECK: .sgpr_count: 8
+; WAVE64: .sgpr_count: 8
+; WAVE32: .sgpr_count: 10
; CHECK: .symbol: test.kd
; CHECK: .vgpr_count: 6
-; CHECK: .wavefront_size: 64
+; WAVE64: .wavefront_size: 64
+; WAVE32: .wavefront_size: 32
define amdgpu_kernel void @test(
half addrspace(1)* %r,
half addrspace(1)* %a,
@@ -34,6 +37,7 @@ entry:
; GFX700: .sgpr_spill_count: 40
; GFX803: .sgpr_spill_count: 24
; GFX900: .sgpr_spill_count: 24
+; GFX1010: .sgpr_spill_count: 24
; CHECK: .symbol: num_spilled_sgprs.kd
define amdgpu_kernel void @num_spilled_sgprs(
i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32],
Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll?rev=363577&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll Mon Jun 17 09:48:56 2019
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
+
+; GCN: ---
+; GCN: Kernels:
+; GCN: - Name: wavefrontsize
+; GCN: CodeProps:
+; GFX10-32: WavefrontSize: 32
+; GFX10-64: WavefrontSize: 64
+; GCN: ...
+define amdgpu_kernel void @wavefrontsize() {
+entry:
+ ret void
+}
Modified: llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s Mon Jun 17 09:48:56 2019
@@ -1,4 +1,5 @@
// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s
+// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=GFX10
// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=NOT-AMDHSA
.text
@@ -44,6 +45,46 @@
// CHECK: error: expected .amdhsa_ directive or .end_amdhsa_kernel
.end_amdhsa_kernel
+.amdhsa_kernel foo
+ .amdhsa_wavefront_size32 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_workgroup_processor_mode 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_memory_ordered 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_forward_progress 1
+ // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_wavefront_size32 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_workgroup_processor_mode 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_memory_ordered 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+ .amdhsa_forward_progress 5
+ // GFX10: error: value out of range
+.end_amdhsa_kernel
+
.set .amdgcn.next_free_vgpr, "foo"
v_mov_b32_e32 v0, s0
// CHECK: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
Added: llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s?rev=363577&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s (added)
+++ llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s Mon Jun 17 09:48:56 2019
@@ -0,0 +1,223 @@
+// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// big endian not supported
+// XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc
+
+// READOBJ: Section Headers
+// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
+
+// READOBJ: Relocation section '.rela.rodata' at offset
+// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
+// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
+// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
+
+// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
+// READOBJ: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
+// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
+// READOBJ: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
+// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
+// READOBJ: {{[0-9]+}}: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
+// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
+
+// OBJDUMP: Contents of section .rodata
+// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
+// minimal
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
+// complete
+// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
+// special_sgpr
+// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
+
+.p2align 8
+.type minimal, at function
+minimal:
+ s_endpgm
+
+.p2align 8
+.type complete, at function
+complete:
+ s_endpgm
+
+.p2align 8
+.type special_sgpr, at function
+special_sgpr:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+// Test that only specifying required directives is allowed, and that defaulted
+// values are omitted.
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+// Test that we can specify all available directives with non-default values.
+.p2align 6
+.amdhsa_kernel complete
+ .amdhsa_group_segment_fixed_size 1
+ .amdhsa_private_segment_fixed_size 1
+ .amdhsa_user_sgpr_private_segment_buffer 1
+ .amdhsa_user_sgpr_dispatch_ptr 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_dispatch_id 1
+ .amdhsa_user_sgpr_flat_scratch_init 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_wavefront_size32 1
+ .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+ .amdhsa_system_sgpr_workgroup_id_x 0
+ .amdhsa_system_sgpr_workgroup_id_y 1
+ .amdhsa_system_sgpr_workgroup_id_z 1
+ .amdhsa_system_sgpr_workgroup_info 1
+ .amdhsa_system_vgpr_workitem_id 1
+ .amdhsa_next_free_vgpr 9
+ .amdhsa_next_free_sgpr 27
+ .amdhsa_reserve_vcc 0
+ .amdhsa_reserve_flat_scratch 0
+ .amdhsa_reserve_xnack_mask 0
+ .amdhsa_float_round_mode_32 1
+ .amdhsa_float_round_mode_16_64 1
+ .amdhsa_float_denorm_mode_32 1
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_dx10_clamp 0
+ .amdhsa_ieee_mode 0
+ .amdhsa_fp16_overflow 1
+ .amdhsa_workgroup_processor_mode 1
+ .amdhsa_memory_ordered 1
+ .amdhsa_forward_progress 1
+ .amdhsa_exception_fp_ieee_invalid_op 1
+ .amdhsa_exception_fp_denorm_src 1
+ .amdhsa_exception_fp_ieee_div_zero 1
+ .amdhsa_exception_fp_ieee_overflow 1
+ .amdhsa_exception_fp_ieee_underflow 1
+ .amdhsa_exception_fp_ieee_inexact 1
+ .amdhsa_exception_int_div_zero 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel complete
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
+// ASM-NEXT: .amdhsa_next_free_vgpr 9
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 1
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
+
+// Test that we are including special SGPR usage in the granulated count.
+.p2align 6
+.amdhsa_kernel special_sgpr
+ // Same next_free_sgpr as "complete", but...
+ .amdhsa_next_free_sgpr 27
+ // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
+ // 3 granules to 4
+ .amdhsa_reserve_flat_scratch 1
+
+ .amdhsa_reserve_vcc 0
+ .amdhsa_reserve_xnack_mask 0
+
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_dx10_clamp 0
+ .amdhsa_ieee_mode 0
+ .amdhsa_next_free_vgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel special_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM: .end_amdhsa_kernel
+
+.section .foo
+
+.byte .amdgcn.gfx_generation_number
+// ASM: .byte 10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v7, s10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 8
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 11
+
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4
Copied: llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s (from r363576, llvm/trunk/test/MC/AMDGPU/hsa.s)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s?p2=llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s&p1=llvm/trunk/test/MC/AMDGPU/hsa.s&r1=363576&r2=363577&rev=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s Mon Jun 17 09:48:56 2019
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3 -show-encoding %s | FileCheck %s --check-prefix=ASM
-// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3 -show-encoding %s | llvm-readobj --symbols -S --sd | FileCheck %s --check-prefix=ELF
+// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | FileCheck %s --check-prefix=ASM
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
// ELF: Section {
// ELF: Name: .text
@@ -78,6 +78,9 @@ amd_kernel_code_t_test_all:
compute_pgm_rsrc1_dx10_clamp = 1
compute_pgm_rsrc1_debug_mode = 1
compute_pgm_rsrc1_ieee_mode = 1
+ compute_pgm_rsrc1_wgp_mode = 0
+ compute_pgm_rsrc1_mem_ordered = 0
+ compute_pgm_rsrc1_fwd_progress = 1
compute_pgm_rsrc2_scratch_en = 1
compute_pgm_rsrc2_user_sgpr = 1
compute_pgm_rsrc2_tgid_x_en = 1
@@ -120,7 +123,7 @@ amd_kernel_code_t_test_all:
kernarg_segment_alignment = 5
group_segment_alignment = 5
private_segment_alignment = 5
- wavefront_size = 5
+ wavefront_size = 6
call_convention = 1
runtime_loader_kernel_symbol = 1
.end_amd_kernel_code_t
@@ -143,6 +146,9 @@ amd_kernel_code_t_test_all:
// ASM: enable_dx10_clamp = 1
// ASM: debug_mode = 1
// ASM: enable_ieee_mode = 1
+// ASM: enable_wgp_mode = 0
+// ASM: enable_mem_ordered = 0
+// ASM: enable_fwd_progress = 1
// ASM: enable_sgpr_private_segment_wave_byte_offset = 1
// ASM: user_sgpr_count = 1
// ASM: enable_sgpr_workgroup_id_x = 1
@@ -185,7 +191,7 @@ amd_kernel_code_t_test_all:
// ASM: kernarg_segment_alignment = 5
// ASM: group_segment_alignment = 5
// ASM: private_segment_alignment = 5
-// ASM: wavefront_size = 5
+// ASM: wavefront_size = 6
// ASM: call_convention = 1
// ASM: runtime_loader_kernel_symbol = 1
// ASM: .end_amd_kernel_code_t
@@ -213,8 +219,8 @@ amd_kernel_code_t_minimal:
// ASM: amd_code_version_major = 1
// ASM: amd_code_version_minor = 2
// ASM: amd_machine_kind = 1
-// ASM: amd_machine_version_major = 7
-// ASM: amd_machine_version_minor = 0
+// ASM: amd_machine_version_major = 10
+// ASM: amd_machine_version_minor = 1
// ASM: amd_machine_version_stepping = 0
// ASM: kernel_code_entry_byte_offset = 256
// ASM: kernel_code_prefetch_byte_size = 0
@@ -226,6 +232,9 @@ amd_kernel_code_t_minimal:
// ASM: enable_dx10_clamp = 0
// ASM: debug_mode = 0
// ASM: enable_ieee_mode = 0
+// ASM: enable_wgp_mode = 1
+// ASM: enable_mem_ordered = 1
+// ASM: enable_fwd_progress = 0
// ASM: enable_sgpr_private_segment_wave_byte_offset = 0
// ASM: user_sgpr_count = 2
// ASM: enable_sgpr_workgroup_id_x = 0
@@ -246,6 +255,7 @@ amd_kernel_code_t_minimal:
// ASM: enable_sgpr_grid_workgroup_count_x = 0
// ASM: enable_sgpr_grid_workgroup_count_y = 0
// ASM: enable_sgpr_grid_workgroup_count_z = 0
+// ASM: enable_wavefront_size32 = 0
// ASM: enable_ordered_append_gds = 0
// ASM: private_element_size = 0
// ASM: is_ptr64 = 1
Added: llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s?rev=363577&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s (added)
+++ llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s Mon Jun 17 09:48:56 2019
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s | FileCheck --check-prefixes=GCN,GFX7 %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s
+
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX7-ERR %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W32-ERR %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W64-ERR %s
+
+// GCN: test0:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test0
+test0:
+.amd_kernel_code_t
+.end_amd_kernel_code_t
+
+// GCN: test1:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32-ERR: error: enable_wavefront_size32=0 requires +WavefrontSize64
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test1
+test1:
+.amd_kernel_code_t
+ enable_wavefront_size32 = 0
+.end_amd_kernel_code_t
+
+// GCN: test2:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32-ERR: error: wavefront_size=6 requires +WavefrontSize64
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test2
+test2:
+.amd_kernel_code_t
+ wavefront_size = 6
+.end_amd_kernel_code_t
+
+// GCN: test3:
+// GFX7-ERR: error: enable_wavefront_size32=1 is only allowed on GFX10+
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64-ERR: error: enable_wavefront_size32=1 requires +WavefrontSize32
+.amdgpu_hsa_kernel test3
+test3:
+.amd_kernel_code_t
+ enable_wavefront_size32 = 1
+.end_amd_kernel_code_t
+
+// GCN: test4:
+// GFX7-ERR: error: wavefront_size=5 is only allowed on GFX10+
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64-ERR: error: wavefront_size=5 requires +WavefrontSize32
+.amdgpu_hsa_kernel test4
+test4:
+.amd_kernel_code_t
+ wavefront_size = 5
+.end_amd_kernel_code_t
Modified: llvm/trunk/test/MC/AMDGPU/hsa.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa.s?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa.s Mon Jun 17 09:48:56 2019
@@ -120,7 +120,7 @@ amd_kernel_code_t_test_all:
kernarg_segment_alignment = 5
group_segment_alignment = 5
private_segment_alignment = 5
- wavefront_size = 5
+ wavefront_size = 6
call_convention = 1
runtime_loader_kernel_symbol = 1
.end_amd_kernel_code_t
@@ -185,7 +185,7 @@ amd_kernel_code_t_test_all:
// ASM: kernarg_segment_alignment = 5
// ASM: group_segment_alignment = 5
// ASM: private_segment_alignment = 5
-// ASM: wavefront_size = 5
+// ASM: wavefront_size = 6
// ASM: call_convention = 1
// ASM: runtime_loader_kernel_symbol = 1
// ASM: .end_amd_kernel_code_t
Modified: llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s Mon Jun 17 09:48:56 2019
@@ -1,6 +1,8 @@
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-code-object-v3,-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s
// RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-code-object-v3,-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
.hsa_code_object_isa
// GFX8: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
// GFX9: .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
+// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"
More information about the llvm-commits
mailing list