[llvm] r363577 - [AMDGPU] gfx1010 wave32 metadata

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 09:48:56 PDT 2019


Author: rampitec
Date: Mon Jun 17 09:48:56 2019
New Revision: 363577

URL: http://llvm.org/viewvc/llvm-project?rev=363577&view=rev
Log:
[AMDGPU] gfx1010 wave32 metadata

Differential Revision: https://reviews.llvm.org/D63207

Added:
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
    llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s
    llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s
      - copied, changed from r363576, llvm/trunk/test/MC/AMDGPU/hsa.s
    llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s
Modified:
    llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h
    llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h
    llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
    llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
    llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
    llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
    llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s
    llvm/trunk/test/MC/AMDGPU/hsa.s
    llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s

Modified: llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h (original)
+++ llvm/trunk/include/llvm/Support/AMDHSAKernelDescriptor.h Mon Jun 17 09:48:56 2019
@@ -143,6 +143,7 @@ enum : int32_t {
   KERNEL_CODE_PROPERTY(ENABLE_SGPR_FLAT_SCRATCH_INIT, 5, 1),
   KERNEL_CODE_PROPERTY(ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 6, 1),
   KERNEL_CODE_PROPERTY(RESERVED0, 7, 3),
+  KERNEL_CODE_PROPERTY(ENABLE_WAVEFRONT_SIZE32, 10, 1), // GFX10+
   KERNEL_CODE_PROPERTY(RESERVED1, 11, 5),
 };
 #undef KERNEL_CODE_PROPERTY

Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp Mon Jun 17 09:48:56 2019
@@ -361,6 +361,10 @@ uint16_t AMDGPUAsmPrinter::getAmdhsaKern
     KernelCodeProperties |=
         amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT;
   }
+  if (MF.getSubtarget<GCNSubtarget>().isWave32()) {
+    KernelCodeProperties |=
+        amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+  }
 
   return KernelCodeProperties;
 }
@@ -1081,6 +1085,10 @@ void AMDGPUAsmPrinter::EmitPALMetadata(c
     MD->setSpiPsInputEna(MFI->getPSInputEnable());
     MD->setSpiPsInputAddr(MFI->getPSInputAddr());
   }
+
+  const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
+  if (STM.isWave32())
+    MD->setWave32(MF.getFunction().getCallingConv());
 }
 
 // This is supposed to be log2(Size)

Modified: llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDKernelCodeT.h Mon Jun 17 09:48:56 2019
@@ -126,8 +126,12 @@ enum amd_code_property_mask_t {
   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH = 1,
   AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z = ((1 << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z_SHIFT,
 
-  AMD_CODE_PROPERTY_RESERVED1_SHIFT = 10,
-  AMD_CODE_PROPERTY_RESERVED1_WIDTH = 6,
+  AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT = 10,
+  AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH = 1,
+  AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32 = ((1 << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_WIDTH) - 1) << AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT,
+
+  AMD_CODE_PROPERTY_RESERVED1_SHIFT = 11,
+  AMD_CODE_PROPERTY_RESERVED1_WIDTH = 5,
   AMD_CODE_PROPERTY_RESERVED1 = ((1 << AMD_CODE_PROPERTY_RESERVED1_WIDTH) - 1) << AMD_CODE_PROPERTY_RESERVED1_SHIFT,
 
   /// Control wave ID base counter for GDS ordered-append. Used to set

Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Mon Jun 17 09:48:56 2019
@@ -3433,6 +3433,14 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
                        Val, ValRange);
       UserSGPRCount += 1;
+    } else if (ID == ".amdhsa_wavefront_size32") {
+      if (IVersion.Major < 10)
+        return getParser().Error(IDRange.Start, "directive requires gfx10+",
+                                 IDRange);
+      EnableWavefrontSize32 = Val;
+      PARSE_BITS_ENTRY(KD.kernel_code_properties,
+                       KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+                       Val, ValRange);
     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
       PARSE_BITS_ENTRY(
           KD.compute_pgm_rsrc2,
@@ -3680,6 +3688,30 @@ bool AMDGPUAsmParser::ParseAMDKernelCode
   }
   Lex();
 
+  if (ID == "enable_wavefront_size32") {
+    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
+      if (!isGFX10())
+        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
+    } else {
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
+    }
+  }
+
+  if (ID == "wavefront_size") {
+    if (Header.wavefront_size == 5) {
+      if (!isGFX10())
+        return TokError("wavefront_size=5 is only allowed on GFX10+");
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
+        return TokError("wavefront_size=5 requires +WavefrontSize32");
+    } else if (Header.wavefront_size == 6) {
+      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
+        return TokError("wavefront_size=6 requires +WavefrontSize64");
+    }
+  }
+
   if (ID == "enable_wgp_mode") {
     if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10())
       return TokError("enable_wgp_mode=1 is only allowed on GFX10+");

Modified: llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp Mon Jun 17 09:48:56 2019
@@ -284,6 +284,10 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsa
   PRINT_FIELD(OS, ".amdhsa_user_sgpr_private_segment_size", KD,
               kernel_code_properties,
               amdhsa::KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE);
+  if (IVersion.Major >= 10)
+    PRINT_FIELD(OS, ".amdhsa_wavefront_size32", KD,
+                kernel_code_properties,
+                amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32);
   PRINT_FIELD(
       OS, ".amdhsa_system_sgpr_private_segment_wavefront_offset", KD,
       compute_pgm_rsrc2,

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Mon Jun 17 09:48:56 2019
@@ -457,6 +457,10 @@ void initDefaultAMDKernelCodeT(amd_kerne
   Header.private_segment_alignment = 4;
 
   if (Version.Major >= 10) {
+    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
+      Header.wavefront_size = 5;
+      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
+    }
     Header.compute_pgm_resource_registers |=
       S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
       S_00B848_MEM_ORDERED(1);
@@ -480,6 +484,9 @@ amdhsa::kernel_descriptor_t getDefaultAm
   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                   amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
   if (Version.Major >= 10) {
+    AMDHSA_BITS_SET(KD.kernel_code_properties,
+                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
+                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                     amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                     STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp Mon Jun 17 09:48:56 2019
@@ -233,6 +233,29 @@ void AMDGPUPALMetadata::setScratchSize(C
   getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
 }
 
+// Set the hardware register bit in PAL metadata to enable wave32 on the
+// shader of the given calling convention.
+void AMDGPUPALMetadata::setWave32(unsigned CC) {
+  switch (CC) {
+  case CallingConv::AMDGPU_HS:
+    setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_HS_W32_EN(1));
+    break;
+  case CallingConv::AMDGPU_GS:
+    setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_GS_W32_EN(1));
+    break;
+  case CallingConv::AMDGPU_VS:
+    setRegister(PALMD::R_A2D5_VGT_SHADER_STAGES_EN, S_028B54_VS_W32_EN(1));
+    break;
+  case CallingConv::AMDGPU_PS:
+    setRegister(PALMD::R_A1B6_SPI_PS_IN_CONTROL, S_0286D8_PS_W32_EN(1));
+    break;
+  case CallingConv::AMDGPU_CS:
+    setRegister(PALMD::R_2E00_COMPUTE_DISPATCH_INITIATOR,
+                S_00B800_CS_W32_EN(1));
+    break;
+  }
+}
+
 // Convert a register number to name, for display by toString().
 // Returns nullptr if none.
 static const char *getRegisterName(unsigned RegNum) {

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h Mon Jun 17 09:48:56 2019
@@ -80,6 +80,10 @@ public:
   // Set the scratch size in the metadata.
   void setScratchSize(unsigned CC, unsigned Val);
 
+  // Set the hardware register bit in PAL metadata to enable wave32 on the
+  // shader of the given calling convention.
+  void setWave32(unsigned CC);
+
   // Emit the accumulated PAL metadata as asm directives.
   // This is called from AMDGPUTargetAsmStreamer::Finish().
   void toString(std::string &S);

Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDKernelCodeTInfo.h Mon Jun 17 09:48:56 2019
@@ -109,6 +109,7 @@ CODEPROP(enable_sgpr_private_segment_siz
 CODEPROP(enable_sgpr_grid_workgroup_count_x,  ENABLE_SGPR_GRID_WORKGROUP_COUNT_X),
 CODEPROP(enable_sgpr_grid_workgroup_count_y,  ENABLE_SGPR_GRID_WORKGROUP_COUNT_Y),
 CODEPROP(enable_sgpr_grid_workgroup_count_z,  ENABLE_SGPR_GRID_WORKGROUP_COUNT_Z),
+CODEPROP(enable_wavefront_size32,             ENABLE_WAVEFRONT_SIZE32),
 CODEPROP(enable_ordered_append_gds,           ENABLE_ORDERED_APPEND_GDS),
 CODEPROP(private_element_size,                PRIVATE_ELEMENT_SIZE),
 CODEPROP(is_ptr64,                            IS_PTR64),

Modified: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll Mon Jun 17 09:48:56 2019
@@ -1,6 +1,7 @@
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=NOTES %s
-; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readelf --notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX700 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX803 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; RUN: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX900 --check-prefix=WAVE64 --check-prefix=NOTES %s
+; run: llc -mattr=+code-object-v3 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -enable-misched=0 -filetype=obj -o - < %s | llvm-readobj -elf-output-style=GNU -notes | FileCheck --check-prefix=CHECK --check-prefix=GFX1010 --check-prefix=WAVE32 --check-prefix=NOTES %s
 
 @var = addrspace(1) global float 0.0
 
@@ -14,10 +15,12 @@
 ; CHECK:     .max_flat_workgroup_size: 256
 ; CHECK:     .name:           test
 ; CHECK:     .private_segment_fixed_size: 0
-; CHECK:     .sgpr_count:     8
+; WAVE64:    .sgpr_count:     8
+; WAVE32:    .sgpr_count:     10
 ; CHECK:     .symbol:         test.kd
 ; CHECK:     .vgpr_count:     6
-; CHECK:     .wavefront_size: 64
+; WAVE64:    .wavefront_size: 64
+; WAVE32:    .wavefront_size: 32
 define amdgpu_kernel void @test(
     half addrspace(1)* %r,
     half addrspace(1)* %a,
@@ -34,6 +37,7 @@ entry:
 ; GFX700:   .sgpr_spill_count: 40
 ; GFX803:   .sgpr_spill_count: 24
 ; GFX900:   .sgpr_spill_count: 24
+; GFX1010:  .sgpr_spill_count: 24
 ; CHECK:   .symbol:     num_spilled_sgprs.kd
 define amdgpu_kernel void @num_spilled_sgprs(
     i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, [8 x i32],

Added: llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll?rev=363577&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/hsa-metadata-wavefrontsize.ll Mon Jun 17 09:48:56 2019
@@ -0,0 +1,14 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-32 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefixes=GCN,GFX10-64 %s
+
+; GCN:      ---
+; GCN:      Kernels:
+; GCN:        - Name: wavefrontsize
+; GCN:          CodeProps:
+; GFX10-32:       WavefrontSize: 32
+; GFX10-64:       WavefrontSize: 64
+; GCN:      ...
+define amdgpu_kernel void @wavefrontsize() {
+entry:
+  ret void
+}

Modified: llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa-diag-v3.s Mon Jun 17 09:48:56 2019
@@ -1,4 +1,5 @@
 // RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s
+// RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=GFX10
 // RUN: not llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd- -mcpu=gfx803 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefix=NOT-AMDHSA
 
 .text
@@ -44,6 +45,46 @@
   // CHECK: error: expected .amdhsa_ directive or .end_amdhsa_kernel
 .end_amdhsa_kernel
 
+.amdhsa_kernel foo
+  .amdhsa_wavefront_size32 1
+  // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+  .amdhsa_workgroup_processor_mode 1
+  // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+  .amdhsa_memory_ordered 1
+  // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+  .amdhsa_forward_progress 1
+  // CHECK: error: directive requires gfx10+
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+  .amdhsa_wavefront_size32 5
+  // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+  .amdhsa_workgroup_processor_mode 5
+  // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+  .amdhsa_memory_ordered 5
+  // GFX10: error: value out of range
+.end_amdhsa_kernel
+
+.amdhsa_kernel foo
+  .amdhsa_forward_progress 5
+  // GFX10: error: value out of range
+.end_amdhsa_kernel
+
 .set .amdgcn.next_free_vgpr, "foo"
 v_mov_b32_e32 v0, s0
 // CHECK: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions

Added: llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s?rev=363577&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s (added)
+++ llvm/trunk/test/MC/AMDGPU/hsa-gfx10-v3.s Mon Jun 17 09:48:56 2019
@@ -0,0 +1,223 @@
+// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -mattr=+code-object-v3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-readobj -elf-output-style=GNU -sections -symbols -relocations %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// big endian not supported
+// XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc
+
+// READOBJ: Section Headers
+// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        0000c0 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
+
+// READOBJ: Relocation section '.rela.rodata' at offset
+// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
+// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
+// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
+
+// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
+// READOBJ: {{[0-9]+}}: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
+// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
+// READOBJ: {{[0-9]+}}: 0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
+// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
+// READOBJ: {{[0-9]+}}: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
+// READOBJ: {{[0-9]+}}: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
+
+// OBJDUMP: Contents of section .rodata
+// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
+// minimal
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
+// complete
+// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
+// special_sgpr
+// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
+
+.p2align 8
+.type minimal, at function
+minimal:
+  s_endpgm
+
+.p2align 8
+.type complete, at function
+complete:
+  s_endpgm
+
+.p2align 8
+.type special_sgpr, at function
+special_sgpr:
+  s_endpgm
+
+.rodata
+// ASM: .rodata
+
+// Test that only specifying required directives is allowed, and that defaulted
+// values are omitted.
+.p2align 6
+.amdhsa_kernel minimal
+  .amdhsa_next_free_vgpr 0
+  .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
+// Test that we can specify all available directives with non-default values.
+.p2align 6
+.amdhsa_kernel complete
+  .amdhsa_group_segment_fixed_size 1
+  .amdhsa_private_segment_fixed_size 1
+  .amdhsa_user_sgpr_private_segment_buffer 1
+  .amdhsa_user_sgpr_dispatch_ptr 1
+  .amdhsa_user_sgpr_queue_ptr 1
+  .amdhsa_user_sgpr_kernarg_segment_ptr 1
+  .amdhsa_user_sgpr_dispatch_id 1
+  .amdhsa_user_sgpr_flat_scratch_init 1
+  .amdhsa_user_sgpr_private_segment_size 1
+  .amdhsa_wavefront_size32 1
+  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+  .amdhsa_system_sgpr_workgroup_id_x 0
+  .amdhsa_system_sgpr_workgroup_id_y 1
+  .amdhsa_system_sgpr_workgroup_id_z 1
+  .amdhsa_system_sgpr_workgroup_info 1
+  .amdhsa_system_vgpr_workitem_id 1
+  .amdhsa_next_free_vgpr 9
+  .amdhsa_next_free_sgpr 27
+  .amdhsa_reserve_vcc 0
+  .amdhsa_reserve_flat_scratch 0
+  .amdhsa_reserve_xnack_mask 0
+  .amdhsa_float_round_mode_32 1
+  .amdhsa_float_round_mode_16_64 1
+  .amdhsa_float_denorm_mode_32 1
+  .amdhsa_float_denorm_mode_16_64 0
+  .amdhsa_dx10_clamp 0
+  .amdhsa_ieee_mode 0
+  .amdhsa_fp16_overflow 1
+  .amdhsa_workgroup_processor_mode 1
+  .amdhsa_memory_ordered 1
+  .amdhsa_forward_progress 1
+  .amdhsa_exception_fp_ieee_invalid_op 1
+  .amdhsa_exception_fp_denorm_src 1
+  .amdhsa_exception_fp_ieee_div_zero 1
+  .amdhsa_exception_fp_ieee_overflow 1
+  .amdhsa_exception_fp_ieee_underflow 1
+  .amdhsa_exception_fp_ieee_inexact 1
+  .amdhsa_exception_int_div_zero 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel complete
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
+// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_wavefront_size32 1
+// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
+// ASM-NEXT: .amdhsa_next_free_vgpr 9
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM-NEXT: .amdhsa_float_round_mode_32 1
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
+// ASM-NEXT: .amdhsa_memory_ordered 1
+// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
+
+// Test that we are including special SGPR usage in the granulated count.
+.p2align 6
+.amdhsa_kernel special_sgpr
+  // Same next_free_sgpr as "complete", but...
+  .amdhsa_next_free_sgpr 27
+  // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
+  // 3 granules to 4
+  .amdhsa_reserve_flat_scratch 1
+
+  .amdhsa_reserve_vcc 0
+  .amdhsa_reserve_xnack_mask 0
+
+  .amdhsa_float_denorm_mode_16_64 0
+  .amdhsa_dx10_clamp 0
+  .amdhsa_ieee_mode 0
+  .amdhsa_next_free_vgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel special_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 0
+// ASM: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM: .end_amdhsa_kernel
+
+.section .foo
+
+.byte .amdgcn.gfx_generation_number
+// ASM: .byte 10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v7, s10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 8
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 11
+
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4

Copied: llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s (from r363576, llvm/trunk/test/MC/AMDGPU/hsa.s)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s?p2=llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s&p1=llvm/trunk/test/MC/AMDGPU/hsa.s&r1=363576&r2=363577&rev=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa-gfx10.s Mon Jun 17 09:48:56 2019
@@ -1,5 +1,5 @@
-// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3 -show-encoding %s | FileCheck %s --check-prefix=ASM
-// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=kaveri -mattr=-code-object-v3 -show-encoding %s | llvm-readobj --symbols -S --sd | FileCheck %s --check-prefix=ELF
+// RUN: llvm-mc -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | FileCheck %s --check-prefix=ASM
+// RUN: llvm-mc -filetype=obj -triple amdgcn--amdhsa -mcpu=gfx1010 -mattr=-WavefrontSize32,+WavefrontSize64,-code-object-v3 -show-encoding %s | llvm-readobj -symbols -s -sd | FileCheck %s --check-prefix=ELF
 
 // ELF: Section {
 // ELF: Name: .text
@@ -78,6 +78,9 @@ amd_kernel_code_t_test_all:
     compute_pgm_rsrc1_dx10_clamp = 1
     compute_pgm_rsrc1_debug_mode = 1
     compute_pgm_rsrc1_ieee_mode = 1
+    compute_pgm_rsrc1_wgp_mode = 0
+    compute_pgm_rsrc1_mem_ordered = 0
+    compute_pgm_rsrc1_fwd_progress = 1
     compute_pgm_rsrc2_scratch_en = 1
     compute_pgm_rsrc2_user_sgpr = 1
     compute_pgm_rsrc2_tgid_x_en = 1
@@ -120,7 +123,7 @@ amd_kernel_code_t_test_all:
     kernarg_segment_alignment = 5
     group_segment_alignment = 5
     private_segment_alignment = 5
-    wavefront_size = 5
+    wavefront_size = 6
     call_convention = 1
     runtime_loader_kernel_symbol = 1
 .end_amd_kernel_code_t
@@ -143,6 +146,9 @@ amd_kernel_code_t_test_all:
 // ASM: enable_dx10_clamp = 1
 // ASM: debug_mode = 1
 // ASM: enable_ieee_mode = 1
+// ASM: enable_wgp_mode = 0
+// ASM: enable_mem_ordered = 0
+// ASM: enable_fwd_progress = 1
 // ASM: enable_sgpr_private_segment_wave_byte_offset = 1
 // ASM: user_sgpr_count = 1
 // ASM: enable_sgpr_workgroup_id_x = 1
@@ -185,7 +191,7 @@ amd_kernel_code_t_test_all:
 // ASM: kernarg_segment_alignment = 5
 // ASM: group_segment_alignment = 5
 // ASM: private_segment_alignment = 5
-// ASM: wavefront_size = 5
+// ASM: wavefront_size = 6
 // ASM: call_convention = 1
 // ASM: runtime_loader_kernel_symbol = 1
 // ASM: .end_amd_kernel_code_t
@@ -213,8 +219,8 @@ amd_kernel_code_t_minimal:
 // ASM:	amd_code_version_major = 1
 // ASM:	amd_code_version_minor = 2
 // ASM:	amd_machine_kind = 1
-// ASM:	amd_machine_version_major = 7
-// ASM:	amd_machine_version_minor = 0
+// ASM:	amd_machine_version_major = 10
+// ASM:	amd_machine_version_minor = 1
 // ASM:	amd_machine_version_stepping = 0
 // ASM:	kernel_code_entry_byte_offset = 256
 // ASM:	kernel_code_prefetch_byte_size = 0
@@ -226,6 +232,9 @@ amd_kernel_code_t_minimal:
 // ASM: enable_dx10_clamp = 0
 // ASM: debug_mode = 0
 // ASM: enable_ieee_mode = 0
+// ASM: enable_wgp_mode = 1
+// ASM: enable_mem_ordered = 1
+// ASM: enable_fwd_progress = 0
 // ASM: enable_sgpr_private_segment_wave_byte_offset = 0
 // ASM: user_sgpr_count = 2
 // ASM: enable_sgpr_workgroup_id_x = 0
@@ -246,6 +255,7 @@ amd_kernel_code_t_minimal:
 // ASM:	enable_sgpr_grid_workgroup_count_x = 0
 // ASM:	enable_sgpr_grid_workgroup_count_y = 0
 // ASM:	enable_sgpr_grid_workgroup_count_z = 0
+// ASM:	enable_wavefront_size32 = 0
 // ASM:	enable_ordered_append_gds = 0
 // ASM:	private_element_size = 0
 // ASM:	is_ptr64 = 1

Added: llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s?rev=363577&view=auto
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s (added)
+++ llvm/trunk/test/MC/AMDGPU/hsa-wave-size.s Mon Jun 17 09:48:56 2019
@@ -0,0 +1,65 @@
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s | FileCheck --check-prefixes=GCN,GFX7 %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W32 %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s | FileCheck --check-prefixes=GCN,GFX10-W64 %s
+
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx700 -mattr=-code-object-v3 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX7-ERR %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,+WavefrontSize32,-WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W32-ERR %s
+// RUN: not llvm-mc -triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32,+WavefrontSize64 %s 2>&1 | FileCheck --check-prefixes=GCN-ERR,GFX10-W64-ERR %s
+
+// GCN: test0:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test0
+test0:
+.amd_kernel_code_t
+.end_amd_kernel_code_t
+
+// GCN: test1:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32-ERR: error: enable_wavefront_size32=0 requires +WavefrontSize64
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test1
+test1:
+.amd_kernel_code_t
+  enable_wavefront_size32 = 0
+.end_amd_kernel_code_t
+
+// GCN: test2:
+// GFX7: enable_wavefront_size32 = 0
+// GFX7: wavefront_size = 6
+// GFX10-W32-ERR: error: wavefront_size=6 requires +WavefrontSize64
+// GFX10-W64: enable_wavefront_size32 = 0
+// GFX10-W64: wavefront_size = 6
+.amdgpu_hsa_kernel test2
+test2:
+.amd_kernel_code_t
+  wavefront_size = 6
+.end_amd_kernel_code_t
+
+// GCN: test3:
+// GFX7-ERR: error: enable_wavefront_size32=1 is only allowed on GFX10+
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64-ERR: error: enable_wavefront_size32=1 requires +WavefrontSize32
+.amdgpu_hsa_kernel test3
+test3:
+.amd_kernel_code_t
+  enable_wavefront_size32 = 1
+.end_amd_kernel_code_t
+
+// GCN: test4:
+// GFX7-ERR: error: wavefront_size=5 is only allowed on GFX10+
+// GFX10-W32: enable_wavefront_size32 = 1
+// GFX10-W32: wavefront_size = 5
+// GFX10-W64-ERR: error: wavefront_size=5 requires +WavefrontSize32
+.amdgpu_hsa_kernel test4
+test4:
+.amd_kernel_code_t
+  wavefront_size = 5
+.end_amd_kernel_code_t

Modified: llvm/trunk/test/MC/AMDGPU/hsa.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa.s?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa.s Mon Jun 17 09:48:56 2019
@@ -120,7 +120,7 @@ amd_kernel_code_t_test_all:
     kernarg_segment_alignment = 5
     group_segment_alignment = 5
     private_segment_alignment = 5
-    wavefront_size = 5
+    wavefront_size = 6
     call_convention = 1
     runtime_loader_kernel_symbol = 1
 .end_amd_kernel_code_t
@@ -185,7 +185,7 @@ amd_kernel_code_t_test_all:
 // ASM: kernarg_segment_alignment = 5
 // ASM: group_segment_alignment = 5
 // ASM: private_segment_alignment = 5
-// ASM: wavefront_size = 5
+// ASM: wavefront_size = 6
 // ASM: call_convention = 1
 // ASM: runtime_loader_kernel_symbol = 1
 // ASM: .end_amd_kernel_code_t

Modified: llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s?rev=363577&r1=363576&r2=363577&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa_isa_version_attrs.s Mon Jun 17 09:48:56 2019
@@ -1,6 +1,8 @@
 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx801 -mattr=-code-object-v3,-fast-fmaf -show-encoding %s | FileCheck --check-prefix=GFX8 %s
 // RUN: llvm-mc -arch=amdgcn -mcpu=gfx900 -mattr=-code-object-v3,-mad-mix-insts -show-encoding %s | FileCheck --check-prefix=GFX9 %s
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx1010 -mattr=-code-object-v3,-WavefrontSize32 -show-encoding %s | FileCheck --check-prefix=GFX10 %s
 
 .hsa_code_object_isa
 // GFX8:  .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
 // GFX9:  .hsa_code_object_isa 9,0,0,"AMD","AMDGPU"
+// GFX10: .hsa_code_object_isa 10,1,0,"AMD","AMDGPU"




More information about the llvm-commits mailing list