[llvm] r315822 - AMDGPU: Add AMDGPU HSA Kernel Descriptor

Sat Oct 14 12:17:08 PDT 2017

Author: kzhuravl
Date: Sat Oct 14 12:17:08 2017
New Revision: 315822

URL: http://llvm.org/viewvc/llvm-project?rev=315822&view=rev
Log:
AMDGPU: Add AMDGPU HSA Kernel Descriptor

  - Update docs to match llvm coding style
  - Add missing FP16_OVFL bit for gfx9
  - Fix the size of the kernel descriptor in the docs

Differential Revision: https://reviews.llvm.org/D38902

Added:
    llvm/trunk/include/llvm/Support/AMDGPUKernelDescriptor.h
Modified:
    llvm/trunk/docs/AMDGPUUsage.rst

Modified: llvm/trunk/docs/AMDGPUUsage.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/AMDGPUUsage.rst?rev=315822&r1=315821&r2=315822&view=diff
==============================================================================

--- llvm/trunk/docs/AMDGPUUsage.rst (original)
+++ llvm/trunk/docs/AMDGPUUsage.rst Sat Oct 14 12:17:08 2017
@@ -1427,7 +1427,7 @@ CP microcode requires the Kernel descrit
      ======= ======= =============================== ===========================
      Bits    Size    Field Name                      Description
      ======= ======= =============================== ===========================
-     31:0    4 bytes group_segment_fixed_size        The amount of fixed local
+     31:0    4 bytes GroupSegmentFixedSize           The amount of fixed local
                                                      address space memory
                                                      required for a work-group
                                                      in bytes. This does not
@@ -1436,7 +1436,7 @@ CP microcode requires the Kernel descrit
                                                      space memory that may be
                                                      added when the kernel is
                                                      dispatched.
-     63:32   4 bytes private_segment_fixed_size      The amount of fixed
+     63:32   4 bytes PrivateSegmentFixedSize         The amount of fixed
                                                      private address space
                                                      memory required for a
                                                      work-item in bytes. If
@@ -1444,18 +1444,18 @@ CP microcode requires the Kernel descrit
                                                      then additional space must
                                                      be added to this value for
                                                      the call stack.
-     95:64   4 bytes max_flat_workgroup_size         Maximum flat work-group
+     95:64   4 bytes MaxFlatWorkgroupSize            Maximum flat work-group
                                                      size supported by the
                                                      kernel in work-items.
-     96      1 bit   is_dynamic_call_stack           Indicates if the generated
+     96      1 bit   IsDynamicCallStack              Indicates if the generated
                                                      machine code is using a
                                                      dynamically sized call
                                                      stack.
-     97      1 bit   is_xnack_enabled                Indicates if the generated
+     97      1 bit   IsXNACKEnabled                  Indicates if the generated
                                                      machine code is capable of
                                                      suppoting XNACK.
      127:98  30 bits                                 Reserved. Must be 0.
-     191:128 8 bytes kernel_code_entry_byte_offset   Byte offset (possibly
+     191:128 8 bytes KernelCodeEntryByteOffset       Byte offset (possibly
                                                      negative) from base
                                                      address of kernel
                                                      descriptor to kernel's
@@ -1464,22 +1464,22 @@ CP microcode requires the Kernel descrit
                                                      aligned.
      383:192 24                                      Reserved. Must be 0.
              bytes
-     415:384 4 bytes compute_pgm_rsrc1               Compute Shader (CS)
+     415:384 4 bytes ComputePgmRsrc1                 Compute Shader (CS)
                                                      program settings used by
                                                      CP to set up
                                                      ``COMPUTE_PGM_RSRC1``
                                                      configuration
                                                      register. See
                                                      :ref:`amdgpu-amdhsa-compute_pgm_rsrc1_t-gfx6-gfx9-table`.
-     447:416 4 bytes compute_pgm_rsrc2               Compute Shader (CS)
+     447:416 4 bytes ComputePgmRsrc2                 Compute Shader (CS)
                                                      program settings used by
                                                      CP to set up
                                                      ``COMPUTE_PGM_RSRC2``
                                                      configuration
                                                      register. See
                                                      :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx9-table`.
-     448     1 bit   enable_sgpr_private_segment     Enable the setup of the
-                     _buffer                         SGPR user data registers
+     448     1 bit   EnableSGPRPrivateSegmentBuffer  Enable the setup of the
+                                                     SGPR user data registers
                                                      (see
                                                      :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
 
@@ -1490,21 +1490,20 @@ CP microcode requires the Kernel descrit
                                                      ``compute_pgm_rsrc2.user_sgpr.user_sgpr_count``.
                                                      Any requests beyond 16
                                                      will be ignored.
-     449     1 bit   enable_sgpr_dispatch_ptr        *see above*
-     450     1 bit   enable_sgpr_queue_ptr           *see above*
-     451     1 bit   enable_sgpr_kernarg_segment_ptr *see above*
-     452     1 bit   enable_sgpr_dispatch_id         *see above*
-     453     1 bit   enable_sgpr_flat_scratch_init   *see above*
-     454     1 bit   enable_sgpr_private_segment     *see above*
-                     _size
-     455     1 bit   enable_sgpr_grid_workgroup      Not implemented in CP and
-                     _count_X                        should always be 0.
-     456     1 bit   enable_sgpr_grid_workgroup      Not implemented in CP and
-                     _count_Y                        should always be 0.
-     457     1 bit   enable_sgpr_grid_workgroup      Not implemented in CP and
-                     _count_Z                        should always be 0.
+     449     1 bit   EnableSGPRDispatchPtr           *see above*
+     450     1 bit   EnableSGPRQueuePtr              *see above*
+     451     1 bit   EnableSGPRKernargSegmentPtr     *see above*
+     452     1 bit   EnableSGPRDispatchID            *see above*
+     453     1 bit   EnableSGPRFlatScratchInit       *see above*
+     454     1 bit   EnableSGPRPrivateSegmentSize    *see above*
+     455     1 bit   EnableSGPRGridWorkgroupCountX   Not implemented in CP and
+                                                     should always be 0.
+     456     1 bit   EnableSGPRGridWorkgroupCountY   Not implemented in CP and
+                                                     should always be 0.
+     457     1 bit   EnableSGPRGridWorkgroupCountZ   Not implemented in CP and
+                                                     should always be 0.
      463:458 6 bits                                  Reserved. Must be 0.
-     511:464 4                                       Reserved. Must be 0.
+     511:464 6                                       Reserved. Must be 0.
              bytes
      512     **Total size 64 bytes.**
      ======= ===================================================================
@@ -1517,7 +1516,7 @@ CP microcode requires the Kernel descrit
      ======= ======= =============================== ===========================================================================
      Bits    Size    Field Name                      Description
      ======= ======= =============================== ===========================================================================
-     5:0     6 bits  granulated_workitem_vgpr_count  Number of vector registers
+     5:0     6 bits  GRANULATED_WORKITEM_VGPR_COUNT  Number of vector registers
                                                      used by each work-item,
                                                      granularity is device
                                                      specific:
@@ -1528,7 +1527,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.VGPRS``.
-     9:6     4 bits  granulated_wavefront_sgpr_count Number of scalar registers
+     9:6     4 bits  GRANULATED_WAVEFRONT_SGPR_COUNT Number of scalar registers
                                                      used by a wavefront,
                                                      granularity is device
                                                      specific:
@@ -1550,7 +1549,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.SGPRS``.
-     11:10   2 bits  priority                        Must be 0.
+     11:10   2 bits  PRIORITY                        Must be 0.
 
                                                      Start executing wavefront
                                                      at the specified priority.
@@ -1558,7 +1557,7 @@ CP microcode requires the Kernel descrit
                                                      CP is responsible for
                                                      filling in
                                                      ``COMPUTE_PGM_RSRC1.PRIORITY``.
-     13:12   2 bits  float_mode_round_32             Wavefront starts execution
+     13:12   2 bits  FLOAT_ROUND_MODE_32             Wavefront starts execution
                                                      with specified rounding
                                                      mode for single (32
                                                      bit) floating point
@@ -1571,7 +1570,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
-     15:14   2 bits  float_mode_round_16_64          Wavefront starts execution
+     15:14   2 bits  FLOAT_ROUND_MODE_16_64          Wavefront starts execution
                                                      with specified rounding
                                                      denorm mode for half/double (16
                                                      and 64 bit) floating point
@@ -1584,7 +1583,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
-     17:16   2 bits  float_mode_denorm_32            Wavefront starts execution
+     17:16   2 bits  FLOAT_DENORM_MODE_32            Wavefront starts execution
                                                      with specified denorm mode
                                                      for single (32
                                                      bit)  floating point
@@ -1597,7 +1596,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
-     19:18   2 bits  float_mode_denorm_16_64         Wavefront starts execution
+     19:18   2 bits  FLOAT_DENORM_MODE_16_64         Wavefront starts execution
                                                      with specified denorm mode
                                                      for half/double (16
                                                      and 64 bit) floating point
@@ -1610,7 +1609,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.FLOAT_MODE``.
-     20      1 bit   priv                            Must be 0.
+     20      1 bit   PRIV                            Must be 0.
 
                                                      Start executing wavefront
                                                      in privilege trap handler
@@ -1619,7 +1618,7 @@ CP microcode requires the Kernel descrit
                                                      CP is responsible for
                                                      filling in
                                                      ``COMPUTE_PGM_RSRC1.PRIV``.
-     21      1 bit   enable_dx10_clamp               Wavefront starts execution
+     21      1 bit   ENABLE_DX10_CLAMP               Wavefront starts execution
                                                      with DX10 clamp mode
                                                      enabled. Used by the vector
                                                      ALU to force DX-10 style
@@ -1630,7 +1629,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.DX10_CLAMP``.
-     22      1 bit   debug_mode                      Must be 0.
+     22      1 bit   DEBUG_MODE                      Must be 0.
 
                                                      Start executing wavefront
                                                      in single step mode.
@@ -1638,7 +1637,7 @@ CP microcode requires the Kernel descrit
                                                      CP is responsible for
                                                      filling in
                                                      ``COMPUTE_PGM_RSRC1.DEBUG_MODE``.
-     23      1 bit   enable_ieee_mode                Wavefront starts execution
+     23      1 bit   ENABLE_IEEE_MODE                Wavefront starts execution
                                                      with IEEE mode
                                                      enabled. Floating point
                                                      opcodes that support
@@ -1653,7 +1652,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC1.IEEE_MODE``.
-     24      1 bit   bulky                           Must be 0.
+     24      1 bit   BULKY                           Must be 0.
 
                                                      Only one work-group allowed
                                                      to execute on a compute
@@ -1662,7 +1661,7 @@ CP microcode requires the Kernel descrit
                                                      CP is responsible for
                                                      filling in
                                                      ``COMPUTE_PGM_RSRC1.BULKY``.
-     25      1 bit   cdbg_user                       Must be 0.
+     25      1 bit   CDBG_USER                       Must be 0.
 
                                                      Flag that can be used to
                                                      control debugging code.
@@ -1670,7 +1669,29 @@ CP microcode requires the Kernel descrit
                                                      CP is responsible for
                                                      filling in
                                                      ``COMPUTE_PGM_RSRC1.CDBG_USER``.
-     31:26   6 bits                                  Reserved. Must be 0.
+     26      1 bit   FP16_OVFL                       GFX6-8:
+                                                       Reserved. Must be 0.
+                                                     GFX9:
+                                                       Wavefront starts
+                                                       execution with specified
+                                                       fp16 overflow mode.
+
+                                                       - If 0, then fp16
+                                                         overflow generates
+                                                         +/-INF values.
+                                                       - If 1, then fp16
+                                                         overflow that is the
+                                                         result of an +/-INF
+                                                         input value or divide
+                                                         by 0 generates a
+                                                         +/-INF, otherwise
+                                                         clamps computed
+                                                         overflow to +/-MAX_FP16
+                                                         as appropriate.
+
+                                                       Used by CP to set up
+                                                       ``COMPUTE_PGM_RSRC1.FP16_OVFL``.
+     31:27   5 bits                                  Reserved. Must be 0.
      32      **Total size 4 bytes**
      ======= ===================================================================================================================
 
@@ -1682,14 +1703,14 @@ CP microcode requires the Kernel descrit
      ======= ======= =============================== ===========================================================================
      Bits    Size    Field Name                      Description
      ======= ======= =============================== ===========================================================================
-     0       1 bit   enable_sgpr_private_segment     Enable the setup of the
-                     _wave_offset                    SGPR wave scratch offset
+     0       1 bit   ENABLE_SGPR_PRIVATE_SEGMENT     Enable the setup of the
+                     _WAVE_OFFSET                    SGPR wave scratch offset
                                                      system register (see
                                                      :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.SCRATCH_EN``.
-     5:1     5 bits  user_sgpr_count                 The total number of SGPR
+     5:1     5 bits  USER_SGPR_COUNT                 The total number of SGPR
                                                      user data registers
                                                      requested. This number must
                                                      match the number of user
@@ -1697,7 +1718,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.USER_SGPR``.
-     6       1 bit   enable_trap_handler             Set to 1 if code contains a
+     6       1 bit   ENABLE_TRAP_HANDLER             Set to 1 if code contains a
                                                      TRAP instruction which
                                                      requires a trap handler to
                                                      be enabled.
@@ -1708,7 +1729,7 @@ CP microcode requires the Kernel descrit
                                                      installed a trap handler
                                                      regardless of the setting
                                                      of this field.
-     7       1 bit   enable_sgpr_workgroup_id_x      Enable the setup of the
+     7       1 bit   ENABLE_SGPR_WORKGROUP_ID_X      Enable the setup of the
                                                      system SGPR register for
                                                      the work-group id in the X
                                                      dimension (see
@@ -1716,7 +1737,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.TGID_X_EN``.
-     8       1 bit   enable_sgpr_workgroup_id_y      Enable the setup of the
+     8       1 bit   ENABLE_SGPR_WORKGROUP_ID_Y      Enable the setup of the
                                                      system SGPR register for
                                                      the work-group id in the Y
                                                      dimension (see
@@ -1724,7 +1745,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.TGID_Y_EN``.
-     9       1 bit   enable_sgpr_workgroup_id_z      Enable the setup of the
+     9       1 bit   ENABLE_SGPR_WORKGROUP_ID_Z      Enable the setup of the
                                                      system SGPR register for
                                                      the work-group id in the Z
                                                      dimension (see
@@ -1732,14 +1753,14 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.TGID_Z_EN``.
-     10      1 bit   enable_sgpr_workgroup_info      Enable the setup of the
+     10      1 bit   ENABLE_SGPR_WORKGROUP_INFO      Enable the setup of the
                                                      system SGPR register for
                                                      work-group information (see
                                                      :ref:`amdgpu-amdhsa-initial-kernel-execution-state`).
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.TGID_SIZE_EN``.
-     12:11   2 bits  enable_vgpr_workitem_id         Enable the setup of the
+     12:11   2 bits  ENABLE_VGPR_WORKITEM_ID         Enable the setup of the
                                                      VGPR system registers used
                                                      for the work-item ID.
                                                      :ref:`amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table`
@@ -1747,7 +1768,7 @@ CP microcode requires the Kernel descrit
 
                                                      Used by CP to set up
                                                      ``COMPUTE_PGM_RSRC2.TIDIG_CMP_CNT``.
-     13      1 bit   enable_exception_address_watch  Must be 0.
+     13      1 bit   ENABLE_EXCEPTION_ADDRESS_WATCH  Must be 0.
 
                                                      Wavefront starts execution
                                                      with address watch
@@ -1763,7 +1784,7 @@ CP microcode requires the Kernel descrit
                                                      ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB``
                                                      according to what the
                                                      runtime requests.
-     14      1 bit   enable_exception_memory         Must be 0.
+     14      1 bit   ENABLE_EXCEPTION_MEMORY         Must be 0.
 
                                                      Wavefront starts execution
                                                      with memory violation
@@ -1782,7 +1803,7 @@ CP microcode requires the Kernel descrit
                                                      ``COMPUTE_PGM_RSRC2.EXCP_EN_MSB``
                                                      according to what the
                                                      runtime requests.
-     23:15   9 bits  granulated_lds_size             Must be 0.
+     23:15   9 bits  GRANULATED_LDS_SIZE             Must be 0.
 
                                                      CP uses the rounded value
                                                      from the dispatch packet,
@@ -1803,8 +1824,8 @@ CP microcode requires the Kernel descrit
                                                      GFX7-GFX9:
                                                        roundup(lds-size / (128 * 4))
 
-     24      1 bit   enable_exception_ieee_754_fp    Wavefront starts execution
-                     _invalid_operation              with specified exceptions
+     24      1 bit   ENABLE_EXCEPTION_IEEE_754_FP    Wavefront starts execution
+                     _INVALID_OPERATION              with specified exceptions
                                                      enabled.
 
                                                      Used by CP to set up
@@ -1813,19 +1834,19 @@ CP microcode requires the Kernel descrit
 
                                                      IEEE 754 FP Invalid
                                                      Operation
-     25      1 bit   enable_exception_fp_denormal    FP Denormal one or more
-                     _source                         input operands is a
+     25      1 bit   ENABLE_EXCEPTION_FP_DENORMAL    FP Denormal one or more
+                     _SOURCE                         input operands is a
                                                      denormal number
-     26      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP Division by
-                     _division_by_zero               Zero
-     27      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP FP Overflow
-                     _overflow
-     28      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP Underflow
-                     _underflow
-     29      1 bit   enable_exception_ieee_754_fp    IEEE 754 FP Inexact
-                     _inexact
-     30      1 bit   enable_exception_int_divide_by  Integer Division by Zero
-                     _zero                           (rcp_iflag_f32 instruction
+     26      1 bit   ENABLE_EXCEPTION_IEEE_754_FP    IEEE 754 FP Division by
+                     _DIVISION_BY_ZERO               Zero
+     27      1 bit   ENABLE_EXCEPTION_IEEE_754_FP    IEEE 754 FP FP Overflow
+                     _OVERFLOW
+     28      1 bit   ENABLE_EXCEPTION_IEEE_754_FP    IEEE 754 FP Underflow
+                     _UNDERFLOW
+     29      1 bit   ENABLE_EXCEPTION_IEEE_754_FP    IEEE 754 FP Inexact
+                     _INEXACT
+     30      1 bit   ENABLE_EXCEPTION_INT_DIVIDE_BY  Integer Division by Zero
+                     _ZERO                           (rcp_iflag_f32 instruction
                                                      only)
      31      1 bit                                   Reserved. Must be 0.
      32      **Total size 4 bytes.**
@@ -1836,45 +1857,46 @@ CP microcode requires the Kernel descrit
   .. table:: Floating Point Rounding Mode Enumeration Values
      :name: amdgpu-amdhsa-floating-point-rounding-mode-enumeration-values-table
 
-     ===================================== ===== ===============================
-     Enumeration Name                      Value Description
-     ===================================== ===== ===============================
-     AMD_FLOAT_ROUND_MODE_NEAR_EVEN        0     Round Ties To Even
-     AMD_FLOAT_ROUND_MODE_PLUS_INFINITY    1     Round Toward +infinity
-     AMD_FLOAT_ROUND_MODE_MINUS_INFINITY   2     Round Toward -infinity
-     AMD_FLOAT_ROUND_MODE_ZERO             3     Round Toward 0
-     ===================================== ===== ===============================
+     ====================================== ===== ==============================
+     Enumeration Name                       Value Description
+     ====================================== ===== ==============================
+     AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN      0     Round Ties To Even
+     AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY  1     Round Toward +infinity
+     AMDGPU_FLOAT_ROUND_MODE_MINUS_INFINITY 2     Round Toward -infinity
+     AMDGPU_FLOAT_ROUND_MODE_ZERO           3     Round Toward 0
+     ====================================== ===== ==============================
 
 ..
 
   .. table:: Floating Point Denorm Mode Enumeration Values
      :name: amdgpu-amdhsa-floating-point-denorm-mode-enumeration-values-table
 
-     ===================================== ===== ===============================
-     Enumeration Name                      Value Description
-     ===================================== ===== ===============================
-     AMD_FLOAT_DENORM_MODE_FLUSH_SRC_DST   0     Flush Source and Destination
-                                                 Denorms
-     AMD_FLOAT_DENORM_MODE_FLUSH_DST       1     Flush Output Denorms
-     AMD_FLOAT_DENORM_MODE_FLUSH_SRC       2     Flush Source Denorms
-     AMD_FLOAT_DENORM_MODE_FLUSH_NONE      3     No Flush
-     ===================================== ===== ===============================
+     ====================================== ===== ==============================
+     Enumeration Name                       Value Description
+     ====================================== ===== ==============================
+     AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST 0     Flush Source and Destination
+                                                  Denorms
+     AMDGPU_FLOAT_DENORM_MODE_FLUSH_DST     1     Flush Output Denorms
+     AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC     2     Flush Source Denorms
+     AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE    3     No Flush
+     ====================================== ===== ==============================
 
 ..
 
   .. table:: System VGPR Work-Item ID Enumeration Values
      :name: amdgpu-amdhsa-system-vgpr-work-item-id-enumeration-values-table
 
-     ===================================== ===== ===============================
-     Enumeration Name                      Value Description
-     ===================================== ===== ===============================
-     AMD_SYSTEM_VGPR_WORKITEM_ID_X         0     Set work-item X dimension ID.
-     AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y       1     Set work-item X and Y
-                                                 dimensions ID.
-     AMD_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     2     Set work-item X, Y and Z
-                                                 dimensions ID.
-     AMD_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3     Undefined.
-     ===================================== ===== ===============================
+     ======================================== ===== ============================
+     Enumeration Name                         Value Description
+     ======================================== ===== ============================
+     AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X         0     Set work-item X dimension
+                                                    ID.
+     AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y       1     Set work-item X and Y
+                                                    dimensions ID.
+     AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     2     Set work-item X, Y and Z
+                                                    dimensions ID.
+     AMDGPU_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED 3     Undefined.
+     ======================================== ===== ============================
 
 .. _amdgpu-amdhsa-initial-kernel-execution-state:
 

Added: llvm/trunk/include/llvm/Support/AMDGPUKernelDescriptor.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/AMDGPUKernelDescriptor.h?rev=315822&view=auto
==============================================================================
--- llvm/trunk/include/llvm/Support/AMDGPUKernelDescriptor.h (added)
+++ llvm/trunk/include/llvm/Support/AMDGPUKernelDescriptor.h Sat Oct 14 12:17:08 2017
@@ -0,0 +1,139 @@
+//===--- AMDGPUKernelDescriptor.h -------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief AMDGPU kernel descriptor definitions. For more information, visit
+/// https://llvm.org/docs/AMDGPUUsage.html#kernel-descriptor-for-gfx6-gfx9
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H
+#define LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H
+
+#include <cstdint>
+
+// Creates enumeration entries used for packing bits into integers. Enumeration
+// entries include bit shift amount, bit width, and bit mask.
+#define AMDGPU_BITS_ENUM_ENTRY(name, shift, width) \
+  name ## _SHIFT = (shift),                        \
+  name ## _WIDTH = (width),                        \
+  name = (((1 << (width)) - 1) << (shift))         \
+
+// Gets bits for specified bit mask from specified source.
+#define AMDGPU_BITS_GET(src, mask) \
+  ((src & mask) >> mask ## _SHIFT) \
+
+// Sets bits for specified bit mask in specified destination.
+#define AMDGPU_BITS_SET(dst, mask, val)     \
+  dst &= (~(1 << mask ## _SHIFT) & ~mask);  \
+  dst |= (((val) << mask ## _SHIFT) & mask) \
+
+namespace llvm {
+namespace AMDGPU {
+namespace HSAKD {
+
+/// \brief Floating point rounding modes.
+enum : uint8_t {
+  AMDGPU_FLOAT_ROUND_MODE_NEAR_EVEN      = 0,
+  AMDGPU_FLOAT_ROUND_MODE_PLUS_INFINITY  = 1,
+  AMDGPU_FLOAT_ROUND_MODE_MINUS_INFINITY = 2,
+  AMDGPU_FLOAT_ROUND_MODE_ZERO           = 3,
+};
+
+/// \brief Floating point denorm modes.
+enum : uint8_t {
+  AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC_DST = 0,
+  AMDGPU_FLOAT_DENORM_MODE_FLUSH_DST     = 1,
+  AMDGPU_FLOAT_DENORM_MODE_FLUSH_SRC     = 2,
+  AMDGPU_FLOAT_DENORM_MODE_FLUSH_NONE    = 3,
+};
+
+/// \brief System VGPR workitem IDs.
+enum : uint8_t {
+  AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X         = 0,
+  AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y       = 1,
+  AMDGPU_SYSTEM_VGPR_WORKITEM_ID_X_Y_Z     = 2,
+  AMDGPU_SYSTEM_VGPR_WORKITEM_ID_UNDEFINED = 3,
+};
+
+/// \brief Compute program resource register one layout.
+enum ComputePgmRsrc1 {
+  AMDGPU_BITS_ENUM_ENTRY(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
+  AMDGPU_BITS_ENUM_ENTRY(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
+  AMDGPU_BITS_ENUM_ENTRY(PRIORITY, 10, 2),
+  AMDGPU_BITS_ENUM_ENTRY(FLOAT_ROUND_MODE_32, 12, 2),
+  AMDGPU_BITS_ENUM_ENTRY(FLOAT_ROUND_MODE_16_64, 14, 2),
+  AMDGPU_BITS_ENUM_ENTRY(FLOAT_DENORM_MODE_32, 16, 2),
+  AMDGPU_BITS_ENUM_ENTRY(FLOAT_DENORM_MODE_16_64, 18, 2),
+  AMDGPU_BITS_ENUM_ENTRY(PRIV, 20, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_DX10_CLAMP, 21, 1),
+  AMDGPU_BITS_ENUM_ENTRY(DEBUG_MODE, 22, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_IEEE_MODE, 23, 1),
+  AMDGPU_BITS_ENUM_ENTRY(BULKY, 24, 1),
+  AMDGPU_BITS_ENUM_ENTRY(CDBG_USER, 25, 1),
+  AMDGPU_BITS_ENUM_ENTRY(FP16_OVFL, 26, 1),
+  AMDGPU_BITS_ENUM_ENTRY(RESERVED0, 27, 5),
+};
+
+/// \brief Compute program resource register two layout.
+enum ComputePgmRsrc2 {
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_PRIVATE_SEGMENT_WAVE_OFFSET, 0, 1),
+  AMDGPU_BITS_ENUM_ENTRY(USER_SGPR_COUNT, 1, 5),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_TRAP_HANDLER, 6, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_X, 7, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_Y, 8, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_ID_Z, 9, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_SGPR_WORKGROUP_INFO, 10, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_VGPR_WORKITEM_ID, 11, 2),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_ADDRESS_WATCH, 13, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_MEMORY, 14, 1),
+  AMDGPU_BITS_ENUM_ENTRY(GRANULATED_LDS_SIZE, 15, 9),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, 24, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 25, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, 26, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 27, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 28, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 29, 1),
+  AMDGPU_BITS_ENUM_ENTRY(ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 30, 1),
+  AMDGPU_BITS_ENUM_ENTRY(RESERVED1, 31, 1),
+};
+
+/// \brief Kernel descriptor layout. This layout should be kept backwards
+/// compatible as it is consumed by the command processor.
+struct KernelDescriptor final {
+  uint32_t GroupSegmentFixedSize;
+  uint32_t PrivateSegmentFixedSize;
+  uint32_t MaxFlatWorkgroupSize;
+  uint64_t IsDynamicCallStack : 1;
+  uint64_t IsXNACKEnabled : 1;
+  uint64_t Reserved0 : 30;
+  int64_t KernelCodeEntryByteOffset;
+  uint64_t Reserved1[3];
+  uint32_t ComputePgmRsrc1;
+  uint32_t ComputePgmRsrc2;
+  uint64_t EnableSGPRPrivateSegmentBuffer : 1;
+  uint64_t EnableSGPRDispatchPtr : 1;
+  uint64_t EnableSGPRQueuePtr : 1;
+  uint64_t EnableSGPRKernargSegmentPtr : 1;
+  uint64_t EnableSGPRDispatchID : 1;
+  uint64_t EnableSGPRFlatScratchInit : 1;
+  uint64_t EnableSGPRPrivateSegmentSize : 1;
+  uint64_t EnableSGPRGridWorkgroupCountX : 1;
+  uint64_t EnableSGPRGridWorkgroupCountY : 1;
+  uint64_t EnableSGPRGridWorkgroupCountZ : 1;
+  uint64_t Reserved2 : 54;
+
+  KernelDescriptor() = default;
+};
+
+} // end namespace HSAKD
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_SUPPORT_AMDGPUKERNELDESCRIPTOR_H