[llvm] AMDGPU: Minor updates to program resource registers (PR #69525)

Konstantin Zhuravlyov via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 18 14:39:33 PDT 2023


https://github.com/kzhuravl updated https://github.com/llvm/llvm-project/pull/69525

>From 428714fec6454aefae167d89c8f3d99e57ca0805 Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov <kzhuravl at amd.com>
Date: Wed, 18 Oct 2023 17:11:47 -0400
Subject: [PATCH 1/2] AMDGPU: Minor updates to program resource registers

  - Be explicit about which program resource register is supported by which target
    - RSRC1
      - FP16_OVFL is GFX9+
      - WGP_MODE is GFX10+
      - MEM_ORDERED is GFX10+
      - FWD_PROGRESS is GFX10+
    - RSRC3
      - INST_PREF_SIZE is GFX11+
      - TRAP_ON_START is GFX11+
      - TRAP_ON_END is GFX11+
      - IMAGE_OP is GFX11+
  - Do not emit GFX11+ fields when disassembling GFX10 code objects
  - Tighten enforcement of reserved bits in disassembler
---
 .../llvm/Support/AMDHSAKernelDescriptor.h     | 46 +++++++++++++-----
 .../AMDGPU/AsmParser/AMDGPUAsmParser.cpp      |  8 ++--
 .../Disassembler/AMDGPUDisassembler.cpp       | 48 +++++++++++++------
 .../MCTargetDesc/AMDGPUTargetStreamer.cpp     |  8 ++--
 .../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp    |  4 +-
 .../tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s  | 16 -------
 .../llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s |  4 --
 7 files changed, 79 insertions(+), 55 deletions(-)

diff --git a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
index 1bd65471d3b7c90..0574f96e6e15c4f 100644
--- a/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
+++ b/llvm/include/llvm/Support/AMDHSAKernelDescriptor.h
@@ -79,8 +79,21 @@ enum : uint8_t {
 };
 
 // Compute program resource register 1. Must match hardware definition.
+// GFX6+.
 #define COMPUTE_PGM_RSRC1(NAME, SHIFT, WIDTH) \
   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_ ## NAME, SHIFT, WIDTH)
+// [GFX6-GFX8].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX8(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX8_ ## NAME, SHIFT, WIDTH)
+// [GFX6-GFX9].
+#define COMPUTE_PGM_RSRC1_GFX6_GFX9(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX6_GFX9_ ## NAME, SHIFT, WIDTH)
+// GFX9+.
+#define COMPUTE_PGM_RSRC1_GFX9_PLUS(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX9_PLUS_ ## NAME, SHIFT, WIDTH)
+// GFX10+.
+#define COMPUTE_PGM_RSRC1_GFX10_PLUS(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC1_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
 enum : int32_t {
   COMPUTE_PGM_RSRC1(GRANULATED_WORKITEM_VGPR_COUNT, 0, 6),
   COMPUTE_PGM_RSRC1(GRANULATED_WAVEFRONT_SGPR_COUNT, 6, 4),
@@ -95,11 +108,13 @@ enum : int32_t {
   COMPUTE_PGM_RSRC1(ENABLE_IEEE_MODE, 23, 1),
   COMPUTE_PGM_RSRC1(BULKY, 24, 1),
   COMPUTE_PGM_RSRC1(CDBG_USER, 25, 1),
-  COMPUTE_PGM_RSRC1(FP16_OVFL, 26, 1),    // GFX9+
-  COMPUTE_PGM_RSRC1(RESERVED0, 27, 2),
-  COMPUTE_PGM_RSRC1(WGP_MODE, 29, 1),     // GFX10+
-  COMPUTE_PGM_RSRC1(MEM_ORDERED, 30, 1),  // GFX10+
-  COMPUTE_PGM_RSRC1(FWD_PROGRESS, 31, 1), // GFX10+
+  COMPUTE_PGM_RSRC1_GFX6_GFX8(RESERVED0, 26, 1),
+  COMPUTE_PGM_RSRC1_GFX9_PLUS(FP16_OVFL, 26, 1),
+  COMPUTE_PGM_RSRC1(RESERVED1, 27, 2),
+  COMPUTE_PGM_RSRC1_GFX6_GFX9(RESERVED2, 29, 3),
+  COMPUTE_PGM_RSRC1_GFX10_PLUS(WGP_MODE, 29, 1),
+  COMPUTE_PGM_RSRC1_GFX10_PLUS(MEM_ORDERED, 30, 1),
+  COMPUTE_PGM_RSRC1_GFX10_PLUS(FWD_PROGRESS, 31, 1),
 };
 #undef COMPUTE_PGM_RSRC1
 
@@ -143,15 +158,24 @@ enum : int32_t {
 
 // Compute program resource register 3 for GFX10+. Must match hardware
 // definition.
+// [GFX10].
+#define COMPUTE_PGM_RSRC3_GFX10(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_ ## NAME, SHIFT, WIDTH)
+// GFX10+.
 #define COMPUTE_PGM_RSRC3_GFX10_PLUS(NAME, SHIFT, WIDTH) \
   AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX10_PLUS_ ## NAME, SHIFT, WIDTH)
+// GFX11+.
+#define COMPUTE_PGM_RSRC3_GFX11_PLUS(NAME, SHIFT, WIDTH) \
+  AMDHSA_BITS_ENUM_ENTRY(COMPUTE_PGM_RSRC3_GFX11_PLUS_ ## NAME, SHIFT, WIDTH)
 enum : int32_t {
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4), // GFX10+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(INST_PREF_SIZE, 4, 6),    // GFX11+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_START, 10, 1),    // GFX11+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(TRAP_ON_END, 11, 1),      // GFX11+
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED0, 12, 19),
-  COMPUTE_PGM_RSRC3_GFX10_PLUS(IMAGE_OP, 31, 1),         // GFX11+
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(SHARED_VGPR_COUNT, 0, 4),
+  COMPUTE_PGM_RSRC3_GFX10(RESERVED0, 4, 8),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(INST_PREF_SIZE, 4, 6),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_START, 10, 1),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(TRAP_ON_END, 11, 1),
+  COMPUTE_PGM_RSRC3_GFX10_PLUS(RESERVED1, 12, 19),
+  COMPUTE_PGM_RSRC3_GFX10(RESERVED2, 31, 1),
+  COMPUTE_PGM_RSRC3_GFX11_PLUS(IMAGE_OP, 31, 1),
 };
 #undef COMPUTE_PGM_RSRC3_GFX10_PLUS
 
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index faeaa94f9733576..9e143c77b606c34 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5242,7 +5242,7 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
     } else if (ID == ".amdhsa_fp16_overflow") {
       if (IVersion.Major < 9)
         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL, Val,
                        ValRange);
     } else if (ID == ".amdhsa_tg_split") {
       if (!isGFX90A())
@@ -5252,17 +5252,17 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
     } else if (ID == ".amdhsa_workgroup_processor_mode") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Val,
                        ValRange);
     } else if (ID == ".amdhsa_memory_ordered") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Val,
                        ValRange);
     } else if (ID == ".amdhsa_forward_progress") {
       if (IVersion.Major < 10)
         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
-      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
+      PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Val,
                        ValRange);
     } else if (ID == ".amdhsa_shared_vgpr_count") {
       if (IVersion.Major < 10)
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index d74fd0b3a9ea74e..1b301ee5f49b216 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -1818,16 +1818,23 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC1(
   if (FourByteBuffer & COMPUTE_PGM_RSRC1_CDBG_USER)
     return MCDisassembler::Fail;
 
-  PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_FP16_OVFL);
+  if (isGFX9Plus())
+    PRINT_DIRECTIVE(".amdhsa_fp16_overflow", COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
 
-  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED0)
+  if (!isGFX9Plus())
+    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX8_RESERVED0)
+      return MCDisassembler::Fail;
+  if (FourByteBuffer & COMPUTE_PGM_RSRC1_RESERVED1)
     return MCDisassembler::Fail;
+  if (!isGFX10Plus())
+    if (FourByteBuffer & COMPUTE_PGM_RSRC1_GFX6_GFX9_RESERVED2)
+      return MCDisassembler::Fail;
 
   if (isGFX10Plus()) {
     PRINT_DIRECTIVE(".amdhsa_workgroup_processor_mode",
-                    COMPUTE_PGM_RSRC1_WGP_MODE);
-    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_MEM_ORDERED);
-    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+                    COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
+    PRINT_DIRECTIVE(".amdhsa_memory_ordered", COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
+    PRINT_DIRECTIVE(".amdhsa_forward_progress", COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
   }
   return MCDisassembler::Success;
 }
@@ -1908,16 +1915,29 @@ MCDisassembler::DecodeStatus AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
       PRINT_PSEUDO_DIRECTIVE_COMMENT(
           "SHARED_VGPR_COUNT", COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
     }
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_INST_PREF_SIZE);
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_END);
-    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED0)
+
+    if (isGFX11Plus()) {
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_INST_PREF_SIZE);
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_END);
+    } else {
+      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED0)
+        return MCDisassembler::Fail;
+    }
+
+    if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_PLUS_RESERVED1)
       return MCDisassembler::Fail;
-    PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
-                                   COMPUTE_PGM_RSRC3_GFX10_PLUS_TRAP_ON_START);
+
+    if (isGFX11Plus()) {
+      PRINT_PSEUDO_DIRECTIVE_COMMENT("IMAGE_OP",
+                                     COMPUTE_PGM_RSRC3_GFX11_PLUS_TRAP_ON_START);
+    } else {
+      if (FourByteBuffer & COMPUTE_PGM_RSRC3_GFX10_RESERVED2)
+        return MCDisassembler::Fail;
+    }
   } else if (FourByteBuffer) {
     return MCDisassembler::Fail;
   }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6b8c03c1620d26b..70350b83849aaae 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -453,7 +453,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
   if (IVersion.Major >= 9)
     PRINT_FIELD(OS, ".amdhsa_fp16_overflow", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_FP16_OVFL);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX9_PLUS_FP16_OVFL);
   if (AMDGPU::isGFX90A(STI))
     PRINT_FIELD(OS, ".amdhsa_tg_split", KD,
                 compute_pgm_rsrc3,
@@ -461,13 +461,13 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
   if (IVersion.Major >= 10) {
     PRINT_FIELD(OS, ".amdhsa_workgroup_processor_mode", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE);
     PRINT_FIELD(OS, ".amdhsa_memory_ordered", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED);
     PRINT_FIELD(OS, ".amdhsa_forward_progress", KD,
                 compute_pgm_rsrc1,
-                amdhsa::COMPUTE_PGM_RSRC1_FWD_PROGRESS);
+                amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS);
     PRINT_FIELD(OS, ".amdhsa_shared_vgpr_count", KD, compute_pgm_rsrc3,
                 amdhsa::COMPUTE_PGM_RSRC3_GFX10_PLUS_SHARED_VGPR_COUNT);
   }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d123b384a27d4cc..954f21b5ec49bda 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1163,10 +1163,10 @@ amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
                     amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                     STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
+                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE,
                     STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
-                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
+                    amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, 1);
   }
   if (AMDGPU::isGFX90A(*STI)) {
     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
index 52b399e4f0c56e2..58b01031afe383e 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx10.s
@@ -12,10 +12,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -69,10 +65,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -126,10 +118,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
@@ -183,10 +171,6 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
-; CHECK-NEXT: ; TRAP_ON_START 0
-; CHECK-NEXT: ; TRAP_ON_END 0
-; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
 ; CHECK-NEXT: .amdhsa_reserve_vcc 0
 ; CHECK-NEXT: .amdhsa_reserve_flat_scratch 0
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
index 04cf28f89e44826..39cef4da4278df2 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-zeroed-gfx10.s
@@ -26,10 +26,6 @@
 ; OBJDUMP-NEXT:         .amdhsa_private_segment_fixed_size 0
 ; OBJDUMP-NEXT:         .amdhsa_kernarg_size 0
 ; OBJDUMP-NEXT:         .amdhsa_shared_vgpr_count 0
-; OBJDUMP-NEXT:         ; INST_PREF_SIZE 0
-; OBJDUMP-NEXT:         ; TRAP_ON_START 0
-; OBJDUMP-NEXT:         ; TRAP_ON_END 0
-; OBJDUMP-NEXT:         ; IMAGE_OP 0
 ; OBJDUMP-NEXT:         .amdhsa_next_free_vgpr 8
 ; OBJDUMP-NEXT:         .amdhsa_reserve_vcc 0
 ; OBJDUMP-NEXT:         .amdhsa_reserve_flat_scratch 0

>From b1a20a30571d87873c6fc6997a467f4e1f7842da Mon Sep 17 00:00:00 2001
From: Konstantin Zhuravlyov <kzhuravl at amd.com>
Date: Wed, 18 Oct 2023 17:38:56 -0400
Subject: [PATCH 2/2] AMDGPU: Add kd-gfx11.s test

---
 .../tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s  | 220 ++++++++++++++++++
 1 file changed, 220 insertions(+)
 create mode 100644 llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s

diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
new file mode 100644
index 000000000000000..8ec65bda15253ec
--- /dev/null
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
@@ -0,0 +1,220 @@
+;; Test disassembly for gfx10 kernel descriptor.
+
+; RUN: rm -rf %t && split-file %s %t && cd %t
+
+;--- 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1100 < 1.s > 1.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 1.o | tail -n +7 | tee 1-disasm.s | FileCheck 1.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize32,-wavefrontsize64 -filetype=obj -mcpu=gfx1100 < 1-disasm.s > 1-disasm.o
+; RUN: cmp 1.o 1-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 1
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_wavefront_size32 1
+.end_amdhsa_kernel
+
+;--- 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 2.s > 2.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 2.o | tail -n +7 | tee 2-disasm.s | FileCheck 2.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 2-disasm.s > 2-disasm.o
+; RUN: cmp 2.o 2-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_shared_vgpr_count 0
+.end_amdhsa_kernel
+
+;--- 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 3.s > 3.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 3.o | tail -n +7 | tee 3-disasm.s | FileCheck 3.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 3-disasm.s > 3-disasm.o
+; RUN: cmp 3.o 3-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_shared_vgpr_count 1
+.end_amdhsa_kernel
+
+;--- 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 4.s > 4.o
+; RUN: llvm-objdump --disassemble-symbols=kernel.kd 4.o | tail -n +7 | tee 4-disasm.s | FileCheck 4.s
+; RUN: llvm-mc --triple=amdgcn-amd-amdhsa -mattr=+wavefrontsize64,-wavefrontsize32 -filetype=obj -mcpu=gfx1100 < 4-disasm.s > 4-disasm.o
+; RUN: cmp 4.o 4-disasm.o
+; CHECK: .amdhsa_kernel kernel
+; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
+; CHECK-NEXT: .amdhsa_kernarg_size 0
+; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
+; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: ; TRAP_ON_START 0
+; CHECK-NEXT: ; TRAP_ON_END 0
+; CHECK-NEXT: ; IMAGE_OP 0
+; CHECK-NEXT: .amdhsa_next_free_vgpr 32
+; CHECK-NEXT: .amdhsa_reserve_vcc 0
+; CHECK-NEXT: .amdhsa_reserve_xnack_mask 0
+; CHECK-NEXT: .amdhsa_next_free_sgpr 8
+; CHECK-NEXT: .amdhsa_float_round_mode_32 0
+; CHECK-NEXT: .amdhsa_float_round_mode_16_64 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_32 0
+; CHECK-NEXT: .amdhsa_float_denorm_mode_16_64 3
+; CHECK-NEXT: .amdhsa_dx10_clamp 1
+; CHECK-NEXT: .amdhsa_ieee_mode 1
+; CHECK-NEXT: .amdhsa_fp16_overflow 0
+; CHECK-NEXT: .amdhsa_workgroup_processor_mode 1
+; CHECK-NEXT: .amdhsa_memory_ordered 1
+; CHECK-NEXT: .amdhsa_forward_progress 0
+; CHECK-NEXT: .amdhsa_enable_private_segment 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_x 1
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_y 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_id_z 0
+; CHECK-NEXT: .amdhsa_system_sgpr_workgroup_info 0
+; CHECK-NEXT: .amdhsa_system_vgpr_workitem_id 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_invalid_op 0
+; CHECK-NEXT: .amdhsa_exception_fp_denorm_src 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_div_zero 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_overflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_underflow 0
+; CHECK-NEXT: .amdhsa_exception_fp_ieee_inexact 0
+; CHECK-NEXT: .amdhsa_exception_int_div_zero 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_queue_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 0
+; CHECK-NEXT: .amdhsa_user_sgpr_dispatch_id 0
+; CHECK-NEXT: .amdhsa_user_sgpr_private_segment_size 0
+; CHECK-NEXT: .amdhsa_wavefront_size32 0
+; CHECK-NEXT: .end_amdhsa_kernel
+.amdhsa_kernel kernel
+  .amdhsa_next_free_vgpr 32
+  .amdhsa_next_free_sgpr 32
+  .amdhsa_shared_vgpr_count 1
+  .amdhsa_wavefront_size32 0
+.end_amdhsa_kernel



More information about the llvm-commits mailing list