[llvm] [AMDGPU] Create new directive .amdhsa_inst_pref_size (PR #126622)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 10 15:14:07 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Stanislav Mekhanoshin (rampitec)

<details>
<summary>Changes</summary>

The field INST_PREF_SIZE is available since gfx11.

---
Full diff: https://github.com/llvm/llvm-project/pull/126622.diff


10 Files Affected:

- (modified) llvm/docs/AMDGPUUsage.rst (+3) 
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+12) 
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+4-4) 
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+10) 
- (modified) llvm/test/MC/AMDGPU/hsa-diag-v4.s (+10) 
- (modified) llvm/test/MC/AMDGPU/hsa-gfx12-v4.s (+3-1) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s (+2) 
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s (+6-2) 
- (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s (+7-4) 
- (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s (+3-2) 


``````````diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 84980d0c31d4f98..899b2cf3b490179 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -18213,6 +18213,9 @@ terminated by an ``.end_amdhsa_kernel`` directive.
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
      ``.amdhsa_shared_vgpr_count``                            0                   GFX10-GFX11  Controls SHARED_VGPR_COUNT in
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table`.
+     ``.amdhsa_inst_pref_size``                               0                   GFX11-GFX12  Controls INST_PREF_SIZE in
+                                                                                               :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table` or
+                                                                                               :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx12-table`
      ``.amdhsa_exception_fp_ieee_invalid_op``                 0                   GFX6-GFX12   Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
                                                                                                :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx12-table`.
      ``.amdhsa_exception_fp_denorm_src``                      0                   GFX6-GFX12   Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 4b6d02fff4aeccc..4ff9cff09f31d22 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5876,6 +5876,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
                        COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
                        ValRange);
+    } else if (ID == ".amdhsa_inst_pref_size") {
+      if (IVersion.Major < 11)
+        return Error(IDRange.Start, "directive requires gfx11+", IDRange);
+      if (IVersion.Major == 11) {
+        PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
+                         COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
+                         ValRange);
+      } else {
+        PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
+                         COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
+                         ValRange);
+      }
     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
       PARSE_BITS_ENTRY(
           KD.compute_pgm_rsrc2,
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 58cdbe6cf373ede..02ad08740049d37 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2233,15 +2233,15 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
 
     // Bits [4-11].
     if (isGFX11()) {
-      PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
-                                     COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
+      PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
+                      COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
       PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
                                      COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
     } else if (isGFX12Plus()) {
-      PRINT_PSEUDO_DIRECTIVE_COMMENT(
-          "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
+      PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
+                      COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
     } else {
       CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
                                    "COMPUTE_PGM_RSRC3",
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index eccd77d6c00f0b7..059bab583852667 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -579,7 +579,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
                amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
                ".amdhsa_shared_vgpr_count");
   }
+  if (IVersion.Major == 11) {
+    PrintField(KD.compute_pgm_rsrc3,
+               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
+               amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
+               ".amdhsa_inst_pref_size");
+  }
   if (IVersion.Major >= 12) {
+    PrintField(KD.compute_pgm_rsrc3,
+               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
+               amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
+               ".amdhsa_inst_pref_size");
     PrintField(KD.compute_pgm_rsrc1,
                amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
                amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v4.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
index 3733b162edcfbd2..9ab177cf2b125a6 100644
--- a/llvm/test/MC/AMDGPU/hsa-diag-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
@@ -280,6 +280,16 @@
   .amdhsa_shared_vgpr_count 15
 .end_amdhsa_kernel
 
+// GCN-LABEL: warning: test_amdhsa_inst_pref_size_invalid
+// PREGFX10: error: directive requires gfx11+
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_inst_pref_size_invalid"
+.amdhsa_kernel test_amdhsa_inst_pref_size_invalid
+  .amdhsa_next_free_vgpr 273
+  .amdhsa_next_free_sgpr 0
+  .amdhsa_inst_pref_size 15
+.end_amdhsa_kernel
+
 // GCN-LABEL: warning: test_next_free_vgpr_invalid
 // AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
 // NONAMDHSA-NOT: error:
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
index ea649bc76116adf..e90a97600822994 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
@@ -33,7 +33,7 @@
 // complete
 // OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f00f0000
 // OBJDUMP-NEXT: 0070 015021e4 1f0f007f 5e040000 00000000
 // special_sgpr
 // OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
@@ -120,6 +120,7 @@ disabled_user_sgpr:
   .amdhsa_workgroup_processor_mode 1
   .amdhsa_memory_ordered 1
   .amdhsa_forward_progress 1
+  .amdhsa_inst_pref_size 255
   .amdhsa_round_robin_scheduling 1
   .amdhsa_exception_fp_ieee_invalid_op 1
   .amdhsa_exception_fp_denorm_src 1
@@ -158,6 +159,7 @@ disabled_user_sgpr:
 // ASM-NEXT: .amdhsa_workgroup_processor_mode 1
 // ASM-NEXT: .amdhsa_memory_ordered 1
 // ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_inst_pref_size 255
 // ASM-NEXT: .amdhsa_round_robin_scheduling 1
 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
index 85a7ad05b00f484..68cf28f2ac49d00 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
@@ -133,6 +133,7 @@ expr_defined:
 // ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
 // ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
 // ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_inst_pref_size 0
 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24
 // ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25
 // ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26
@@ -180,6 +181,7 @@ expr_defined:
 // ASM-NEXT: .amdhsa_memory_ordered 1
 // ASM-NEXT: .amdhsa_forward_progress 1
 // ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_inst_pref_size 0
 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
 // ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
index 51d0fb30b320c52..6f7a9a26056811a 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
@@ -9,12 +9,12 @@
 // expr_defined_later
 // OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
 // OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 f0020000
 // OBJDUMP-NEXT: 0030 05f02fe4 811f007f 000c0000 00000000
 // expr_defined
 // OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
 // OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f0020000
 // OBJDUMP-NEXT: 0070 05f02fe4 811f007f 000c0000 00000000
 
 .text
@@ -53,6 +53,7 @@ expr_defined:
   .amdhsa_workgroup_processor_mode defined_boolean
   .amdhsa_memory_ordered defined_boolean
   .amdhsa_forward_progress defined_boolean
+  .amdhsa_inst_pref_size defined_value+6
   .amdhsa_exception_fp_ieee_invalid_op defined_boolean
   .amdhsa_exception_fp_denorm_src defined_boolean
   .amdhsa_exception_fp_ieee_div_zero defined_boolean
@@ -89,6 +90,7 @@ expr_defined:
   .amdhsa_workgroup_processor_mode defined_boolean
   .amdhsa_memory_ordered defined_boolean
   .amdhsa_forward_progress defined_boolean
+  .amdhsa_inst_pref_size defined_value+6
   .amdhsa_exception_fp_ieee_invalid_op defined_boolean
   .amdhsa_exception_fp_denorm_src defined_boolean
   .amdhsa_exception_fp_ieee_div_zero defined_boolean
@@ -132,6 +134,7 @@ expr_defined:
 // ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29
 // ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
 // ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
+// ASM-NEXT: .amdhsa_inst_pref_size (((defined_value+6)<<4)&4080)>>4
 // ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21
 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24
 // ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25
@@ -177,6 +180,7 @@ expr_defined:
 // ASM-NEXT: .amdhsa_workgroup_processor_mode 1
 // ASM-NEXT: .amdhsa_memory_ordered 1
 // ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_inst_pref_size 47
 // ASM-NEXT: .amdhsa_round_robin_scheduling 1
 // ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
 // ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
index 750809128189f1f..3cd7a0503e30177 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
@@ -13,7 +13,7 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 0
 ; CHECK-NEXT: ; TRAP_ON_START 0
 ; CHECK-NEXT: ; TRAP_ON_END 0
 ; CHECK-NEXT: ; IMAGE_OP 0
@@ -70,7 +70,7 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 0
 ; CHECK-NEXT: ; TRAP_ON_START 0
 ; CHECK-NEXT: ; TRAP_ON_END 0
 ; CHECK-NEXT: ; IMAGE_OP 0
@@ -114,6 +114,7 @@
   .amdhsa_next_free_vgpr 32
   .amdhsa_next_free_sgpr 32
   .amdhsa_shared_vgpr_count 0
+  .amdhsa_inst_pref_size 0
 .end_amdhsa_kernel
 
 ;--- 3.s
@@ -127,7 +128,7 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 63
 ; CHECK-NEXT: ; TRAP_ON_START 0
 ; CHECK-NEXT: ; TRAP_ON_END 0
 ; CHECK-NEXT: ; IMAGE_OP 0
@@ -171,6 +172,7 @@
   .amdhsa_next_free_vgpr 32
   .amdhsa_next_free_sgpr 32
   .amdhsa_shared_vgpr_count 1
+  .amdhsa_inst_pref_size 63
 .end_amdhsa_kernel
 
 ;--- 4.s
@@ -184,7 +186,7 @@
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
 ; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 63
 ; CHECK-NEXT: ; TRAP_ON_START 0
 ; CHECK-NEXT: ; TRAP_ON_END 0
 ; CHECK-NEXT: ; IMAGE_OP 0
@@ -228,5 +230,6 @@
   .amdhsa_next_free_vgpr 32
   .amdhsa_next_free_sgpr 32
   .amdhsa_shared_vgpr_count 1
+  .amdhsa_inst_pref_size 63
   .amdhsa_wavefront_size32 0
 .end_amdhsa_kernel
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
index c644e15efc8d79d..ed2b87d9885c612 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
@@ -12,7 +12,7 @@
 ; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 0
 ; CHECK-NEXT: ; GLG_EN 0
 ; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
@@ -66,7 +66,7 @@
 ; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
 ; CHECK-NEXT: .amdhsa_kernarg_size 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 255
 ; CHECK-NEXT: ; GLG_EN 0
 ; CHECK-NEXT: ; IMAGE_OP 0
 ; CHECK-NEXT: .amdhsa_next_free_vgpr 32
@@ -108,4 +108,5 @@
   .amdhsa_next_free_vgpr 32
   .amdhsa_next_free_sgpr 32
   .amdhsa_wavefront_size32 0
+  .amdhsa_inst_pref_size 255
 .end_amdhsa_kernel

``````````

</details>


https://github.com/llvm/llvm-project/pull/126622


More information about the llvm-commits mailing list