[llvm] [AMDGPU] Create new directive .amdhsa_inst_pref_size (PR #126622)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 15:14:07 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Stanislav Mekhanoshin (rampitec)
<details>
<summary>Changes</summary>
The INST_PREF_SIZE field has been available since gfx11; this change adds the `.amdhsa_inst_pref_size` directive to set it.
---
Full diff: https://github.com/llvm/llvm-project/pull/126622.diff
10 Files Affected:
- (modified) llvm/docs/AMDGPUUsage.rst (+3)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+12)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+4-4)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+10)
- (modified) llvm/test/MC/AMDGPU/hsa-diag-v4.s (+10)
- (modified) llvm/test/MC/AMDGPU/hsa-gfx12-v4.s (+3-1)
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s (+2)
- (modified) llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s (+6-2)
- (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s (+7-4)
- (modified) llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s (+3-2)
``````````diff
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 84980d0c31d4f98..899b2cf3b490179 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -18213,6 +18213,9 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx12-table`.
``.amdhsa_shared_vgpr_count`` 0 GFX10-GFX11 Controls SHARED_VGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table`.
+ ``.amdhsa_inst_pref_size`` 0 GFX11-GFX12 Controls INST_PREF_SIZE in
+ :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx10-gfx11-table` or
+ :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx12-table`.
``.amdhsa_exception_fp_ieee_invalid_op`` 0 GFX6-GFX12 Controls ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx12-table`.
``.amdhsa_exception_fp_denorm_src`` 0 GFX6-GFX12 Controls ENABLE_EXCEPTION_FP_DENORMAL_SOURCE in
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 4b6d02fff4aeccc..4ff9cff09f31d22 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -5876,6 +5876,18 @@ bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT, ExprVal,
ValRange);
+ } else if (ID == ".amdhsa_inst_pref_size") {
+ if (IVersion.Major < 11)
+ return Error(IDRange.Start, "directive requires gfx11+", IDRange);
+ if (IVersion.Major == 11) {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
+ COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE, ExprVal,
+ ValRange);
+ } else {
+ PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3,
+ COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE, ExprVal,
+ ValRange);
+ }
} else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
PARSE_BITS_ENTRY(
KD.compute_pgm_rsrc2,
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 58cdbe6cf373ede..02ad08740049d37 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -2233,15 +2233,15 @@ Expected<bool> AMDGPUDisassembler::decodeCOMPUTE_PGM_RSRC3(
// Bits [4-11].
if (isGFX11()) {
- PRINT_PSEUDO_DIRECTIVE_COMMENT("INST_PREF_SIZE",
- COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
+ PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
+ COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_START",
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_START);
PRINT_PSEUDO_DIRECTIVE_COMMENT("TRAP_ON_END",
COMPUTE_PGM_RSRC3_GFX11_TRAP_ON_END);
} else if (isGFX12Plus()) {
- PRINT_PSEUDO_DIRECTIVE_COMMENT(
- "INST_PREF_SIZE", COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
+ PRINT_DIRECTIVE(".amdhsa_inst_pref_size",
+ COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE);
} else {
CHECK_RESERVED_BITS_DESC_MSG(COMPUTE_PGM_RSRC3_GFX10_RESERVED1,
"COMPUTE_PGM_RSRC3",
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index eccd77d6c00f0b7..059bab583852667 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -579,7 +579,17 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
amdhsa::COMPUTE_PGM_RSRC3_GFX10_GFX11_SHARED_VGPR_COUNT,
".amdhsa_shared_vgpr_count");
}
+ if (IVersion.Major == 11) {
+ PrintField(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX11_INST_PREF_SIZE,
+ ".amdhsa_inst_pref_size");
+ }
if (IVersion.Major >= 12) {
+ PrintField(KD.compute_pgm_rsrc3,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE_SHIFT,
+ amdhsa::COMPUTE_PGM_RSRC3_GFX12_PLUS_INST_PREF_SIZE,
+ ".amdhsa_inst_pref_size");
PrintField(KD.compute_pgm_rsrc1,
amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN_SHIFT,
amdhsa::COMPUTE_PGM_RSRC1_GFX12_PLUS_ENABLE_WG_RR_EN,
diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v4.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
index 3733b162edcfbd2..9ab177cf2b125a6 100644
--- a/llvm/test/MC/AMDGPU/hsa-diag-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
@@ -280,6 +280,16 @@
.amdhsa_shared_vgpr_count 15
.end_amdhsa_kernel
+// GCN-LABEL: warning: test_amdhsa_inst_pref_size_invalid
+// PREGFX10: error: directive requires gfx11+
+// NONAMDHSA: error: unknown directive
+.warning "test_amdhsa_inst_pref_size_invalid"
+.amdhsa_kernel test_amdhsa_inst_pref_size_invalid
+ .amdhsa_next_free_vgpr 273
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_inst_pref_size 15
+.end_amdhsa_kernel
+
// GCN-LABEL: warning: test_next_free_vgpr_invalid
// AMDHSA: error: .amdgcn.next_free_{v,s}gpr symbols must be absolute expressions
// NONAMDHSA-NOT: error:
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
index ea649bc76116adf..e90a97600822994 100644
--- a/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
+++ b/llvm/test/MC/AMDGPU/hsa-gfx12-v4.s
@@ -33,7 +33,7 @@
// complete
// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f00f0000
// OBJDUMP-NEXT: 0070 015021e4 1f0f007f 5e040000 00000000
// special_sgpr
// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
@@ -120,6 +120,7 @@ disabled_user_sgpr:
.amdhsa_workgroup_processor_mode 1
.amdhsa_memory_ordered 1
.amdhsa_forward_progress 1
+ .amdhsa_inst_pref_size 255
.amdhsa_round_robin_scheduling 1
.amdhsa_exception_fp_ieee_invalid_op 1
.amdhsa_exception_fp_denorm_src 1
@@ -158,6 +159,7 @@ disabled_user_sgpr:
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_inst_pref_size 255
// ASM-NEXT: .amdhsa_round_robin_scheduling 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
index 85a7ad05b00f484..68cf28f2ac49d00 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx11.s
@@ -133,6 +133,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((1621884928|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
// ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_inst_pref_size 0
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24
// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&67108864)>>26
@@ -180,6 +181,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
// ASM-NEXT: .amdhsa_shared_vgpr_count 0
+// ASM-NEXT: .amdhsa_inst_pref_size 0
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
diff --git a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
index 51d0fb30b320c52..6f7a9a26056811a 100644
--- a/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
+++ b/llvm/test/MC/AMDGPU/hsa-sym-exprs-gfx12.s
@@ -9,12 +9,12 @@
// expr_defined_later
// OBJDUMP-NEXT: 0000 2b000000 2c000000 00000000 00000000
// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 f0020000
// OBJDUMP-NEXT: 0030 05f02fe4 811f007f 000c0000 00000000
// expr_defined
// OBJDUMP-NEXT: 0040 2a000000 2b000000 00000000 00000000
// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 f0020000
// OBJDUMP-NEXT: 0070 05f02fe4 811f007f 000c0000 00000000
.text
@@ -53,6 +53,7 @@ expr_defined:
.amdhsa_workgroup_processor_mode defined_boolean
.amdhsa_memory_ordered defined_boolean
.amdhsa_forward_progress defined_boolean
+ .amdhsa_inst_pref_size defined_value+6
.amdhsa_exception_fp_ieee_invalid_op defined_boolean
.amdhsa_exception_fp_denorm_src defined_boolean
.amdhsa_exception_fp_ieee_div_zero defined_boolean
@@ -89,6 +90,7 @@ expr_defined:
.amdhsa_workgroup_processor_mode defined_boolean
.amdhsa_memory_ordered defined_boolean
.amdhsa_forward_progress defined_boolean
+ .amdhsa_inst_pref_size defined_value+6
.amdhsa_exception_fp_ieee_invalid_op defined_boolean
.amdhsa_exception_fp_denorm_src defined_boolean
.amdhsa_exception_fp_ieee_div_zero defined_boolean
@@ -132,6 +134,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_workgroup_processor_mode (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&536870912)>>29
// ASM-NEXT: .amdhsa_memory_ordered (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&1073741824)>>30
// ASM-NEXT: .amdhsa_forward_progress (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2147483648)>>31
+// ASM-NEXT: .amdhsa_inst_pref_size (((defined_value+6)<<4)&4080)>>4
// ASM-NEXT: .amdhsa_round_robin_scheduling (((((((((((((((((((((1611399168|(defined_2_bits<<12))&(~49152))|(defined_2_bits<<14))&(~196608))|(defined_2_bits<<16))&(~786432))|(defined_2_bits<<18))&(~67108864))|(defined_boolean<<26))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~2147483648))|(defined_boolean<<31))&(~2097152))|(defined_boolean<<21))&(~63))|(((alignto(max(defined_value+4, 1), 8))/8)-1))&(~960))&2097152)>>21
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&16777216)>>24
// ASM-NEXT: .amdhsa_exception_fp_denorm_src (((((((((((((((((((((((((((128|(defined_2_bits<<11))&(~128))|(defined_boolean<<7))&(~256))|(defined_boolean<<8))&(~512))|(defined_boolean<<9))&(~1024))|(defined_boolean<<10))&(~16777216))|(defined_boolean<<24))&(~33554432))|(defined_boolean<<25))&(~67108864))|(defined_boolean<<26))&(~134217728))|(defined_boolean<<27))&(~268435456))|(defined_boolean<<28))&(~536870912))|(defined_boolean<<29))&(~1073741824))|(defined_boolean<<30))&(~1))|defined_boolean)&(~62))&33554432)>>25
@@ -177,6 +180,7 @@ expr_defined:
// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
// ASM-NEXT: .amdhsa_memory_ordered 1
// ASM-NEXT: .amdhsa_forward_progress 1
+// ASM-NEXT: .amdhsa_inst_pref_size 47
// ASM-NEXT: .amdhsa_round_robin_scheduling 1
// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
index 750809128189f1f..3cd7a0503e30177 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx11.s
@@ -13,7 +13,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: ; SHARED_VGPR_COUNT 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
@@ -70,7 +70,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 0
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
@@ -114,6 +114,7 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_shared_vgpr_count 0
+ .amdhsa_inst_pref_size 0
.end_amdhsa_kernel
;--- 3.s
@@ -127,7 +128,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 63
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
@@ -171,6 +172,7 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_shared_vgpr_count 1
+ .amdhsa_inst_pref_size 63
.end_amdhsa_kernel
;--- 4.s
@@ -184,7 +186,7 @@
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
; CHECK-NEXT: .amdhsa_shared_vgpr_count 1
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 63
; CHECK-NEXT: ; TRAP_ON_START 0
; CHECK-NEXT: ; TRAP_ON_END 0
; CHECK-NEXT: ; IMAGE_OP 0
@@ -228,5 +230,6 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_shared_vgpr_count 1
+ .amdhsa_inst_pref_size 63
.amdhsa_wavefront_size32 0
.end_amdhsa_kernel
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
index c644e15efc8d79d..ed2b87d9885c612 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-gfx12.s
@@ -12,7 +12,7 @@
; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 0
; CHECK-NEXT: ; GLG_EN 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
@@ -66,7 +66,7 @@
; CHECK-NEXT: .amdhsa_group_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_private_segment_fixed_size 0
; CHECK-NEXT: .amdhsa_kernarg_size 0
-; CHECK-NEXT: ; INST_PREF_SIZE 0
+; CHECK-NEXT: .amdhsa_inst_pref_size 255
; CHECK-NEXT: ; GLG_EN 0
; CHECK-NEXT: ; IMAGE_OP 0
; CHECK-NEXT: .amdhsa_next_free_vgpr 32
@@ -108,4 +108,5 @@
.amdhsa_next_free_vgpr 32
.amdhsa_next_free_sgpr 32
.amdhsa_wavefront_size32 0
+ .amdhsa_inst_pref_size 255
.end_amdhsa_kernel
``````````
</details>
https://github.com/llvm/llvm-project/pull/126622
More information about the llvm-commits mailing list