[llvm] r370250 - [AMDGPU] Fix bug when calculating user_spgr_count for Code Object V3 assembler
Scott Linder via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 28 12:38:15 PDT 2019
Author: scott.linder
Date: Wed Aug 28 12:38:15 2019
New Revision: 370250
URL: http://llvm.org/viewvc/llvm-project?rev=370250&view=rev
Log:
[AMDGPU] Fix bug when calculating user_spgr_count for Code Object V3 assembler
Stop counting explicitly disabled user_spgr's in the user_sgpr_count field of the kernel descriptor.
Differential Revision: https://reviews.llvm.org/D66900
Modified:
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/test/MC/AMDGPU/hsa-v3.s
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=370250&r1=370249&r2=370250&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed Aug 28 12:38:15 2019
@@ -3617,37 +3617,44 @@ bool AMDGPUAsmParser::ParseDirectiveAMDH
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
Val, ValRange);
- UserSGPRCount += 4;
+ if (Val)
+ UserSGPRCount += 4;
} else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
Val, ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
ValRange);
- UserSGPRCount += 2;
+ if (Val)
+ UserSGPRCount += 2;
} else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
PARSE_BITS_ENTRY(KD.kernel_code_properties,
KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
Val, ValRange);
- UserSGPRCount += 1;
+ if (Val)
+ UserSGPRCount += 1;
} else if (ID == ".amdhsa_wavefront_size32") {
if (IVersion.Major < 10)
return getParser().Error(IDRange.Start, "directive requires gfx10+",
Modified: llvm/trunk/test/MC/AMDGPU/hsa-v3.s
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/MC/AMDGPU/hsa-v3.s?rev=370250&r1=370249&r2=370250&view=diff
==============================================================================
--- llvm/trunk/test/MC/AMDGPU/hsa-v3.s (original)
+++ llvm/trunk/test/MC/AMDGPU/hsa-v3.s Wed Aug 28 12:38:15 2019
@@ -8,16 +8,19 @@
// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
// READOBJ: Relocation section '.rela.rodata' at offset
// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
+// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
// READOBJ: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
// READOBJ: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
+// READOBJ: {{[0-9]+}}: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
+// READOBJ: {{[0-9]+}}: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
// READOBJ: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
// READOBJ: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
// READOBJ: {{[0-9]+}}: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
@@ -40,6 +43,11 @@
// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
+// disabled_user_sgpr
+// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
.text
// ASM: .text
@@ -62,6 +70,11 @@ complete:
special_sgpr:
s_endpgm
+.p2align 8
+.type disabled_user_sgpr, at function
+disabled_user_sgpr:
+ s_endpgm
+
.rodata
// ASM: .rodata
@@ -182,6 +195,20 @@ special_sgpr:
// ASM-NEXT: .amdhsa_ieee_mode 0
// ASM: .end_amdhsa_kernel
+// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
+// count, i.e. this should produce the same descriptor as minimal.
+.p2align 6
+.amdhsa_kernel disabled_user_sgpr
+ .amdhsa_user_sgpr_private_segment_buffer 0
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel disabled_user_sgpr
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM: .end_amdhsa_kernel
+
.section .foo
.byte .amdgcn.gfx_generation_number
More information about the llvm-commits
mailing list