[PATCH] D91452: AMDGPU: Fix counting kernel arguments towards register usage

Fri Nov 13 11:37:50 PST 2020

arsenm created this revision.
arsenm added reviewers: rampitec, foad, nhaehnle, tpr, scott.linder, kzhuravl.
Herald added subscribers: kerbowa, hiraditya, t-tye, dstuttard, yaxunl, jvesely.
Herald added a project: LLVM.
arsenm requested review of this revision.
Herald added a subscriber: wdng.

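Only count function arguments towards the wave dispatch register usage
for shader calling conventions. Kernel arguments should not count towards
the number of registers used.

Also use DataLayout to get the type size. Relying on the IR type size is
also pretty broken here, since it won't perfectly capture how types
are legalized.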


https://reviews.llvm.org/D91452

Files:
  llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
  llvm/test/CodeGen/AMDGPU/code-object-v3.ll


Index: llvm/test/CodeGen/AMDGPU/code-object-v3.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/code-object-v3.ll
+++ llvm/test/CodeGen/AMDGPU/code-object-v3.ll
@@ -50,8 +50,9 @@
 ; OSABI-AMDHSA-ELF: .rodata PROGBITS {{[0-9]+}} {{[0-9]+}} {{[0-9a-f]+}} {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
 
 ; OSABI-AMDHSA-ELF: Relocation section '.rela.rodata' at offset
-; OSABI-AMDHSA-ELF: 0000000000000010 0000000100000005 R_AMDGPU_REL64 0000000000000000 fadd + 10
-; OSABI-AMDHSA-ELF: 0000000000000050 0000000300000005 R_AMDGPU_REL64 0000000000000100 fsub + 10
+; OSABI-AMDHSA-ELF: 0000000000000010 0000000300000005 R_AMDGPU_REL64 0000000000000000 fadd + 10
+; OSABI-AMDHSA-ELF: 0000000000000050 0000000500000005 R_AMDGPU_REL64 0000000000000100 fsub + 10
+; OSABI-AMDHSA-ELF: 0000000000000090 0000000100000005 R_AMDGPU_REL64 0000000000000200 empty + 10
 
 ; OSABI-AMDHSA-ELF: Symbol table '.symtab' contains {{[0-9]+}} entries
 ; OSABI-AMDHSA-ELF: {{[0-9]+}}: 0000000000000000 {{[0-9]+}} FUNC   GLOBAL PROTECTED {{[0-9]+}} fadd
@@ -85,3 +86,18 @@
   store float %r.val, float addrspace(1)* %r
   ret void
 }
+
+; Make sure kernel arguments do not count towards the number of
+; registers used.
+;
+; ALL-ASM-LABEL: {{^}}empty:
+; ALL-ASM:     .amdhsa_next_free_vgpr 1
+; ALL-ASM:     .amdhsa_next_free_sgpr 1
+define amdgpu_kernel void @empty(
+    i32 %i,
+    float addrspace(1)* %r,
+    float addrspace(1)* %a,
+    float addrspace(1)* %b) {
+entry:
+  ret void
+}
Index: llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1029,18 +1029,26 @@
   // Account for extra SGPRs and VGPRs reserved for debugger use.
   ProgInfo.NumSGPR += ExtraSGPRs;
 
+  const Function &F = MF.getFunction();
+
   // Ensure there are enough SGPRs and VGPRs for wave dispatch, where wave
   // dispatch registers are function args.
   unsigned WaveDispatchNumSGPR = 0, WaveDispatchNumVGPR = 0;
-  for (auto &Arg : MF.getFunction().args()) {
-    unsigned NumRegs = (Arg.getType()->getPrimitiveSizeInBits() + 31) / 32;
-    if (Arg.hasAttribute(Attribute::InReg))
-      WaveDispatchNumSGPR += NumRegs;
-    else
-      WaveDispatchNumVGPR += NumRegs;
+
+  if (isShader(F.getCallingConv())) {
+    // FIXME: We should be using the number of registers determined during
+    // calling convention lowering to legalize the types.
+    const DataLayout &DL = F.getParent()->getDataLayout();
+    for (auto &Arg : F.args()) {
+      unsigned NumRegs = (DL.getTypeSizeInBits(Arg.getType()) + 31) / 32;
+      if (Arg.hasAttribute(Attribute::InReg))
+        WaveDispatchNumSGPR += NumRegs;
+      else
+        WaveDispatchNumVGPR += NumRegs;
+    }
+    ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
+    ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
   }
-  ProgInfo.NumSGPR = std::max(ProgInfo.NumSGPR, WaveDispatchNumSGPR);
-  ProgInfo.NumVGPR = std::max(ProgInfo.NumVGPR, WaveDispatchNumVGPR);
 
   // Adjust number of registers used to meet default/requested minimum/maximum
   // number of waves per execution unit request.


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D91452.305223.patch
Type: text/x-patch
Size: 3255 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20201113/0e8c37a6/attachment.bin>