[llvm] 92f3828 - [AMDGPU] Fix wait counts in the presence of 16bit subregisters

via llvm-commits llvm-commits at lists.llvm.org
Tue May 26 02:26:17 PDT 2020


Author: vpykhtin
Date: 2020-05-26T12:19:27+03:00
New Revision: 92f3828dc5675f9917d909eb75c29ba1e14920ad

URL: https://github.com/llvm/llvm-project/commit/92f3828dc5675f9917d909eb75c29ba1e14920ad
DIFF: https://github.com/llvm/llvm-project/commit/92f3828dc5675f9917d909eb75c29ba1e14920ad.diff

LOG: [AMDGPU] Fix wait counts in the presence of 16bit subregisters

Differential Revision: https://reviews.llvm.org/D80033

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    llvm/test/CodeGen/AMDGPU/waitcnt.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index c115d26fa6a3..67c7ff1fcda4 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -505,7 +505,7 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
 
   const TargetRegisterClass *RC = TII->getOpRegClass(*MI, OpNo);
   unsigned Size = TRI->getRegSizeInBits(*RC);
-  Result.second = Result.first + (Size / 32);
+  Result.second = Result.first + ((Size + 16) / 32);
 
   return Result;
 }

diff  --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
index fd81ca83a1a1..c568b8d32a23 100644
--- a/llvm/test/CodeGen/AMDGPU/waitcnt.mir
+++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir
@@ -41,6 +41,9 @@
     ret void
   }
 
+  define amdgpu_kernel void @subregs16bit() {
+    ret void
+  }
 ...
 ---
 
@@ -284,3 +287,19 @@ body: |
       FLAT_STORE_DWORD $vgpr1_vgpr2, $vgpr0, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
     }
 ...
+
+---
+# CHECK-LABEL: name: subregs16bit
+# CHECK: S_WAITCNT 112
+# CHECK-NEXT: V_NOP_e32 
+
+name: subregs16bit
+machineFunctionInfo:
+  isEntryFunction: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4
+      $vgpr0 = FLAT_LOAD_USHORT killed $vgpr0_vgpr1, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+      $vgpr1 = FLAT_LOAD_USHORT killed $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
+      V_NOP_e32 implicit $exec, implicit $vgpr0_lo16, implicit $vgpr1_lo16
+...


        


More information about the llvm-commits mailing list