[llvm] bf98093 - [AMDGPU] Ignore KILLs when forming clauses

Sebastian Neubauer via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 27 07:35:09 PDT 2021


Author: Sebastian Neubauer
Date: 2021-09-27T16:33:52+02:00
New Revision: bf980930e5a7c521952255380793aa172e7130ef

URL: https://github.com/llvm/llvm-project/commit/bf980930e5a7c521952255380793aa172e7130ef
DIFF: https://github.com/llvm/llvm-project/commit/bf980930e5a7c521952255380793aa172e7130ef.diff

LOG: [AMDGPU] Ignore KILLs when forming clauses

KILL instructions are sometimes present and prevent hard
clauses from being formed.

Fix this by ignoring all meta instructions in clauses.

Differential Revision: https://reviews.llvm.org/D106042

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
    llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
    llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
    llvm/test/CodeGen/AMDGPU/hard-clauses.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 3b932ced7520..ccbcc867215c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -268,6 +268,12 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
       return;
     }
 
+    if (MI->isMetaInstruction()) {
+      if (isVerbose())
+        OutStreamer->emitRawComment(" meta instruction");
+      return;
+    }
+
     MCInst TmpInst;
     MCInstLowering.lower(MI, TmpInst);
     EmitToStreamer(*OutStreamer, TmpInst);

diff  --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 7ba20eb6027b..125f006a1d1d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -58,6 +58,8 @@ enum HardClauseType {
   // Internal instructions, which are allowed in the middle of a hard clause,
   // except for s_waitcnt.
   HARDCLAUSE_INTERNAL,
+  // Meta instructions, such as KILL, that do not result in any ISA.
+  HARDCLAUSE_IGNORE,
   // Instructions that are not allowed in a hard clause: SALU, export, branch,
   // message, GDS, s_waitcnt and anything else not mentioned above.
   HARDCLAUSE_ILLEGAL,
@@ -100,6 +102,8 @@ class SIInsertHardClauses : public MachineFunctionPass {
     // It's safe to treat the rest as illegal.
     if (MI.getOpcode() == AMDGPU::S_NOP)
       return HARDCLAUSE_INTERNAL;
+    if (MI.isMetaInstruction())
+      return HARDCLAUSE_IGNORE;
     return HARDCLAUSE_ILLEGAL;
   }
 
@@ -112,25 +116,25 @@ class SIInsertHardClauses : public MachineFunctionPass {
     // The last non-internal instruction in the clause.
     MachineInstr *Last = nullptr;
     // The length of the clause including any internal instructions in the
-    // middle or after the end of the clause.
+    // middle (but not at the end) of the clause.
     unsigned Length = 0;
+    // Internal instructions at the end of a clause should not be included in
+    // the clause. Count them in TrailingInternalLength until a new memory
+    // instruction is added.
+    unsigned TrailingInternalLength = 0;
     // The base operands of *Last.
     SmallVector<const MachineOperand *, 4> BaseOps;
   };
 
   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
-    // Get the size of the clause excluding any internal instructions at the
-    // end.
-    unsigned Size =
-        std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
-    if (Size < 2)
+    if (CI.First == CI.Last)
       return false;
-    assert(Size <= 64 && "Hard clause is too long!");
+    assert(CI.Length <= 64 && "Hard clause is too long!");
 
     auto &MBB = *CI.First->getParent();
     auto ClauseMI =
         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
-            .addImm(Size - 1);
+            .addImm(CI.Length - 1);
     finalizeBundle(MBB, ClauseMI->getIterator(),
                    std::next(CI.Last->getIterator()));
     return true;
@@ -168,6 +172,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
 
         if (CI.Length == 64 ||
             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
+             Type != HARDCLAUSE_IGNORE &&
              (Type != CI.Type ||
               // Note that we lie to shouldClusterMemOps about the size of the
               // cluster. When shouldClusterMemOps is called from the machine
@@ -182,14 +187,20 @@ class SIInsertHardClauses : public MachineFunctionPass {
 
         if (CI.Length) {
           // Extend the current clause.
-          ++CI.Length;
-          if (Type != HARDCLAUSE_INTERNAL) {
-            CI.Last = &MI;
-            CI.BaseOps = std::move(BaseOps);
+          if (Type != HARDCLAUSE_IGNORE) {
+            if (Type == HARDCLAUSE_INTERNAL) {
+              ++CI.TrailingInternalLength;
+            } else {
+              ++CI.Length;
+              CI.Length += CI.TrailingInternalLength;
+              CI.TrailingInternalLength = 0;
+              CI.Last = &MI;
+              CI.BaseOps = std::move(BaseOps);
+            }
           }
         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
           // Start a new clause.
-          CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
+          CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
         }
       }
 

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 6e89bfe3ae02..16022e33b84c 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -8624,10 +8624,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s50, 14
 ; GFX10-SCRATCH-NEXT:    v_writelane_b32 v40, s51, 15
 ; GFX10-SCRATCH-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX10-SCRATCH-NEXT:    s_clause 0x2
 ; GFX10-SCRATCH-NEXT:    s_load_dword s2, s[0:1], 0x0
-; GFX10-SCRATCH-NEXT:    ; kill: killed $sgpr0_sgpr1
-; GFX10-SCRATCH-NEXT:    ; kill: killed $sgpr0_sgpr1
-; GFX10-SCRATCH-NEXT:    s_clause 0x1
+; GFX10-SCRATCH-NEXT:    ; meta instruction
+; GFX10-SCRATCH-NEXT:    ; meta instruction
 ; GFX10-SCRATCH-NEXT:    s_load_dwordx16 s[36:51], s[0:1], 0x40
 ; GFX10-SCRATCH-NEXT:    s_load_dwordx16 s[4:19], s[0:1], 0x0
 ; GFX10-SCRATCH-NEXT:    s_getpc_b64 s[0:1]

diff  --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
index 9ff28639f434..78e8ab8fe6c8 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
@@ -34,6 +34,27 @@ body: |
     $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
 ...
 
+---
+name: nop3
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1
+    ; CHECK-LABEL: name: nop3
+    ; CHECK: liveins: $sgpr0_sgpr1
+    ; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+    ; CHECK:   S_CLAUSE 2
+    ; CHECK:   $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    ; CHECK:   S_NOP 2
+    ; CHECK:   $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    ; CHECK: }
+    ; CHECK: S_NOP 2
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    S_NOP 2
+    $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    S_NOP 2
+...
+
 ---
 name: long_clause
 tracksRegLiveness: true
@@ -239,3 +260,43 @@ body: |
     $vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
     $vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec  :: (load (s128))
 ...
+
+---
+name: kill
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr4
+    ; CHECK-LABEL: name: kill
+    ; CHECK: liveins: $sgpr0_sgpr1, $sgpr4
+    ; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+    ; CHECK:   S_CLAUSE 1
+    ; CHECK:   $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    ; CHECK:   KILL undef renamable $sgpr4
+    ; CHECK:   $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    ; CHECK: }
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    KILL undef renamable $sgpr4
+    $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+...
+
+---
+name: kill2
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
+    ; CHECK-LABEL: name: kill2
+    ; CHECK: liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
+    ; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+    ; CHECK:   S_CLAUSE 1
+    ; CHECK:   $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    ; CHECK:   KILL undef renamable $sgpr4
+    ; CHECK:   $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    ; CHECK: }
+    ; CHECK: KILL undef renamable $sgpr5
+    $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+    KILL undef renamable $sgpr4
+    $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+    KILL undef renamable $sgpr5
+...


        


More information about the llvm-commits mailing list