[llvm] bf98093 - [AMDGPU] Ignore KILLs when forming clauses
Sebastian Neubauer via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 27 07:35:09 PDT 2021
Author: Sebastian Neubauer
Date: 2021-09-27T16:33:52+02:00
New Revision: bf980930e5a7c521952255380793aa172e7130ef
URL: https://github.com/llvm/llvm-project/commit/bf980930e5a7c521952255380793aa172e7130ef
DIFF: https://github.com/llvm/llvm-project/commit/bf980930e5a7c521952255380793aa172e7130ef.diff
LOG: [AMDGPU] Ignore KILLs when forming clauses
KILL instructions are sometimes present and previously prevented hard
clauses from being formed.
Fix this by ignoring all meta instructions in clauses.
Differential Revision: https://reviews.llvm.org/D106042
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
llvm/test/CodeGen/AMDGPU/hard-clauses.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 3b932ced7520..ccbcc867215c 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -268,6 +268,12 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
return;
}
+ if (MI->isMetaInstruction()) {
+ if (isVerbose())
+ OutStreamer->emitRawComment(" meta instruction");
+ return;
+ }
+
MCInst TmpInst;
MCInstLowering.lower(MI, TmpInst);
EmitToStreamer(*OutStreamer, TmpInst);
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 7ba20eb6027b..125f006a1d1d 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -58,6 +58,8 @@ enum HardClauseType {
// Internal instructions, which are allowed in the middle of a hard clause,
// except for s_waitcnt.
HARDCLAUSE_INTERNAL,
+ // Meta instructions that do not result in any ISA like KILL.
+ HARDCLAUSE_IGNORE,
// Instructions that are not allowed in a hard clause: SALU, export, branch,
// message, GDS, s_waitcnt and anything else not mentioned above.
HARDCLAUSE_ILLEGAL,
@@ -100,6 +102,8 @@ class SIInsertHardClauses : public MachineFunctionPass {
// It's safe to treat the rest as illegal.
if (MI.getOpcode() == AMDGPU::S_NOP)
return HARDCLAUSE_INTERNAL;
+ if (MI.isMetaInstruction())
+ return HARDCLAUSE_IGNORE;
return HARDCLAUSE_ILLEGAL;
}
@@ -112,25 +116,25 @@ class SIInsertHardClauses : public MachineFunctionPass {
// The last non-internal instruction in the clause.
MachineInstr *Last = nullptr;
// The length of the clause including any internal instructions in the
- // middle or after the end of the clause.
+ // middle (but not at the end) of the clause.
unsigned Length = 0;
+ // Internal instructions at the end of a clause should not be included in
+ // the clause. Count them in TrailingInternalLength until a new memory
+ // instruction is added.
+ unsigned TrailingInternalLength = 0;
// The base operands of *Last.
SmallVector<const MachineOperand *, 4> BaseOps;
};
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
- // Get the size of the clause excluding any internal instructions at the
- // end.
- unsigned Size =
- std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
- if (Size < 2)
+ if (CI.First == CI.Last)
return false;
- assert(Size <= 64 && "Hard clause is too long!");
+ assert(CI.Length <= 64 && "Hard clause is too long!");
auto &MBB = *CI.First->getParent();
auto ClauseMI =
BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
- .addImm(Size - 1);
+ .addImm(CI.Length - 1);
finalizeBundle(MBB, ClauseMI->getIterator(),
std::next(CI.Last->getIterator()));
return true;
@@ -168,6 +172,7 @@ class SIInsertHardClauses : public MachineFunctionPass {
if (CI.Length == 64 ||
(CI.Length && Type != HARDCLAUSE_INTERNAL &&
+ Type != HARDCLAUSE_IGNORE &&
(Type != CI.Type ||
// Note that we lie to shouldClusterMemOps about the size of the
// cluster. When shouldClusterMemOps is called from the machine
@@ -182,14 +187,20 @@ class SIInsertHardClauses : public MachineFunctionPass {
if (CI.Length) {
// Extend the current clause.
- ++CI.Length;
- if (Type != HARDCLAUSE_INTERNAL) {
- CI.Last = &MI;
- CI.BaseOps = std::move(BaseOps);
+ if (Type != HARDCLAUSE_IGNORE) {
+ if (Type == HARDCLAUSE_INTERNAL) {
+ ++CI.TrailingInternalLength;
+ } else {
+ ++CI.Length;
+ CI.Length += CI.TrailingInternalLength;
+ CI.TrailingInternalLength = 0;
+ CI.Last = &MI;
+ CI.BaseOps = std::move(BaseOps);
+ }
}
} else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
// Start a new clause.
- CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
+ CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
index 6e89bfe3ae02..16022e33b84c 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll
@@ -8624,10 +8624,10 @@ define amdgpu_gfx void @test_call_external_void_func_v32i32_i32_inreg(i32) #0 {
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s50, 14
; GFX10-SCRATCH-NEXT: v_writelane_b32 v40, s51, 15
; GFX10-SCRATCH-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SCRATCH-NEXT: s_clause 0x2
; GFX10-SCRATCH-NEXT: s_load_dword s2, s[0:1], 0x0
-; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
-; GFX10-SCRATCH-NEXT: ; kill: killed $sgpr0_sgpr1
-; GFX10-SCRATCH-NEXT: s_clause 0x1
+; GFX10-SCRATCH-NEXT: ; meta instruction
+; GFX10-SCRATCH-NEXT: ; meta instruction
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[36:51], s[0:1], 0x40
; GFX10-SCRATCH-NEXT: s_load_dwordx16 s[4:19], s[0:1], 0x0
; GFX10-SCRATCH-NEXT: s_getpc_b64 s[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
index 9ff28639f434..78e8ab8fe6c8 100644
--- a/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
+++ b/llvm/test/CodeGen/AMDGPU/hard-clauses.mir
@@ -34,6 +34,27 @@ body: |
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
...
+---
+name: nop3
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1
+ ; CHECK-LABEL: name: nop3
+ ; CHECK: liveins: $sgpr0_sgpr1
+ ; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
+ ; CHECK: S_CLAUSE 2
+ ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; CHECK: S_NOP 2
+ ; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+ ; CHECK: }
+ ; CHECK: S_NOP 2
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ S_NOP 2
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+ S_NOP 2
+...
+
---
name: long_clause
tracksRegLiveness: true
@@ -239,3 +260,43 @@ body: |
$vgpr14 = IMAGE_SAMPLE_LZ_V1_V2_gfx10 $vgpr5_vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 1, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s128) from custom "ImageResource")
$vgpr20_vgpr21_vgpr22_vgpr23 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 $vgpr3, $vgpr8, $vgpr7, $vgpr5, $vgpr4, $vgpr6, $vgpr0, $vgpr2, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (load (s128))
...
+
+---
+name: kill
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr4
+ ; CHECK-LABEL: name: kill
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr4
+ ; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; CHECK: S_CLAUSE 1
+ ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; CHECK: KILL undef renamable $sgpr4
+ ; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+ ; CHECK: }
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ KILL undef renamable $sgpr4
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+...
+
+---
+name: kill2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
+ ; CHECK-LABEL: name: kill2
+ ; CHECK: liveins: $sgpr0_sgpr1, $sgpr4, $sgpr5
+ ; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1, implicit undef $sgpr4 {
+ ; CHECK: S_CLAUSE 1
+ ; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ ; CHECK: KILL undef renamable $sgpr4
+ ; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+ ; CHECK: }
+ ; CHECK: KILL undef renamable $sgpr5
+ $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0
+ KILL undef renamable $sgpr4
+ $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0
+ KILL undef renamable $sgpr5
+...
More information about the llvm-commits
mailing list