[llvm] befd44b - [AMDGPU] Update hasUnwantedEffectsWhenEXECEmpty (#97982)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 16 19:30:48 PDT 2024
Author: Carl Ritson
Date: 2024-07-17T11:30:44+09:00
New Revision: befd44bcdc6e9d2f4099bf344826b2cd0fd8cbdc
URL: https://github.com/llvm/llvm-project/commit/befd44bcdc6e9d2f4099bf344826b2cd0fd8cbdc
DIFF: https://github.com/llvm/llvm-project/commit/befd44bcdc6e9d2f4099bf344826b2cd0fd8cbdc.diff
LOG: [AMDGPU] Update hasUnwantedEffectsWhenEXECEmpty (#97982)
Add barriers and s_wait_event to hasUnwantedEffectsWhenEXECEmpty.
Add a comment documenting the current expected use of the function.
Added:
Modified:
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.h
llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index ba72152f5668e..e6e74d619003d 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4131,14 +4131,17 @@ bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const
// EXEC = 0, but checking for that case here seems not worth it
// given the typical code patterns.
if (Opcode == AMDGPU::S_SENDMSG || Opcode == AMDGPU::S_SENDMSGHALT ||
- isEXP(Opcode) ||
- Opcode == AMDGPU::DS_ORDERED_COUNT || Opcode == AMDGPU::S_TRAP ||
- Opcode == AMDGPU::DS_GWS_INIT || Opcode == AMDGPU::DS_GWS_BARRIER)
+ isEXP(Opcode) || Opcode == AMDGPU::DS_ORDERED_COUNT ||
+ Opcode == AMDGPU::S_TRAP || Opcode == AMDGPU::S_WAIT_EVENT)
return true;
if (MI.isCall() || MI.isInlineAsm())
return true; // conservative assumption
+ // Assume that barrier interactions are only intended with active lanes.
+ if (isBarrier(Opcode))
+ return true;
+
// A mode change is a scalar operation that influences vector instructions.
if (modifiesModeRegister(MI))
return true;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index b723deb9543cd..1712dfe8d406c 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -936,6 +936,16 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
Opcode == AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM;
}
+ bool isBarrier(unsigned Opcode) const {
+ return isBarrierStart(Opcode) || Opcode == AMDGPU::S_BARRIER_WAIT ||
+ Opcode == AMDGPU::S_BARRIER_INIT_M0 ||
+ Opcode == AMDGPU::S_BARRIER_INIT_IMM ||
+ Opcode == AMDGPU::S_BARRIER_JOIN_IMM ||
+ Opcode == AMDGPU::S_BARRIER_LEAVE ||
+ Opcode == AMDGPU::DS_GWS_INIT ||
+ Opcode == AMDGPU::DS_GWS_BARRIER;
+ }
+
static bool doesNotReadTiedSource(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::TiedSourceNotRead;
}
@@ -1009,7 +1019,13 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
/// Return true if the instruction modifies the mode register.q
static bool modifiesModeRegister(const MachineInstr &MI);
- /// Whether we must prevent this instruction from executing with EXEC = 0.
+ /// This function is used to determine if an instruction can be safely
+ /// executed under EXEC = 0 without hardware error, indeterminate results,
+ /// and/or visible effects on future vector execution or outside the shader.
+ /// Note: as of 2024 the only use of this is SIPreEmitPeephole where it is
+ /// used in removing branches over short EXEC = 0 sequences.
+ /// As such it embeds certain assumptions which may not apply to every case
+ /// of EXEC = 0 execution.
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;
/// Returns true if the instruction could potentially depend on the value of
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
index 1d3132dbe2af2..b4ed3cafbacb5 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx10.mir
@@ -184,3 +184,33 @@ body: |
bb.2:
S_ENDPGM 0
...
+
+---
+name: skip_barrier
+body: |
+ ; CHECK-LABEL: name: skip_barrier
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER
+
+ bb.2:
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
index c0b839d218a95..2d092974ac566 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-skips-gfx12.mir
@@ -300,3 +300,311 @@ body: |
bb.2:
S_ENDPGM 0
...
+
+---
+name: skip_wait_event
+body: |
+ ; CHECK-LABEL: name: skip_wait_event
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_WAIT_EVENT 0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_WAIT_EVENT 0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_IMM -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_SIGNAL_IMM -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_isfirst_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_SIGNAL_ISFIRST_IMM -1, implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_M0 implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_SIGNAL_M0 implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_signal_isfirst_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_signal_isfirst_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_wait
+body: |
+ ; CHECK-LABEL: name: skip_barrier_wait
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_WAIT -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_WAIT -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_init_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_INIT_IMM -1, implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_INIT_IMM -1, implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_init_m0
+body: |
+ ; CHECK-LABEL: name: skip_barrier_init_m0
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: $m0 = S_MOV_B32 -1
+ ; CHECK-NEXT: S_BARRIER_INIT_M0 implicit $m0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ $m0 = S_MOV_B32 -1
+ S_BARRIER_INIT_M0 implicit $m0
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_join_imm
+body: |
+ ; CHECK-LABEL: name: skip_barrier_join_imm
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_JOIN_IMM -1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_JOIN_IMM -1
+
+ bb.2:
+ S_ENDPGM 0
+...
+
+---
+name: skip_barrier_leave
+body: |
+ ; CHECK-LABEL: name: skip_barrier_leave
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: V_NOP_e32 implicit $exec
+ ; CHECK-NEXT: S_BARRIER_LEAVE implicit-def $scc
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: S_ENDPGM 0
+ bb.0:
+ successors: %bb.1, %bb.2
+ S_CBRANCH_EXECZ %bb.2, implicit $exec
+
+ bb.1:
+ successors: %bb.2
+ V_NOP_e32 implicit $exec
+ S_BARRIER_LEAVE implicit-def $scc
+
+ bb.2:
+ S_ENDPGM 0
+...
More information about the llvm-commits mailing list