[llvm] [AMDGPU] Disable hasVALUPartialForwardingHazard for GFX12 (PR #78188)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 15 08:47:53 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Jay Foad (jayfoad)
<details>
<summary>Changes</summary>
- Add GFX12 testing to partial-forwarding-hazards.mir
- [AMDGPU] Disable hasVALUPartialForwardingHazard for GFX12
---
Full diff: https://github.com/llvm/llvm-project/pull/78188.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+1-1)
- (modified) llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir (+231-119)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 070d165cdaadb8f..17b8e4d926c0f24 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1139,7 +1139,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
bool hasVALUPartialForwardingHazard() const {
- return getGeneration() >= GFX11;
+ return getGeneration() == GFX11;
}
bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
diff --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
index 3d269902f3e6421..56eb8ce581a79b9 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
+++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
@@ -1,17 +1,24 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s
---
name: partial_forwarding_1_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_1_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_1_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_1_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
@@ -23,24 +30,41 @@ body: |
name: partial_forwarding_2_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_2_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr9 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_2_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr4 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr5 = S_MOV_B32 0
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $sgpr6 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr7 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr8 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr9 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr10 = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_2_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr4 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr5 = S_MOV_B32 0
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $sgpr6 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr7 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr8 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr9 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr10 = S_MOV_B32 0
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 0
$sgpr1 = S_MOV_B32 0
@@ -63,19 +87,31 @@ body: |
name: partial_forwarding_3_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_3_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_3_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_3_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
@@ -186,19 +222,31 @@ body: |
name: partial_forwarding_4_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_4_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_4_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_4_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
@@ -247,19 +295,31 @@ body: |
name: partial_forwarding_5_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_5_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_5_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_5_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
$vgpr11 = V_MOV_B32_e32 0, implicit $exec
@@ -307,33 +367,59 @@ body: |
---
name: partial_forwarding_branching_1a
body: |
- ; GCN-LABEL: name: partial_forwarding_branching_1a
- ; GCN: bb.0:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.2:
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_branching_1a
+ ; GFX11: bb.0:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.1:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.2:
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_branching_1a
+ ; GFX12: bb.0:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.1:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.2:
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
@@ -358,33 +444,59 @@ body: |
---
name: partial_forwarding_branching_1b
body: |
- ; GCN-LABEL: name: partial_forwarding_branching_1b
- ; GCN: bb.0:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.2:
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_branching_1b
+ ; GFX11: bb.0:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.1:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.2:
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_branching_1b
+ ; GFX12: bb.0:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.1:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.2:
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr30 = V_MOV_B32_e32 0, implicit $exec
``````````
</details>
https://github.com/llvm/llvm-project/pull/78188
More information about the llvm-commits mailing list