[llvm] [AMDGPU] Disable hasVALUPartialForwardingHazard for GFX12 (PR #78188)
Jay Foad via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 15 08:47:24 PST 2024
https://github.com/jayfoad created https://github.com/llvm/llvm-project/pull/78188
- Add GFX12 testing to partial-forwarding-hazards.mir
- [AMDGPU] Disable hasVALUPartialForwardingHazard for GFX12
From 67a86a3b10c0c16e1f4fab032152bbb57c4ecfdb Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 6 Jan 2023 10:51:44 +0000
Subject: [PATCH 1/2] Add GFX12 testing to partial-forwarding-hazards.mir
---
llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
index 3d269902f3e642..d8831dea2af3aa 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
+++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
---
name: partial_forwarding_1_hazard
From c50980b590803d65bc02aafe6721b8666333be2a Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad at amd.com>
Date: Fri, 6 Jan 2023 10:56:16 +0000
Subject: [PATCH 2/2] [AMDGPU] Disable hasVALUPartialForwardingHazard for GFX12
---
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 +-
.../AMDGPU/partial-forwarding-hazards.mir | 351 ++++++++++++------
2 files changed, 232 insertions(+), 121 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 070d165cdaadb8..17b8e4d926c0f2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1139,7 +1139,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasLdsWaitVMSRC() const { return getGeneration() >= GFX12; }
bool hasVALUPartialForwardingHazard() const {
- return getGeneration() >= GFX11;
+ return getGeneration() == GFX11;
}
bool hasVALUTransUseHazard() const { return HasVALUTransUseHazard; }
diff --git a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
index d8831dea2af3aa..56eb8ce581a79b 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
+++ b/llvm/test/CodeGen/AMDGPU/partial-forwarding-hazards.mir
@@ -1,18 +1,24 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
-# RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX11 %s
+# RUN: llc -march=amdgcn -mcpu=gfx1200 -mattr=+wavefrontsize64 -verify-machineinstrs -run-pass post-RA-hazard-rec -o - %s | FileCheck -check-prefixes=GCN,GFX12 %s
---
name: partial_forwarding_1_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_1_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_1_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_1_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
@@ -24,24 +30,41 @@ body: |
name: partial_forwarding_2_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_2_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr0 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr1 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr2 = S_MOV_B32 0
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $sgpr3 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr4 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr5 = S_MOV_B32 0
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $sgpr6 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr7 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr8 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr9 = S_MOV_B32 0
- ; GCN-NEXT: $sgpr10 = S_MOV_B32 0
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_2_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr2 = S_MOV_B32 0
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr4 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr5 = S_MOV_B32 0
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $sgpr6 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr7 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr8 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr9 = S_MOV_B32 0
+ ; GFX11-NEXT: $sgpr10 = S_MOV_B32 0
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_2_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $sgpr0 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr1 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr2 = S_MOV_B32 0
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $sgpr3 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr4 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr5 = S_MOV_B32 0
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $sgpr6 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr7 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr8 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr9 = S_MOV_B32 0
+ ; GFX12-NEXT: $sgpr10 = S_MOV_B32 0
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$sgpr0 = S_MOV_B32 0
$sgpr1 = S_MOV_B32 0
@@ -64,19 +87,31 @@ body: |
name: partial_forwarding_3_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_3_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_3_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_3_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
@@ -187,19 +222,31 @@ body: |
name: partial_forwarding_4_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_4_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_4_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_4_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
@@ -248,19 +295,31 @@ body: |
name: partial_forwarding_5_hazard
body: |
bb.0:
- ; GCN-LABEL: name: partial_forwarding_5_hazard
- ; GCN: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_5_hazard
+ ; GFX11: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_5_hazard
+ ; GFX12: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr10 = V_MOV_B32_e32 0, implicit $exec
$vgpr11 = V_MOV_B32_e32 0, implicit $exec
@@ -308,33 +367,59 @@ body: |
---
name: partial_forwarding_branching_1a
body: |
- ; GCN-LABEL: name: partial_forwarding_branching_1a
- ; GCN: bb.0:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.2:
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_branching_1a
+ ; GFX11: bb.0:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.1:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.2:
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_branching_1a
+ ; GFX12: bb.0:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.1:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.2:
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$exec = S_MOV_B64 -1
@@ -359,33 +444,59 @@ body: |
---
name: partial_forwarding_branching_1b
body: |
- ; GCN-LABEL: name: partial_forwarding_branching_1b
- ; GCN: bb.0:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.1:
- ; GCN-NEXT: successors: %bb.2(0x80000000)
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $exec = S_MOV_B64 -1
- ; GCN-NEXT: S_BRANCH %bb.2
- ; GCN-NEXT: {{ $}}
- ; GCN-NEXT: bb.2:
- ; GCN-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
- ; GCN-NEXT: S_WAITCNT_DEPCTR 4095
- ; GCN-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
- ; GCN-NEXT: S_ENDPGM 0
+ ; GFX11-LABEL: name: partial_forwarding_branching_1b
+ ; GFX11: bb.0:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.1:
+ ; GFX11-NEXT: successors: %bb.2(0x80000000)
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $exec = S_MOV_B64 -1
+ ; GFX11-NEXT: S_BRANCH %bb.2
+ ; GFX11-NEXT: {{ $}}
+ ; GFX11-NEXT: bb.2:
+ ; GFX11-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX11-NEXT: S_WAITCNT_DEPCTR 4095
+ ; GFX11-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX11-NEXT: S_ENDPGM 0
+ ; GFX12-LABEL: name: partial_forwarding_branching_1b
+ ; GFX12: bb.0:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr30 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr31 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.1:
+ ; GFX12-NEXT: successors: %bb.2(0x80000000)
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $exec = S_MOV_B64 -1
+ ; GFX12-NEXT: S_BRANCH %bb.2
+ ; GFX12-NEXT: {{ $}}
+ ; GFX12-NEXT: bb.2:
+ ; GFX12-NEXT: $vgpr10 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr11 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr12 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr13 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr14 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr15 = V_MOV_B32_e32 0, implicit $exec
+ ; GFX12-NEXT: $vgpr2 = V_ADD_F32_e32 $vgpr0, $vgpr1, implicit $mode, implicit $exec
+ ; GFX12-NEXT: S_ENDPGM 0
bb.0:
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
$vgpr30 = V_MOV_B32_e32 0, implicit $exec
More information about the llvm-commits mailing list