[llvm] [AMPGPU] Emit s_singleuse_vdst instructions when a register is used multiple times in the same instruction. (PR #89601)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 22 04:43:08 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Scott Egerton (ScottEgerton)
<details>
<summary>Changes</summary>
Previously, multiple uses of a register within the same instruction were
being counted as multiple uses. This has been corrected to
only count as a single use as per the specification allowing for
more optimisation candidates.
---
Full diff: https://github.com/llvm/llvm-project/pull/89601.diff
2 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp (+23-19)
- (modified) llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir (+26-3)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index f6a63502ec2cda..dbf41da3ceac63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -19,6 +19,7 @@
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -83,26 +84,13 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
// instruction to be marked as a single use producer.
bool AllProducerOperandsAreSingleUse = true;
- for (const auto &Operand : MI.operands()) {
- if (!Operand.isReg())
- continue;
- const auto Reg = Operand.getReg();
-
- // Count the number of times each register is read.
- if (Operand.readsReg())
- for (const MCRegUnit &Unit : TRI->regunits(Reg))
- RegisterUseCount[Unit]++;
+ // Gather a list of Registers used before updating use counts to avoid
+ // double counting registers that appear multiple times in a single
+ // MachineInstr.
+ DenseSet<MCRegUnit> RegistersUsed;
- // Do not attempt to optimise across exec mask changes.
- if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
- for (auto &UsedReg : RegisterUseCount)
- UsedReg.second = 2;
- }
-
- // If we are at the point where the register first became live,
- // check if the operands are single use.
- if (!MI.modifiesRegister(Reg, TRI))
- continue;
+ for (const auto &Operand : MI.all_defs()) {
+ const auto Reg = Operand.getReg();
const auto RegUnits = TRI->regunits(Reg);
if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit &Unit) {
@@ -114,6 +102,22 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
for (const MCRegUnit &Unit : RegUnits)
RegisterUseCount.erase(Unit);
}
+
+ for (const auto &Operand : MI.all_uses()) {
+ const auto Reg = Operand.getReg();
+
+ // Count the number of times each register is read.
+ for (const MCRegUnit &Unit : TRI->regunits(Reg))
+ RegistersUsed.insert(Unit);
+ }
+ for (const MCRegUnit &Unit : RegistersUsed)
+ RegisterUseCount[Unit]++;
+
+ // Do not attempt to optimise across exec mask changes.
+ if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+ for (auto &UsedReg : RegisterUseCount)
+ UsedReg.second = 2;
+ }
if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
// TODO: Replace with candidate logging for instruction grouping
// later.
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
index 135a101822bfa6..513734388eb65b 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
@@ -136,6 +136,7 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $vgpr0
; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
; CHECK-NEXT: {{ $}}
@@ -278,6 +279,31 @@ body: |
liveins: $vgpr2, $vgpr3
...
+# Second use is an instruction that reads and writes v1.
+---
+name: multiple_uses_4
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: multiple_uses_4
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: liveins: $vgpr0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ ; CHECK-NEXT: $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2
+ bb.0:
+ liveins: $vgpr0
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+ $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+ bb.1:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+...
+
# Results are live-in to another basic block.
---
name: basic_block_1
@@ -398,7 +424,6 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0_sgpr1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $exec = COPY $sgpr0_sgpr1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -424,7 +449,6 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $exec_lo = COPY $sgpr0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -450,7 +474,6 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $sgpr0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: S_SINGLEUSE_VDST 1
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
; CHECK-NEXT: $exec_hi = COPY $sgpr0
; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
``````````
</details>
https://github.com/llvm/llvm-project/pull/89601
More information about the llvm-commits
mailing list