[llvm] [AMPGPU] Emit s_singleuse_vdst instructions when a register is used multiple times in the same instruction. (PR #89601)

Scott Egerton via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 30 03:58:48 PDT 2024


https://github.com/ScottEgerton updated https://github.com/llvm/llvm-project/pull/89601

>From d8598a3b0deaeb8303c9c88ec3cea713ee04a477 Mon Sep 17 00:00:00 2001
From: Scott Egerton <scott.egerton at amd.com>
Date: Wed, 24 Jan 2024 14:58:50 +0000
Subject: [PATCH 1/3] [AMPGPU] Emit s_singleuse_vdst instructions when a
 register is used multiple times in the same instruction.

Previously, multiple uses of a register within the same instruction were
being counted as multiple uses. This has been corrected to
only count as a single use as per the specification allowing for
more optimisation candidates.
---
 .../AMDGPU/AMDGPUInsertSingleUseVDST.cpp      | 42 ++++++++++---------
 .../CodeGen/AMDGPU/insert-singleuse-vdst.mir  | 29 +++++++++++--
 2 files changed, 49 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index f6a63502ec2cda..dbf41da3ceac63 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -19,6 +19,7 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -83,26 +84,13 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
         // instruction to be marked as a single use producer.
         bool AllProducerOperandsAreSingleUse = true;
 
-        for (const auto &Operand : MI.operands()) {
-          if (!Operand.isReg())
-            continue;
-          const auto Reg = Operand.getReg();
-
-          // Count the number of times each register is read.
-          if (Operand.readsReg())
-            for (const MCRegUnit &Unit : TRI->regunits(Reg))
-              RegisterUseCount[Unit]++;
+        // Gather a list of Registers used before updating use counts to avoid
+        // double counting registers that appear multiple times in a single
+        // MachineInstr.
+        DenseSet<MCRegUnit> RegistersUsed;
 
-          // Do not attempt to optimise across exec mask changes.
-          if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
-            for (auto &UsedReg : RegisterUseCount)
-              UsedReg.second = 2;
-          }
-
-          // If we are at the point where the register first became live,
-          // check if the operands are single use.
-          if (!MI.modifiesRegister(Reg, TRI))
-            continue;
+        for (const auto &Operand : MI.all_defs()) {
+          const auto Reg = Operand.getReg();
 
           const auto RegUnits = TRI->regunits(Reg);
           if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit &Unit) {
@@ -114,6 +102,22 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
           for (const MCRegUnit &Unit : RegUnits)
             RegisterUseCount.erase(Unit);
         }
+
+        for (const auto &Operand : MI.all_uses()) {
+          const auto Reg = Operand.getReg();
+
+          // Count the number of times each register is read.
+          for (const MCRegUnit &Unit : TRI->regunits(Reg))
+            RegistersUsed.insert(Unit);
+        }
+        for (const MCRegUnit &Unit : RegistersUsed)
+          RegisterUseCount[Unit]++;
+
+        // Do not attempt to optimise across exec mask changes.
+        if (MI.modifiesRegister(AMDGPU::EXEC, TRI)) {
+          for (auto &UsedReg : RegisterUseCount)
+            UsedReg.second = 2;
+        }
         if (AllProducerOperandsAreSingleUse && SIInstrInfo::isVALU(MI)) {
           // TODO: Replace with candidate logging for instruction grouping
           // later.
diff --git a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
index 135a101822bfa6..513734388eb65b 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
+++ b/llvm/test/CodeGen/AMDGPU/insert-singleuse-vdst.mir
@@ -136,6 +136,7 @@ body: |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $vgpr0
   ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 $vgpr0, implicit $exec
   ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr1, $vgpr1, implicit $exec
   ; CHECK-NEXT: {{  $}}
@@ -278,6 +279,31 @@ body: |
     liveins: $vgpr2, $vgpr3
 ...
 
+# Second use is an instruction that reads and writes v1.
+---
+name: multiple_uses_4
+tracksRegLiveness: true
+body: |
+  ; CHECK-LABEL: name: multiple_uses_4
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $vgpr0
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+  ; CHECK-NEXT:   $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT:   $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2
+  bb.0:
+    liveins: $vgpr0
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr0, implicit $exec
+    $vgpr2 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+    $vgpr1 = V_ADD_U32_e32 $vgpr0, $vgpr1, implicit $exec
+  bb.1:
+    liveins: $vgpr0, $vgpr1, $vgpr2
+...
+
 # Results are live-in to another basic block.
 ---
 name: basic_block_1
@@ -398,7 +424,6 @@ body: |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $sgpr0_sgpr1
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
   ; CHECK-NEXT:   $exec = COPY $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -424,7 +449,6 @@ body: |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $sgpr0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
   ; CHECK-NEXT:   $exec_lo = COPY $sgpr0
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec
@@ -450,7 +474,6 @@ body: |
   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
   ; CHECK-NEXT:   liveins: $sgpr0
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   S_SINGLEUSE_VDST 1
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 0, implicit $exec
   ; CHECK-NEXT:   $exec_hi = COPY $sgpr0
   ; CHECK-NEXT:   $vgpr0 = V_MOV_B32_e32 $vgpr0, implicit $exec

>From 81b2d8087afb01c14ecb46fd99e2b1411fb46b98 Mon Sep 17 00:00:00 2001
From: Scott Egerton <scott.egerton at amd.com>
Date: Wed, 24 Apr 2024 13:44:31 +0100
Subject: [PATCH 2/3] Pass MCRegUnit by Value

This is a simple integer value and should not be passed by reference.
---
 llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index dbf41da3ceac63..b5338d6eab0323 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -93,13 +93,13 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
           const auto Reg = Operand.getReg();
 
           const auto RegUnits = TRI->regunits(Reg);
-          if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit &Unit) {
+          if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
                 return RegisterUseCount[Unit] > 1;
               }))
             AllProducerOperandsAreSingleUse = false;
 
           // Reset uses count when a register is no longer live.
-          for (const MCRegUnit &Unit : RegUnits)
+          for (const MCRegUnit Unit : RegUnits)
             RegisterUseCount.erase(Unit);
         }
 
@@ -107,10 +107,10 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
           const auto Reg = Operand.getReg();
 
           // Count the number of times each register is read.
-          for (const MCRegUnit &Unit : TRI->regunits(Reg))
+          for (const MCRegUnit Unit : TRI->regunits(Reg))
             RegistersUsed.insert(Unit);
         }
-        for (const MCRegUnit &Unit : RegistersUsed)
+        for (const MCRegUnit Unit : RegistersUsed)
           RegisterUseCount[Unit]++;
 
         // Do not attempt to optimise across exec mask changes.

>From 9ded9f8cbf3afbd477d24099d9ef136ef383cc5f Mon Sep 17 00:00:00 2001
From: Scott Egerton <scott.egerton at amd.com>
Date: Tue, 30 Apr 2024 11:33:31 +0100
Subject: [PATCH 3/3] NFC: Replace DenseSet with SmallVector

This is purely for performance reasons.
---
 llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
index b5338d6eab0323..bd79dc921d59ce 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
@@ -19,7 +19,6 @@
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 #include "SIInstrInfo.h"
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -87,7 +86,7 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
         // Gather a list of Registers used before updating use counts to avoid
         // double counting registers that appear multiple times in a single
         // MachineInstr.
-        DenseSet<MCRegUnit> RegistersUsed;
+        SmallVector<MCRegUnit> RegistersUsed;
 
         for (const auto &Operand : MI.all_defs()) {
           const auto Reg = Operand.getReg();
@@ -107,8 +106,10 @@ class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
           const auto Reg = Operand.getReg();
 
           // Count the number of times each register is read.
-          for (const MCRegUnit Unit : TRI->regunits(Reg))
-            RegistersUsed.insert(Unit);
+          for (const MCRegUnit Unit : TRI->regunits(Reg)) {
+            if (!llvm::is_contained(RegistersUsed, Unit))
+              RegistersUsed.push_back(Unit);
+          }
         }
         for (const MCRegUnit Unit : RegistersUsed)
           RegisterUseCount[Unit]++;



More information about the llvm-commits mailing list