[llvm] [AMDGPU] Insert copy when only one register can be constrained (PR #161435)

Nico Núñez via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 10 11:09:02 PDT 2025


https://github.com/niconunezz updated https://github.com/llvm/llvm-project/pull/161435

>From 6683837fbf06467f4f70003291f0724502ab709a Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Tue, 30 Sep 2025 22:29:26 +0200
Subject: [PATCH 1/5] [AMDGPU] Insert copy when only one register can be
 constrained

---
 .../Target/AMDGPU/SILoadStoreOptimizer.cpp    | 19 +++++++---
 .../load-store-opt-ds-regclass-constrain.mir  | 37 +++++++++++++++++++
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f0d1117664983..21bd75c229042 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,12 +1352,21 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
                                               DataRC1, SubReg);
     }
 
-    if (!MRI->constrainRegClass(Data0->getReg(), DataRC0) ||
-        !MRI->constrainRegClass(Data1->getReg(), DataRC1))
+    bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0);
+    bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1);
+    if (!constrainData0 && !constrainData1) {
       return nullptr;
-
-    // TODO: If one register can be constrained, and not the other, insert a
-    // copy.
+    } else if (!constrainData0 || !constrainData1) {
+      MachineBasicBlock::iterator InsertBefore = CI.I;
+      MachineBasicBlock *MBB = CI.I->getParent();
+      DebugLoc DL = CI.I->getDebugLoc();
+      const MachineOperand *activeData = !constrainData0 ? Data0 : Data1;
+      Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+      BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
+          .addReg(activeData->getReg(), 0);
+      const_cast<MachineOperand *>(activeData)->setReg(BaseReg);
+    }
   }
 
   return Where;
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 33f210533e10b..7a505f599254e 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -206,5 +206,42 @@ body:             |
   %2:av_64_align2 = COPY $vgpr4_vgpr5
   DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3)
   DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3)
+...
+
+---
+name:            ds_write_b32__av32_physical
+body:             |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: ds_write_b32__av32_physical
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+  %0:vgpr_32 = COPY $vgpr0
+  %1:av_32 = COPY $vgpr1
+  DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+  DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
 
 ...
+
+---
+name:            ds_write_b32__physical_av32
+body:             |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: ds_write_b32__physical_av32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+  %0:vgpr_32 = COPY $vgpr0
+  %2:av_32 = COPY $vgpr2
+  DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+  DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file

>From d329dbd961dde8b02489fb91d6dd7f0826bd8603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
 <125479151+niconunezz at users.noreply.github.com>
Date: Wed, 1 Oct 2025 16:05:33 +0200
Subject: [PATCH 2/5] Update
 llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 .../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 7a505f599254e..0cb91faf088a5 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -244,4 +244,5 @@ body:             |
   %0:vgpr_32 = COPY $vgpr0
   %2:av_32 = COPY $vgpr2
   DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
-  DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file
+  DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...

>From d6695fdc9652c182f6ab2e52041bd1ea68563de5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
 <125479151+niconunezz at users.noreply.github.com>
Date: Wed, 1 Oct 2025 16:10:31 +0200
Subject: [PATCH 3/5] Update
 llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 .../test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 0cb91faf088a5..29d2ca7337b22 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -225,6 +225,7 @@ body:             |
   %1:av_32 = COPY $vgpr1
   DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
   DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...
 
 ...
 

>From 1381d104c8dcc0026a677ff56f3f31f48119054f Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Wed, 1 Oct 2025 18:24:33 +0200
Subject: [PATCH 4/5] address suggested changes

---
 .../Target/AMDGPU/SILoadStoreOptimizer.cpp    | 24 +++++++----
 .../load-store-opt-ds-regclass-constrain.mir  | 40 +++++++++++++++++++
 2 files changed, 56 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index 21bd75c229042..4df78b9e946da 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,20 +1352,28 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
                                               DataRC1, SubReg);
     }
 
-    bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0);
-    bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1);
-    if (!constrainData0 && !constrainData1) {
+    bool canBeConstrainedData0 =
+        MRI->constrainRegClass(Data0->getReg(), DataRC0);
+    bool canBeConstrainedData1 =
+        MRI->constrainRegClass(Data1->getReg(), DataRC1);
+    if (!canBeConstrainedData0 && !canBeConstrainedData1) {
       return nullptr;
-    } else if (!constrainData0 || !constrainData1) {
+    }
+    if (!canBeConstrainedData0 || !canBeConstrainedData1) {
       MachineBasicBlock::iterator InsertBefore = CI.I;
       MachineBasicBlock *MBB = CI.I->getParent();
-      DebugLoc DL = CI.I->getDebugLoc();
-      const MachineOperand *activeData = !constrainData0 ? Data0 : Data1;
-      Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      const DebugLoc &DL = DebugLoc::getMergedLocation(CI.I->getDebugLoc(),
+                                                       Paired.I->getDebugLoc());
+      const CombineInfo &ActiveCI = canBeConstrainedData0 ? Paired : CI;
+      MachineOperand *activeData =
+          TII->getNamedOperand(*ActiveCI.I, AMDGPU::OpName::data0);
       const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+      const TargetRegisterClass *RC = getDataRegClass(*CI.I);
+      Register BaseReg = MRI->createVirtualRegister(RC);
       BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
           .addReg(activeData->getReg(), 0);
-      const_cast<MachineOperand *>(activeData)->setReg(BaseReg);
+
+      activeData->setReg(BaseReg);
     }
   }
 
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 29d2ca7337b22..4a142f1c1dca2 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -247,3 +247,43 @@ body:             |
   DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
   DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
 ...
+
+
+---
+name:            ds_write_b64__physical_av64
+body:             |
+  bb.0:
+  liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: ds_write_b64__physical_av64
+    ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
+  %0:vgpr_32 = COPY $vgpr0
+  %1:av_64_align2 = COPY $vgpr2_vgpr3
+  DS_WRITE_B64_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s64), addrspace 3)
+  DS_WRITE_B64_gfx9 %0, $vgpr4_vgpr5, 96, 0, implicit $exec :: (store (s64), addrspace 3)
+
+
+...
+
+---
+name:            ds_write_b64__av64__physical
+body:             |
+  bb.0:
+  liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+
+    ; CHECK-LABEL: name: ds_write_b64__av64__physical
+    ; CHECK: liveins: $vgpr0, $vgpr2_vgpr3, $vgpr4_vgpr5
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vreg_64_align2 = COPY $vgpr4_vgpr5
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vreg_64_align2 = COPY $vgpr2_vgpr3
+    ; CHECK-NEXT: DS_WRITE2_B64_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 5, 12, 0, implicit $exec :: (store (s64), addrspace 3)
+  %0:vgpr_32 = COPY $vgpr0
+  %2:av_64_align2 = COPY $vgpr4_vgpr5
+  DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
+  DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
\ No newline at end of file

>From f528451a83207827f48002ccd1b2043f37d36088 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
 <125479151+niconunezz at users.noreply.github.com>
Date: Fri, 10 Oct 2025 20:08:51 +0200
Subject: [PATCH 5/5] Update
 llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 .../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir   | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 4a142f1c1dca2..b3d0fc8b50d38 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -286,4 +286,6 @@ body:             |
   %0:vgpr_32 = COPY $vgpr0
   %2:av_64_align2 = COPY $vgpr4_vgpr5
   DS_WRITE_B64_gfx9 %0, $vgpr2_vgpr3, 40, 0, implicit $exec :: (store (s64), addrspace 3)
-  DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
\ No newline at end of file
+  DS_WRITE_B64_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s64), addrspace 3)
+  ...
+  
\ No newline at end of file



More information about the llvm-commits mailing list