[llvm] [AMDGPU] Insert copy when only one register can be constrained (PR #161435)

Nico Núñez via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 1 07:05:41 PDT 2025


https://github.com/niconunezz updated https://github.com/llvm/llvm-project/pull/161435

>From 6683837fbf06467f4f70003291f0724502ab709a Mon Sep 17 00:00:00 2001
From: niconunezz <niconuwii at gmail.com>
Date: Tue, 30 Sep 2025 22:29:26 +0200
Subject: [PATCH 1/2] [AMDGPU] Insert copy when only one register can be
 constrained

---
 .../Target/AMDGPU/SILoadStoreOptimizer.cpp    | 19 +++++++---
 .../load-store-opt-ds-regclass-constrain.mir  | 37 +++++++++++++++++++
 2 files changed, 51 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
index f0d1117664983..21bd75c229042 100644
--- a/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SILoadStoreOptimizer.cpp
@@ -1352,12 +1352,21 @@ SILoadStoreOptimizer::checkAndPrepareMerge(CombineInfo &CI,
                                               DataRC1, SubReg);
     }
 
-    if (!MRI->constrainRegClass(Data0->getReg(), DataRC0) ||
-        !MRI->constrainRegClass(Data1->getReg(), DataRC1))
+    bool constrainData0 = MRI->constrainRegClass(Data0->getReg(), DataRC0);
+    bool constrainData1 = MRI->constrainRegClass(Data1->getReg(), DataRC1);
+    if (!constrainData0 && !constrainData1) {
       return nullptr;
-
-    // TODO: If one register can be constrained, and not the other, insert a
-    // copy.
+    } else if (!constrainData0 || !constrainData1) {
+      MachineBasicBlock::iterator InsertBefore = CI.I;
+      MachineBasicBlock *MBB = CI.I->getParent();
+      DebugLoc DL = CI.I->getDebugLoc();
+      const MachineOperand *activeData = !constrainData0 ? Data0 : Data1;
+      Register BaseReg = MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+      const MCInstrDesc &CopyDesc = TII->get(TargetOpcode::COPY);
+      BuildMI(*MBB, InsertBefore, DL, CopyDesc, BaseReg)
+          .addReg(activeData->getReg(), 0);
+      const_cast<MachineOperand *>(activeData)->setReg(BaseReg);
+    }
   }
 
   return Where;
diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 33f210533e10b..7a505f599254e 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -206,5 +206,42 @@ body:             |
   %2:av_64_align2 = COPY $vgpr4_vgpr5
   DS_WRITE_B64_gfx9 %0, %1, 512, 0, implicit $exec :: (store (s64), addrspace 3)
   DS_WRITE_B64_gfx9 %0, %2, 1536, 0, implicit $exec :: (store (s64), addrspace 3)
+...
+
+---
+name:            ds_write_b32__av32_physical
+body:             |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: ds_write_b32__av32_physical
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY1]], [[COPY2]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+  %0:vgpr_32 = COPY $vgpr0
+  %1:av_32 = COPY $vgpr1
+  DS_WRITE_B32_gfx9 %0, %1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+  DS_WRITE_B32_gfx9 %0, $vgpr2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
 
 ...
+
+---
+name:            ds_write_b32__physical_av32
+body:             |
+  bb.0:
+  liveins: $vgpr0, $vgpr1, $vgpr2
+
+    ; CHECK-LABEL: name: ds_write_b32__physical_av32
+    ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+    ; CHECK-NEXT: DS_WRITE2_B32_gfx9 [[COPY]], [[COPY2]], [[COPY1]], 10, 24, 0, implicit $exec :: (store (s32), addrspace 3)
+  %0:vgpr_32 = COPY $vgpr0
+  %2:av_32 = COPY $vgpr2
+  DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
+  DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file

>From d329dbd961dde8b02489fb91d6dd7f0826bd8603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Nico=20N=C3=BA=C3=B1ez?=
 <125479151+niconunezz at users.noreply.github.com>
Date: Wed, 1 Oct 2025 16:05:33 +0200
Subject: [PATCH 2/2] Update
 llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir

Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
 .../CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir    | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
index 7a505f599254e..0cb91faf088a5 100644
--- a/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
+++ b/llvm/test/CodeGen/AMDGPU/load-store-opt-ds-regclass-constrain.mir
@@ -244,4 +244,5 @@ body:             |
   %0:vgpr_32 = COPY $vgpr0
   %2:av_32 = COPY $vgpr2
   DS_WRITE_B32_gfx9 %0, $vgpr1, 40, 0, implicit $exec :: (store (s32), addrspace 3)
-  DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
\ No newline at end of file
+  DS_WRITE_B32_gfx9 %0, %2, 96, 0, implicit $exec :: (store (s32), addrspace 3)
+...



More information about the llvm-commits mailing list